From 1238b040b91e57589ae786e3f0225e688bbfbad4 Mon Sep 17 00:00:00 2001 From: Steven C Date: Fri, 3 Oct 2025 22:22:21 -0400 Subject: [PATCH] Make Presidio a required dependency --- README.md | 9 +- docs/quickstart.md | 5 - examples/basic/hello_world.py | 9 +- pyproject.toml | 12 +- requirements.txt | 199 +++++++++++++++++++++- src/guardrails/checks/text/pii.py | 21 +-- src/guardrails/checks/text/secret_keys.py | 15 +- 7 files changed, 216 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index b7b5290..92ce3fa 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,9 @@ For full details, advanced usage, and API reference, see here: [OpenAI Guardrail - Use the [Guardrails web UI](https://guardrails.openai.com/) to create a JSON configuration file describing which guardrails to apply and how to configure them. - The wizard outputs a file like `guardrail_specs.json`. -2. **Install dependencies** - - **Install from this repo:** +2. **Install** ```bash - pip install -e '.[presidio]' - ``` - - **Eventually this will be:** - ```bash - pip install openai-guardrails + pip install guardrails ``` 3. **Wrap your OpenAI client with Guardrails** diff --git a/docs/quickstart.md b/docs/quickstart.md index 9f184b5..b8a2e80 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -8,11 +8,6 @@ Get started with Guardrails in minutes. Guardrails provides drop-in replacements pip install guardrails ``` -For PII detection (optional): -```bash -pip install guardrails[presidio] -``` - ## Set API Key ```bash diff --git a/examples/basic/hello_world.py b/examples/basic/hello_world.py index 30dd928..820f4ea 100644 --- a/examples/basic/hello_world.py +++ b/examples/basic/hello_world.py @@ -10,13 +10,18 @@ console = Console() -# Pipeline configuration with input guardrails +# Pipeline configuration with pre_flight and input guardrails PIPELINE_CONFIG = { "version": 1, - "input": { + "pre_flight": { "version": 1, "guardrails": [ {"name": "Contains PII", "config": {"entities": ["US_SSN", "PHONE_NUMBER", "EMAIL_ADDRESS"]}}, + ], + }, + "input": { + "version": 1, + "guardrails": [ { "name": "Custom Prompt Check", "config": { diff --git a/pyproject.toml b/pyproject.toml index ff2034b..57efe4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,13 @@ readme = "README.md" requires-python = ">=3.11" license = "MIT" authors = [{ name = "OpenAI", email = "support@openai.com" }] -dependencies = ["openai>=1.75.0", "pydantic>=2.11.3", "openai-agents>=0.3.3"] +dependencies = [ + "openai>=1.75.0", + "pydantic>=2.11.3", + "openai-agents>=0.3.3", + "pip>=25.0.1", + "presidio-analyzer>=2.2.358", +] classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", @@ -25,10 +31,6 @@ examples = [ "pillow>=11.2.1", "rich>=14.0.0", ] -presidio = [ - "pip>=25.0.1", # Required to load spaCy dependency for presidio-analyzer - "presidio-analyzer>=2.2.358", -] benchmark = [ "numpy>=1.24.0", "scikit-learn>=1.3.0", diff --git a/requirements.txt b/requirements.txt index d703202..ee09d83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,45 +5,232 @@ annotated-types==0.7.0 anyio==4.9.0 # via # httpx + # mcp # openai + # sse-starlette + # starlette +attrs==25.3.0 + # via + # jsonschema + # referencing +blis==1.3.0 + # via thinc +catalogue==2.0.10 + # via + # spacy + # srsly + # thinc certifi==2025.4.26 # via # httpcore # httpx + # requests +charset-normalizer==3.4.3 + # via requests +click==8.3.0 + # via + # typer + # uvicorn +cloudpathlib==0.22.0 + # via weasel +colorama==0.4.6 + # via griffe +confection==0.1.5 + # via + # thinc + # weasel +cymem==2.0.11 + # via + # preshed + # spacy + # thinc distro==1.9.0 # via openai +filelock==3.19.1 + # via tldextract +griffe==1.14.0 + # via openai-agents h11==0.16.0 - # via httpcore + # via + # httpcore + # uvicorn httpcore==1.0.9 # via httpx httpx==0.28.1 - # via openai + # via + # mcp + # openai +httpx-sse==0.4.1 + # via mcp idna==3.10 # via # anyio # httpx + # requests + # tldextract +jinja2==3.1.6 + # via spacy jiter==0.9.0 # via openai -openai==1.77.0 +jsonschema==4.25.1 + # via mcp +jsonschema-specifications==2025.9.1 + # via jsonschema +langcodes==3.5.0 + # via spacy +language-data==1.3.0 + # via langcodes +marisa-trie==1.3.1 + # via language-data +markdown-it-py==4.0.0 + # via rich +markupsafe==3.0.3 + # via jinja2 +mcp==1.16.0 + # via openai-agents +mdurl==0.1.2 + # via markdown-it-py +murmurhash==1.0.13 + # via + # preshed + # spacy + # thinc +numpy==2.3.3 + # via + # blis + # spacy + # thinc +openai==1.109.1 + # via + # guardrails (pyproject.toml) + # openai-agents +openai-agents==0.3.3 + # via guardrails (pyproject.toml) +packaging==25.0 + # via + # spacy + # thinc + # weasel +phonenumbers==9.0.15 + # via presidio-analyzer +pip==25.2 + # via guardrails (pyproject.toml) +preshed==3.0.10 + # via + # spacy + # thinc +presidio-analyzer==2.2.360 # via guardrails (pyproject.toml) pydantic==2.11.4 # via # guardrails (pyproject.toml) + # confection + # mcp # openai + # openai-agents + # pydantic-settings + # spacy + # thinc + # weasel pydantic-core==2.33.2 # via pydantic +pydantic-settings==2.11.0 + # via mcp +pygments==2.19.2 + # via rich +python-dotenv==1.1.1 + # via pydantic-settings +python-multipart==0.0.20 + # via mcp +pyyaml==6.0.3 + # via presidio-analyzer +referencing==0.36.2 + # via + # jsonschema + # jsonschema-specifications +regex==2025.9.18 + # via presidio-analyzer +requests==2.32.5 + # via + # openai-agents + # requests-file + # spacy + # tldextract + # weasel +requests-file==2.1.0 + # via tldextract +rich==14.1.0 + # via typer +rpds-py==0.27.1 + # via + # jsonschema + # referencing +setuptools==80.9.0 + # via + # spacy + # thinc +shellingham==1.5.4 + # via typer +smart-open==7.3.1 + # via weasel sniffio==1.3.1 # via # anyio # openai +spacy==3.8.7 + # via presidio-analyzer +spacy-legacy==3.0.12 + # via spacy +spacy-loggers==1.0.5 + # via spacy +srsly==2.5.1 + # via + # confection + # spacy + # thinc + # weasel +sse-starlette==3.0.2 + # via mcp +starlette==0.48.0 + # via mcp +thinc==8.3.6 + # via spacy +tldextract==5.3.0 + # via presidio-analyzer tqdm==4.67.1 - # via openai + # via + # openai + # spacy +typer==0.19.2 + # via + # spacy + # weasel +types-requests==2.32.4.20250913 + # via openai-agents typing-extensions==4.13.2 # via - # anyio # openai + # openai-agents # pydantic # pydantic-core + # typer # typing-inspection typing-inspection==0.4.0 - # via pydantic + # via + # pydantic + # pydantic-settings +urllib3==2.5.0 + # via + # requests + # types-requests +uvicorn==0.37.0 + # via mcp +wasabi==1.1.3 + # via + # spacy + # thinc + # weasel +weasel==0.4.1 + # via spacy +wrapt==1.17.3 + # via smart-open diff --git a/src/guardrails/checks/text/pii.py b/src/guardrails/checks/text/pii.py index 3a8086e..08ab74d 100644 --- a/src/guardrails/checks/text/pii.py +++ b/src/guardrails/checks/text/pii.py @@ -77,8 +77,10 @@ from collections.abc import Sequence from dataclasses import dataclass from enum import Enum -from typing import TYPE_CHECKING, Any, Final +from typing import Any, Final +from presidio_analyzer import AnalyzerEngine, RecognizerResult +from presidio_analyzer.nlp_engine import NlpEngineProvider from pydantic import BaseModel, ConfigDict, Field from guardrails.registry import default_spec_registry @@ -89,9 +91,6 @@ logger = logging.getLogger(__name__) -if TYPE_CHECKING: - from presidio_analyzer import AnalyzerEngine, AnalyzerResult - @functools.lru_cache(maxsize=1) def _get_analyzer_engine() -> AnalyzerEngine: @@ -99,17 +98,7 @@ def _get_analyzer_engine() -> AnalyzerEngine: Returns: AnalyzerEngine: Initialized Presidio analyzer engine. - - Raises: - ImportError: If required Presidio packages are not installed. """ - try: - from presidio_analyzer import AnalyzerEngine - from presidio_analyzer.nlp_engine import NlpEngineProvider - except ImportError as e: - logger.error("Failed to import Presidio analyzer: %s", e) - raise ImportError("Presidio analyzer package is required") from e - # Define a smaller NLP configuration sm_nlp_config: Final[dict[str, Any]] = { "nlp_engine_name": "spacy", @@ -226,11 +215,11 @@ class PiiDetectionResult: Attributes: mapping (dict[str, list[str]]): Mapping from entity type to list of detected strings. - analyzer_results (Sequence[AnalyzerResult]): Raw analyzer results for position information. + analyzer_results (Sequence[RecognizerResult]): Raw analyzer results for position information. """ mapping: dict[str, list[str]] - analyzer_results: Sequence[AnalyzerResult] + analyzer_results: Sequence[RecognizerResult] def to_dict(self) -> dict[str, list[str]]: """Convert the result to a dictionary. diff --git a/src/guardrails/checks/text/secret_keys.py b/src/guardrails/checks/text/secret_keys.py index ea9db6c..f9dbd16 100644 --- a/src/guardrails/checks/text/secret_keys.py +++ b/src/guardrails/checks/text/secret_keys.py @@ -43,8 +43,9 @@ import functools import math import re -from typing import TYPE_CHECKING, Any, TypedDict +from typing import Any, TypedDict +from presidio_analyzer import AnalyzerEngine, Pattern, PatternRecognizer from pydantic import BaseModel, ConfigDict, Field, field_validator from guardrails.registry import default_spec_registry @@ -53,9 +54,6 @@ __all__ = ["secret_keys"] -if TYPE_CHECKING: - from presidio_analyzer import AnalyzerEngine - class SecretCfg(TypedDict, total=False): strict_mode: bool @@ -166,15 +164,6 @@ def _get_analyzer_engine() -> AnalyzerEngine: Returns: AnalyzerEngine: Initialized Presidio analyzer engine. """ - try: - from presidio_analyzer import AnalyzerEngine, Pattern, PatternRecognizer - except ImportError as e: - raise RuntimeError( - "The 'presidio_analyzer' library is not installed. This library is required " - "to use the `_get_analyzer_engine` function. Please install it using " - "'pip install presidio-analyzer'." - ) from e - engine = AnalyzerEngine() # Recognise file extensions so we can filter them out in non‑strict mode.