
Commit 10bf13d

Merge branch 'main' into exception-handling

2 parents: 98fbfcd + 37672e5

8 files changed: +571 -94 lines


.github/workflows/ci.yml

Lines changed: 41 additions & 42 deletions

```diff
@@ -142,48 +142,47 @@ jobs:
 
   Notebooks:
     runs-on: ubuntu-latest
-    if: ${{ always() && contains(join(needs.*.result, ','), 'success') }}
     needs: [Linting, Typing, Pytests]
-
+    strategy:
+      matrix:
+        # this line is automatically generated by the script in .github/workflows/scripts/update_notebook_matrix.sh
+        notebook: ["bug_free_python_code.ipynb","check_for_pii.ipynb","competitors_check.ipynb","extracting_entities.ipynb","generate_structured_data.ipynb","generate_structured_data_cohere.ipynb","guardrails_with_chat_models.ipynb","input_validation.ipynb","llamaindex-output-parsing.ipynb","no_secrets_in_generated_text.ipynb","provenance.ipynb","recipe_generation.ipynb","regex_validation.ipynb","response_is_on_topic.ipynb","secrets_detection.ipynb","select_choice_based_on_action.ipynb","streaming.ipynb","syntax_error_free_sql.ipynb","text_summarization_quality.ipynb","toxic_language.ipynb","translation_to_specific_language.ipynb","translation_with_quality_check.ipynb","valid_chess_moves.ipynb","value_within_distribution.ipynb"]
     env:
-      COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
-
+      COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
+      NLTK_DATA: /tmp/nltk_data
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.11.x
-
-      - name: Poetry cache
-        uses: actions/cache@v3
-        with:
-          path: ~/.cache/pypoetry
-          key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}
-
-      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-          installer-parallel: true
-
-      - name: Install dependencies
-        run: |
-          make full;
-          poetry add openai==0.28.1 jupyter nbconvert cohere;
-          # pip install openai==0.28.1 jupyter nbconvert;
-          # pip install .;
-
-      - name: Check for pypdfium2
-        run: poetry run pip show pypdfium2
-
-      - name: Huggingface Hub Login
-        run: poetry run huggingface-cli login --token $HUGGINGFACE_API_KEY
-
-      - name: Execute notebooks and check for errors
-        run: ./.github/workflows/scripts/run_notebooks.sh
+      - name: Checkout repository
+        uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.11.x
+      - name: Poetry cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pypoetry
+          key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+      - name: Install dependencies
+        run: |
+          make full;
+          poetry add openai==0.28.1 jupyter nbconvert cohere;
+      - name: Check for pypdfium2
+        run: poetry run pip show pypdfium2
+      - name: Huggingface Hub Login
+        run: poetry run huggingface-cli login --token $HUGGINGFACE_API_KEY
+      - name: download nltk data
+        run: |
+          mkdir /tmp/nltk_data;
+          poetry run python -m nltk.downloader -d /tmp/nltk_data punkt;
+      - name: Use venv
+        run: source .venv/bin/activate
+      - name: Execute notebooks and check for errors
+        run: bash ./.github/workflows/scripts/run_notebooks.sh ${{ matrix.notebook }}
```
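
The net effect of this hunk (mirrored in `examples_check.yml` below) is to replace the single job that ran every notebook through one script invocation with a matrix fan-out: one job per notebook, each passing its own `${{ matrix.notebook }}` to the runner script. A minimal sketch of what the fan-out amounts to, using a hypothetical two-notebook subset of the generated list:

```bash
# Illustration only: in CI each matrix entry is an independent job running in
# parallel on its own runner; per entry, the work is equivalent to one
# iteration of this loop. The two names are an assumed subset of the matrix.
for notebook in "bug_free_python_code.ipynb" "streaming.ipynb"; do
  bash ./.github/workflows/scripts/run_notebooks.sh "$notebook"
done
```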

.github/workflows/examples_check.yml

Lines changed: 18 additions & 19 deletions

```diff
@@ -8,48 +8,47 @@ on:
 jobs:
   execute_notebooks:
     runs-on: ubuntu-latest
-
+    strategy:
+      matrix:
+        # this line is automatically generated by the script in .github/workflows/scripts/update_notebook_matrix.sh
+        notebook: ["bug_free_python_code.ipynb","check_for_pii.ipynb","competitors_check.ipynb","extracting_entities.ipynb","generate_structured_data.ipynb","generate_structured_data_cohere.ipynb","guardrails_with_chat_models.ipynb","input_validation.ipynb","llamaindex-output-parsing.ipynb","no_secrets_in_generated_text.ipynb","provenance.ipynb","recipe_generation.ipynb","regex_validation.ipynb","response_is_on_topic.ipynb","secrets_detection.ipynb","select_choice_based_on_action.ipynb","streaming.ipynb","syntax_error_free_sql.ipynb","text_summarization_quality.ipynb","toxic_language.ipynb","translation_to_specific_language.ipynb","translation_with_quality_check.ipynb","valid_chess_moves.ipynb","value_within_distribution.ipynb"]
     env:
       COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
-
-
+      NLTK_DATA: /tmp/nltk_data
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
-
+        uses: actions/checkout@v2
       - name: Set up Python
         uses: actions/setup-python@v2
-        with:
+        with:
           python-version: 3.11.x
-
       - name: Poetry cache
         uses: actions/cache@v3
         with:
           path: ~/.cache/pypoetry
           key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}
-
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-          installer-parallel: true
-
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
       - name: Install dependencies
         run: |
           make full;
           poetry add openai==0.28.1 jupyter nbconvert cohere;
-          # pip install openai==0.28.1 jupyter nbconvert;
-          # pip install .;
-
       - name: Check for pypdfium2
         run: poetry run pip show pypdfium2
-
       - name: Huggingface Hub Login
-        run: huggingface-cli login --token $HUGGINGFACE_API_KEY
-
+        run: poetry run huggingface-cli login --token $HUGGINGFACE_API_KEY
+      - name: download nltk data
+        run: |
+          mkdir /tmp/nltk_data;
+          poetry run python -m nltk.downloader -d /tmp/nltk_data punkt;
+      - name: Use venv
+        run: source .venv/bin/activate
       - name: Execute notebooks and check for errors
-        run: ./.github/workflows/scripts/run_notebooks.sh
+        run: bash ./.github/workflows/scripts/run_notebooks.sh ${{ matrix.notebook }}
 
```

.github/workflows/scripts/run_notebooks.sh

Lines changed: 13 additions & 31 deletions

```diff
@@ -1,38 +1,20 @@
 #!/bin/bash
-
-mkdir /tmp/nltk_data;
-poetry run python -m nltk.downloader -d /tmp/nltk_data punkt;
 export NLTK_DATA=/tmp/nltk_data;
 
 cd docs/examples
 
-# Function to process a notebook
-process_notebook() {
-  notebook="$1"
-  invalid_notebooks=("valid_chess_moves.ipynb" "llamaindex-output-parsing.ipynb" "competitors_check.ipynb")
-  if [[ ! " ${invalid_notebooks[@]} " =~ " ${notebook} " ]]; then
-    echo "Processing $notebook..."
-    poetry run jupyter nbconvert --to notebook --execute "$notebook"
-    if [ $? -ne 0 ]; then
-      echo "Error found in $notebook"
-      echo "Error in $notebook. See logs for details." >> errors.txt
-    fi
-  fi
-}
-
-export -f process_notebook # Export the function for parallel execution
-
-# Create a file to collect errors
-> errors.txt
+# Get the notebook name from the matrix variable
+notebook="$1"
 
-# Run in parallel
-ls *.ipynb | parallel process_notebook
+# Check if the notebook should be processed
+invalid_notebooks=("valid_chess_moves.ipynb" "llamaindex-output-parsing.ipynb" "competitors_check.ipynb")
+if [[ ! " ${invalid_notebooks[@]} " =~ " ${notebook} " ]]; then
+  echo "Processing $notebook..."
+  poetry run jupyter nbconvert --to notebook --execute "$notebook"
+  if [ $? -ne 0 ]; then
+    echo "Error found in $notebook"
+    echo "Error in $notebook. See logs for details." >> errors.txt
+  fi
+fi
 
-# Check if there were any errors
-if [ -s errors.txt ]; then
-  echo "Some notebooks had errors"
-  cat errors.txt
-  exit 1
-else
-  echo "All notebooks ran successfully."
-fi
+exit 0
```
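
The rewritten script takes a single notebook filename as `$1` (supplied by `${{ matrix.notebook }}` in CI) instead of discovering notebooks and fanning out through GNU `parallel` itself. A sketch of reproducing one matrix job locally, assuming the Poetry environment from `make full` and a checkout at the repository root:

```bash
# Assumed local reproduction of one matrix job; streaming.ipynb is just an
# example matrix entry. NLTK data is staged first, as the new CI steps do.
mkdir -p /tmp/nltk_data
poetry run python -m nltk.downloader -d /tmp/nltk_data punkt
bash ./.github/workflows/scripts/run_notebooks.sh streaming.ipynb
```
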
.github/workflows/scripts/update_notebook_matrix.sh (new file)

Lines changed: 22 additions & 0 deletions

```diff
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Array to store notebook names
+notebook_names="["
+
+# Compile list of file names
+for file in $(ls docs/examples/*.ipynb); do
+  # Add the full filename with extension
+  filename=$(basename "$file")
+
+  notebook_names+="\"$filename\","
+done
+notebook_names="${notebook_names%,}]"
+
+# echo $notebook_names
+
+
+# find line that begins with "notebook:" and replace it with notebook: $notebook_names
+sed "s/notebook: \[.*\]/notebook: $notebook_names/" .github/workflows/examples_check.yml > .github/workflows/examples_check.yml.tmp
+mv .github/workflows/examples_check.yml.tmp .github/workflows/examples_check.yml
+
+sed "s/notebook: \[.*\]/notebook: $notebook_names/" .github/workflows/ci.yml > .github/workflows/ci.yml.tmp
+mv .github/workflows/ci.yml.tmp .github/workflows/ci.yml
```
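
This new script regenerates the `notebook: [...]` matrix line in both workflow files from whatever `.ipynb` files currently sit in `docs/examples`; the Makefile change below hooks it into the precommit target. A sketch of running it by hand from the repository root:

```bash
# Refresh the matrix line in both workflows, then show the regenerated lines.
bash .github/workflows/scripts/update_notebook_matrix.sh
grep -n "notebook:" .github/workflows/ci.yml .github/workflows/examples_check.yml
```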

Makefile

Lines changed: 1 addition & 0 deletions

```diff
@@ -71,3 +71,4 @@ precommit:
 	# pytest -x -q --no-summary
 	pyright guardrails/
 	make lint
+	./github/workflows/scripts/update_notebook_matrix.sh
```

docs/llm_api_wrappers.md

Lines changed: 103 additions & 1 deletion

````diff
@@ -86,7 +86,7 @@ guard = gd.Guard.from_rail(...)
 anthropic_client = Anthropic(api_key="my_api_key")
 
 # Wrap Anthropic API call
-raw_llm_output, guardrail_output = guard(
+raw_llm_output, guardrail_output, *rest = guard(
     anthropic_client.completions.create,
     prompt_params={
         "prompt_param_1": "value_1",
@@ -100,6 +100,108 @@ raw_llm_output, guardrail_output = guard(
 ```
 
 
+## Hugging Face
+
+### Text Generation Models
+```py
+from guardrails import Guard
+from guardrails.validators import ValidLength, ToxicLanguage
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+# Create your prompt or starting text
+prompt = "Hello, I'm a language model,"
+
+# Setup torch
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Instantiate your tokenizer
+tokenizer = AutoTokenizer.from_pretrained("gpt2")
+
+# Instantiate your model
+model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).to(torch_device)
+
+# Customize your model inputs if desired.
+# If you don't pass any inputs (`input_ids`, `input_values`, `input_features`, or `pixel_values`)
+# we'll try to do something similar to below using the tokenizer and the prompt.
+# We strongly suggest passing in your own inputs.
+model_inputs = tokenizer(prompt, return_tensors="pt").to(torch_device)
+
+
+# Create the Guard
+guard = Guard.from_string(
+    validators=[
+        ValidLength(
+            min=48,
+            on_fail="fix"
+        ),
+        ToxicLanguage(
+            on_fail="fix"
+        )
+    ],
+    prompt=prompt
+)
+
+# Run the Guard
+response = guard(
+    llm_api=model.generate,
+    max_new_tokens=40,
+    tokenizer=tokenizer,
+    **model_inputs,
+)
+
+# Check the output
+if response.validation_passed:
+    print("validated_output: ", response.validated_output)
+else:
+    print("error: ", response.error)
+
+```
+
+### Pipelines
+```py
+from guardrails import Guard
+from guardrails.validators import ValidLength, ToxicLanguage
+import torch
+from transformers import pipeline
+
+
+# Create your prompt or starting text
+prompt = "What are we having for dinner?"
+
+# Setup pipeline
+generator = pipeline("text-generation", model="facebook/opt-350m")
+
+
+# Create the Guard
+guard = Guard.from_string(
+    validators=[
+        ValidLength(
+            min=48,
+            on_fail="fix"
+        ),
+        ToxicLanguage(
+            on_fail="fix"
+        )
+    ],
+    prompt=prompt
+)
+
+# Run the Guard
+response = guard(
+    llm_api=generator,
+    max_new_tokens=40
+)
+
+if response.validation_passed:
+    print("validated_output: ", response.validated_output)
+else:
+    print("error: ", response.error)
+
+```
+
+
 ## Using Manifest
 [Manifest](https://github.com/HazyResearch/manifest) is a wrapper around most model APIs and supports hosting local models. It can be used as a LLM API.
 
````
