diff --git a/.github/workflows/inference-mock.yml b/.github/workflows/inference-mock.yml
new file mode 100644
index 0000000..8529543
--- /dev/null
+++ b/.github/workflows/inference-mock.yml
@@ -0,0 +1,35 @@
+name: Inference Mock Tests
+
+on:
+  pull_request:
+    branches:
+      - "main"
+      - "release-**"
+    paths:
+      - 'actions/inference-mock/**'
+      - '.github/workflows/inference-mock.yml' # This workflow
+
+jobs:
+  inference-mock-unit-tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            actions/inference-mock
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        working-directory: actions/inference-mock
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Run Unit Tests
+        working-directory: actions/inference-mock
+        run: |
+          python -m unittest test/test.py
diff --git a/.pylintrc b/.pylintrc
index 22c302b..23f78a1 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -627,7 +627,7 @@ missing-member-max-choices=1
 mixin-class-rgx=.*[Mm]ixin
 
 # List of decorators that change the signature of a decorated function.
-signature-mutators=unittest.mock.patch,unittest.mock.patch.object
+signature-mutators=unittest.mock.patch,unittest.mock.patch.object,click.decorators.option
 
 [VARIABLES]
diff --git a/README.md b/README.md
index 07aa875..5beade3 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Below is a list of the in-house GitHub actions stored in this repository:
 | [launch-ec2-runner-with-fallback](./actions/launch-ec2-runner-with-fallback/launch-ec2-runner-with-fallback.md) | Used launch an EC2 instance in AWS, either as a spot instance or a dedicated instance. If your preferred availability zone lacks availability for your instance type, "backup" availability zones will be tried. | |
 | [validate-notebooks](./actions/launch-ec2-runner-with-fallback/launch-ec2-runner-with-fallback.md) | Used to validate `.ipynb` files |
 | [update-constraints](./actions/update-constraints/update-constraints.md) | Used to update `constraints-dev.txt` file |
+| [inference-mock](./actions/inference-mock/README.md) | Used to mock LLM calls | |
 
 ## ❓ How to Use One or More In-House GitHub Actions
diff --git a/actions/inference-mock/README.md b/actions/inference-mock/README.md
new file mode 100644
index 0000000..547aa04
--- /dev/null
+++ b/actions/inference-mock/README.md
@@ -0,0 +1,62 @@
+# Inference Mock
+
+## Overview
+
+Inference Mock is a tool that creates a Flask server that runs as a background process. OpenAI-compatible calls can be made to its completions API.
+Based on how the server is configured, it will send a set of programmed responses back.
+
+## When to Use it?
+
+Testing notebooks is difficult because you often don't write functions or unit tests in them. Instead, if you want to mock an LLM call and response,
+this is an easy way to rig that up in your testing environment. It works well for integration, unit, and smoke tests. It is not a real
+inference service, so it's best used for testing code that makes occasional calls to an LLM to do a task.
+
+## Usage
+
+This is a composite action, and it can be referenced from any GitHub Actions workflow. First, you will need to make a config file.
+You can set the following fields:
+
+```yaml
+# debug: enable debug logging and debug mode in Flask
+# optional: this defaults to False
+debug: True
+
+# port: the port the server will listen on
+# optional: this defaults to 11434
+port: 11434
+
+# matches: a list of matching strategies for expected sets of prompt-response pairs. The following strategies are available:
+# - contains: accepts a list of substrings. All incoming prompts will need to contain all listed substrings for this match to be positive
+# - response: passing only a response is an `Always` match strategy. If no other strategy has matched yet, this will always be a positive match.
+#
+# note: the strategies are executed in the order listed, and the first successful match is accepted. If you start with an `Always` strategy, its
+# response will be the only response returned.
+matches:
+
+  # this is an example of a `contains` strategy. If the prompt contains the substrings, it returns the response.
+  - contains:
+    - 'I need you to generate three questions that must be answered only with information contained in this passage, and nothing else.'
+    response: '{"fact_single": "What are some common ways to assign rewards to partial answers?", "fact_single_answer": "There are three: prod, which takes the product of rewards across all steps; min, which selects the minimum reward over all steps; and last, which uses the reward from the final step.", "reasoning": "What is the best method for rewarding models?", "reasoning_answer": "That depends on whether the training data is prepared using MC rollout, human annotation, or model annotation.", "summary": "How does QWEN implement model reward?", "summary_answer": "Qwen computes the aggregate reward based on the entire partial reward trajectory. It also uses a method that feeds the performance reference model with partial answers, then only considering the final reward token."}'
+
+  # this is an example of an `Always` strategy. It will always match, and return this response.
+  - response: "hi I am the default response"
+```
+
+This config must be passed to this action as an input.
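+
+Once the action has started the server, later steps in the job can exercise it over HTTP on the configured `port`. The request below is a minimal sketch based on the `/v1/completions` route implemented in `app.py`; the `model` and `prompt` values are placeholders:
+
+```bash
+curl -s http://localhost:11434/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "mock-model", "prompt": "example prompt"}'
+```
+
+The server returns the response of the first strategy that matches the prompt, wrapped in an OpenAI-style completion body.
+
+Here is an example of a workflow that invokes this action to create a mock server.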
+```yaml
+jobs:
+  example-job:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout "inference-mock" in-house CI action
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          repository: instructlab/ci-actions
+          path: ci-actions
+          sparse-checkout: |
+            actions/inference-mock
+      - name: Inference Mock
+        uses: ./ci-actions/actions/inference-mock
+        with:
+          config: "example-config.yml"
+```
diff --git a/actions/inference-mock/action.yml b/actions/inference-mock/action.yml
new file mode 100644
index 0000000..4c7b58b
--- /dev/null
+++ b/actions/inference-mock/action.yml
@@ -0,0 +1,21 @@
+name: 'Inference Mock'
+description: 'Creates and runs a server that returns mock OpenAI completions as a background process'
+author: "InstructLab"
+
+inputs:
+  config:
+    description: the path to a config.yml file for the inference mock server
+    required: true
+    type: string
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install Dependencies
+      shell: bash
+      run: pip install -r ${{ github.action_path }}/requirements.txt
+    - name: Run Inference Mock Server
+      shell: bash
+      run: |
+        nohup python ${{ github.action_path }}/app.py --config ${{ inputs.config }} &
+        sleep 2 # give the background server a moment to start
diff --git a/actions/inference-mock/app.py b/actions/inference-mock/app.py
new file mode 100644
index 0000000..c957e4a
--- /dev/null
+++ b/actions/inference-mock/app.py
@@ -0,0 +1,85 @@
+# Standard
+from dataclasses import dataclass
+import logging
+import pprint
+
+# Third Party
+from completions.completion import create_chat_completion
+from flask import Flask, request  # type: ignore[import-not-found]
+from matching.matching import Matcher
+from werkzeug import exceptions  # type: ignore[import-not-found]
+import click  # type: ignore[import-not-found]
+import yaml
+
+# Globals
+app = Flask(__name__)
+strategies: Matcher  # a read-only Matcher holding the matching strategies
+
+
+# Routes
+@app.route("/v1/completions", methods=["POST"])
+def completions():
+    data = request.get_json()
+    if not data or "prompt" not in data:
+        raise exceptions.BadRequest("prompt is empty or None")
+
+    prompt = data.get("prompt")
+    prompt_debug_str = prompt
+    # truncate long prompts so debug logs stay readable
+    if len(prompt) > 90:
+        prompt_debug_str = data["prompt"][:90] + "..."
+ + app.logger.debug( + f"{request.method} {request.url} {data['model']} {prompt_debug_str}" + ) + + chat_response = strategies.find_match( + prompt + ) # handle prompt and generate correct response + + response = create_chat_completion(chat_response, model=data.get("model")) + app.logger.debug(f"response: {pprint.pformat(response, compact=True)}") + return response + + +# config +@dataclass +class Config: + matches: list[dict] + port: int = 11434 + debug: bool = False + + +@click.command() +@click.option( + "-c", + "--config", + "config", + type=click.File(mode="r", encoding="utf-8"), + required=True, + help="yaml config file", +) +def start_server(config): + # get config + yaml_data = yaml.safe_load(config) + if not isinstance(yaml_data, dict): + raise ValueError(f"config file {config} must be a set of key-value pairs") + + conf = Config(**yaml_data) + + # configure logger + if conf.debug: + app.logger.setLevel(logging.DEBUG) + app.logger.debug("debug mode enabled") + else: + app.logger.setLevel(logging.INFO) + + # create match strategy object + global strategies # pylint: disable=global-statement + strategies = Matcher(conf.matches) + + # init server + app.run(debug=conf.debug, port=conf.port) + + +if __name__ == "__main__": + start_server() # pylint: disable=no-value-for-parameter diff --git a/actions/inference-mock/completions/__init__.py b/actions/inference-mock/completions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/actions/inference-mock/completions/completion.py b/actions/inference-mock/completions/completion.py new file mode 100644 index 0000000..b238e03 --- /dev/null +++ b/actions/inference-mock/completions/completion.py @@ -0,0 +1,33 @@ +# mock openAI completion responses +# credit: https://github.com/openai/openai-python/issues/715#issuecomment-1809203346 +# License: MIT + +# Standard +import random + + +# TODO: use a library to return and validate completions so this doesn't need to be maintained +def create_chat_completion(content: str, model: str = "gpt-3.5") -> dict: + response = { + "id": "chatcmpl-2nYZXNHxx1PeK1u8xXcE1Fqr1U6Ve", + "object": "chat.completion", + "created": "12345678", + "model": model, + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "text": content, + "content": content, + "index": 0, + "logprobs": None, + "finish_reason": "length", + }, + ], + "usage": { + "prompt_tokens": random.randint(10, 500), + "completion_tokens": random.randint(10, 500), + "total_tokens": random.randint(10, 500), + }, + } + + return response diff --git a/actions/inference-mock/matching/__init__.py b/actions/inference-mock/matching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/actions/inference-mock/matching/matching.py b/actions/inference-mock/matching/matching.py new file mode 100644 index 0000000..38efc3d --- /dev/null +++ b/actions/inference-mock/matching/matching.py @@ -0,0 +1,99 @@ +# Standard +from abc import abstractmethod +from typing import Protocol +import pprint + + +class Match(Protocol): + """ + Match represents a single prompt matching + strategy. When a match is successful, + the response is what should be returned. + """ + + response: str + + @abstractmethod + def match(self, prompt: str) -> str | None: + raise NotImplementedError + + +class Always: + """ + Always is a matching strategy that always + is a positive match on a given prompt. + + This is best used when only one prompt response + is expected. 
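+
+    For example, Always("hi") matches any non-empty prompt
+    and returns "hi"; an empty prompt returns None.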
+ """ + + def __init__(self, response: str): + self.response = response + + def match(self, prompt: str) -> str | None: + if prompt: + return self.response + return None + + +class Contains: + """ + Contains is a matching strategy that checks + if the prompt string contains all of + the substrings in the `contains` attribute. + """ + + contains: list[str] + + def __init__(self, contains: list[str], response: str): + if not contains or len(contains) == 0: + raise ValueError("contains must not be empty") + self.response = response + self.contains = contains + + def match(self, prompt: str) -> str | None: + if not prompt: + return None + for context in self.contains: + if context not in prompt: + return None + + return self.response + + +# helper function pulled out for easier testing +def to_match(pattern: dict) -> Match: + response = pattern.get("response") + if not response: + raise ValueError( + f"matching strategy must have a response: {pprint.pformat(pattern, compact=True)}" + ) + if "contains" in pattern: + return Contains(**pattern) + return Always(**pattern) + + +class Matcher: + """ + Matcher matches prompt context and then + selects a user provided reply. + """ + + strategies: list[Match] + + def __init__(self, matching_patterns: list[dict]): + if not matching_patterns: + raise ValueError( + "matching strategies must contain at least one Match strategy" + ) + + self.strategies: list[Match] = [] + for matching_pattern in matching_patterns: + self.strategies.append(to_match(matching_pattern)) + + def find_match(self, prompt: str) -> str: + for strategy in self.strategies: + response = strategy.match(prompt) + if response: + return response + return "" diff --git a/actions/inference-mock/requirements.txt b/actions/inference-mock/requirements.txt new file mode 100644 index 0000000..20d3485 --- /dev/null +++ b/actions/inference-mock/requirements.txt @@ -0,0 +1,4 @@ +flask +werkzeug +click +pyyaml diff --git a/actions/inference-mock/test/__init__.py b/actions/inference-mock/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/actions/inference-mock/test/test.py b/actions/inference-mock/test/test.py new file mode 100644 index 0000000..2157a67 --- /dev/null +++ b/actions/inference-mock/test/test.py @@ -0,0 +1,119 @@ +# Standard +import unittest + +# Third Party +from matching.matching import Always, Contains, Matcher, to_match + + +class TestAlways(unittest.TestCase): + # match on any prompt + def test_always(self): + expect_response = "expected response" + prompt = "example prompt" + always = Always(expect_response) + actual_response = always.match(prompt) + self.assertEqual(actual_response, expect_response) + + # reject empty prompts + def test_always_empty_prompt(self): + response = "expected response" + prompt = "" + always = Always(response) + actual_response = always.match(prompt) + self.assertIsNone(actual_response) + + +class TestContains(unittest.TestCase): + def test_contains(self): + expect_response = "expected response" + prompt = "example prompt" + match_on = ["example"] + contains = Contains(match_on, expect_response) + actual_response = contains.match(prompt) + self.assertEqual(actual_response, expect_response) + + def test_contains_many(self): + expect_response = "expected response" + prompt = "a much longer example prompt so we can match on many substring elements of this string" + match_on = ["example", "many substring elements", "match on"] + contains = Contains(match_on, expect_response) + actual_response = contains.match(prompt) + 
self.assertEqual(actual_response, expect_response) + + # if any substrings don't match, return None + def test_contains_mismatch(self): + response = "expected response" + prompt = "a much longer example prompt so we can match on many substring elements of this string" + match_on = ["example", "many substring elements", "match on", "banana"] + contains = Contains(match_on, response) + actual_response = contains.match(prompt) + self.assertIsNone(actual_response) + + # reject empty prompts + def test_contains_empty(self): + response = "expected response" + prompt = "" + match_on = ["example"] + contains = Contains(match_on, response) + actual_response = contains.match(prompt) + self.assertIsNone(actual_response) + + +class TestMatcher(unittest.TestCase): + def test_to_contains(self): + response = "I am a response" + substr = ["a", "b", "c"] + pattern = {"contains": substr, "response": response} + contains = to_match(pattern) + self.assertIsInstance(contains, Contains) + self.assertEqual(contains.response, response) + + def test_to_always(self): + response = "I am a response" + always_pattern = {"response": response} + always = to_match(always_pattern) + self.assertIsInstance(always, Always) + self.assertEqual(always.response, response) + + def test_to_invalid(self): + response = "I am a response" + invalid_pattern = {"banana": "foo", "response": response} + self.assertRaises(Exception, to_match, invalid_pattern) + + def test_find_match_contains(self): + expect_response = "I am a response" + substr = ["example", "p"] + patterns = [{"contains": substr, "response": expect_response}] + matcher = Matcher(patterns) + + prompt = "example prompt" + actual_response = matcher.find_match(prompt) + self.assertEqual(actual_response, expect_response) + + def test_find_match_always(self): + expect_response = "I am a response" + patterns = [{"response": expect_response}] + matcher = Matcher(patterns) + + prompt = "example prompt" + actual_response = matcher.find_match(prompt) + self.assertEqual(actual_response, expect_response) + + # test that order is preserved and responses fall back until a match or end of strategies + def test_find_match_fallback(self): + patterns = [ + { + "contains": ["this is the fallback response"], + "response": "a response you will not get", + }, + {"response": "this is the fallback response"}, + ] + matcher = Matcher(patterns) + always_response = matcher.find_match(prompt="example prompt") + self.assertEqual(always_response, "this is the fallback response") + contains_response = matcher.find_match(prompt="this is the fallback response") + self.assertEqual(contains_response, "a response you will not get") + + +if __name__ == "__main__": + unittest.main()