From 9d2255bbf1497b73c625465e878e0eb1a844f79e Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Tue, 27 May 2025 13:42:40 -0400
Subject: [PATCH 1/2] use validate notebooks job from ci actions repo

Signed-off-by: Emilio Garcia
---
 .github/workflows/instructlab-knowledge-e2e.yml | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/instructlab-knowledge-e2e.yml b/.github/workflows/instructlab-knowledge-e2e.yml
index 0223d47..ad318c8 100644
--- a/.github/workflows/instructlab-knowledge-e2e.yml
+++ b/.github/workflows/instructlab-knowledge-e2e.yml
@@ -35,11 +35,16 @@ jobs:
       run: |
         pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
         ipython kernel install --name "python3" --user
-    - name: Validate The Notebook
-      working-directory: ./notebooks/instructlab-knowledge
-      run: |
-        git clone https://github.com/instructlab/instructlab.git
-        python3 instructlab/scripts/validate_notebook.py ${{ matrix.notebooks_to_test }}
+    - name: Checkout "validate-notebooks" in-house CI action
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      with:
+        repository: instructlab/ci-actions
+        path: ci-actions
+        sparse-checkout: actions/validate-notebooks
+    - name: Validate Jupyter Notebooks
+      uses: ./ci-actions/actions/validate-notebooks
+      with:
+        path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
     - name: Start Inference Mock Server
       working-directory: ./tests/inference-mock
       run: |

From 5382c9e0d5819a55384322e04bed2b36356e4643 Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Mon, 2 Jun 2025 10:01:52 -0400
Subject: [PATCH 2/2] migrate inference mock to ci-actions

Signed-off-by: Emilio Garcia
---
 .../workflows/instructlab-knowledge-e2e.yml   | 24 +++----
 notebooks/instructlab-knowledge/ci-config.yml |  7 +++
 tests/inference-mock/app.py                   | 63 -------------------
 tests/inference-mock/qna/__init__.py          |  0
 tests/inference-mock/qna/qna.py               | 40 ------------
 tests/inference-mock/requirements.txt         |  1 -
 6 files changed, 17 insertions(+), 118 deletions(-)
 create mode 100644 notebooks/instructlab-knowledge/ci-config.yml
 delete mode 100644 tests/inference-mock/app.py
 delete mode 100644 tests/inference-mock/qna/__init__.py
 delete mode 100644 tests/inference-mock/qna/qna.py
 delete mode 100644 tests/inference-mock/requirements.txt

diff --git a/.github/workflows/instructlab-knowledge-e2e.yml b/.github/workflows/instructlab-knowledge-e2e.yml
index ad318c8..652ed16 100644
--- a/.github/workflows/instructlab-knowledge-e2e.yml
+++ b/.github/workflows/instructlab-knowledge-e2e.yml
@@ -9,10 +9,6 @@ on:
     branches:
       - "main"
 
-env:
-  # dependencies that need to be installed using pip for testing to work
-  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"
-
 jobs:
   end_to_end:
     strategy:
@@ -33,25 +29,25 @@ jobs:
         cache: pip
     - name: Install Testing Tools
       run: |
-        pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
+        pip install papermill nbformat ipykernel
         ipython kernel install --name "python3" --user
-    - name: Checkout "validate-notebooks" in-house CI action
+    - name: Checkout in-house CI actions
       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       with:
-        repository: instructlab/ci-actions
+        repository: iamemilio/ci-actions
         path: ci-actions
+        ref: inference-mock
-        sparse-checkout: actions/validate-notebooks
+        sparse-checkout: |
+          actions/validate-notebooks
+          actions/inference-mock
     - name: Validate Jupyter Notebooks
       uses: ./ci-actions/actions/validate-notebooks
       with:
         path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
    - name: Start Inference Mock Server
-      working-directory: ./tests/inference-mock
-      run: |
-        pip install -r requirements.txt
-        nohup python app.py &
-        sleep 1
-        echo "Inference mock server started on port 11434"
+      uses: ./ci-actions/actions/inference-mock
+      with:
+        config: ./notebooks/instructlab-knowledge/ci-config.yml
     - name: Run End To End Tests
       working-directory: ./notebooks/instructlab-knowledge
       # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
diff --git a/notebooks/instructlab-knowledge/ci-config.yml b/notebooks/instructlab-knowledge/ci-config.yml
new file mode 100644
index 0000000..32251b8
--- /dev/null
+++ b/notebooks/instructlab-knowledge/ci-config.yml
@@ -0,0 +1,7 @@
+# Inference Mock Config
+
+debug: False
+matches:
+  - contains:
+      - 'I need you to generate three questions that must be answered only with information contained in this passage, and nothing else.'
+    response: '{"fact_single": "What are some common ways to assign rewards to partial answers?", "fact_single_answer": "There are three: prod, which takes the product of rewards across all steps; min, which selects the minimum reward over all steps; and last, which uses the reward from the final step.", "reasoning": "What is the best method for rewarding models?", "reasoning_answer": "That depends on whether the training data is prepared using MC rollout, human annotation, or model annotation.", "summary": "How does QWEN implement model reward?", "summary_answer": "Qwen computes the aggregate reward based on the entire partial reward trajectory. I also uses a method that feeds the performance reference model with partial answers, then only considering the final reward token."}'
diff --git a/tests/inference-mock/app.py b/tests/inference-mock/app.py
deleted file mode 100644
index ee568bb..0000000
--- a/tests/inference-mock/app.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from flask import Flask, request, Response, jsonify
-from werkzeug import exceptions
-from qna.qna import MockFactGenerator
-
-import logging
-
-# Globals
-app = Flask(__name__)
-port_number = 11434
-app.logger.setLevel(logging.DEBUG)
-fact_generator = MockFactGenerator(app.logger)
-
-def completion_response(content: str, model: str="gpt-3.5") -> dict:
-    response = {
-        "id": "chatcmpl-2nYZXNHxx1PeK1u8xXcE1Fqr1U6Ve",
-        "object": "chat.completion",
-        "created": "12345678",
-        "model": model,
-        "system_fingerprint": "fp_44709d6fcb",
-        "choices": [
-            {
-                "text": content,
-                "content": content,
-                "index": 0,
-                "logprobs": None,
-                "finish_reason": "length"
-            },
-        ],
-        "usage": {
-            "prompt_tokens": 10,
-            "completion_tokens": 20,
-            "total_tokens":30,
-        }
-    }
-
-    return response
-
-# Routes
-@app.route('/v1/completions', methods=['POST'])
-def mock_generate():
-    if not request.json or 'prompt' not in request.json:
-        issue = "prompt is empty or None"
-        app.logger.debug(issue)
-        raise exceptions.BadRequest(issue)
-
-    prompt_debug_str = request.json['prompt'][:90] + "..."
-    app.logger.debug(f"{request.method} {request.url} {request.json['model']} {prompt_debug_str}")
-    if request.json == None:
-        raise exceptions.BadRequest("Bad Request: request json empty")
-
-    prompt = request.json['prompt']
-
-    # handle prompt and generate correct response
-    completion = fact_generator.generatePair(prompt)
-    if not completion:
-        completion = ""
-
-    response = jsonify(completion_response(completion, request.json['model']))
-    app.logger.debug(f"response: {response}")
-    return response
-
-if __name__ == '__main__':
-    app.run(debug=True, port=port_number)
diff --git a/tests/inference-mock/qna/__init__.py b/tests/inference-mock/qna/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/inference-mock/qna/qna.py b/tests/inference-mock/qna/qna.py
deleted file mode 100644
index a9756f2..0000000
--- a/tests/inference-mock/qna/qna.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import logging
-import json
-import threading
-
-'''
-All possible responses must go here. The server will return one response per query, and iterate over them in a circular fashion.
-You can use the empty response template below when adding a new response to this list.
-
-Since the chunks are randomly selected, there is not a good way to associate the responses to the chunks at this time. That should
-not be necessary for basic testing anyway.
-'''
-response = {
-    "fact_single":"What are some common ways to assign rewards to partial answers?",
-    "fact_single_answer":"There are three: prod , which takes the product of rewards across all steps; min , which selects the minimum reward over all steps; and last, which uses the reward from the final step.",
-    "reasoning":"What is the best method for rewarding models?",
-    "reasoning_answer":"That depends on whether the training data is prepared using MC rollout, human annotation, or model annotation.",
-    "summary":"How does QWEN implement model reward?",
-    "summary_answer":"Qwen computes the aggregate reward based on the entire partial reward trajectory. I also uses a method that feeds the performance reference model with partial answers, then only considering the final reward token.",
-    }
-
-
-fact_prompt_match = "I need you to generate three questions that must be answered only with information contained in this passage, and nothing else."
-
-class MockFactGenerator:
-    """
-    MockFact Generator is a tool that mocks generating a specific sequence of facts.
-    """
-    def __init__(self, logger:logging.Logger):
-        self.logger = logger
-
-    def generatePair(self, prompt:str):
-        '''
-        generatePair returns a JSON string if the prompt was a Q&A generation prompt, otherwise it returns None.
-        '''
-
-        if fact_prompt_match not in prompt:
-            self.logger.debug("prompt is not for Q&A gen")
-            return None
-
-        return json.dumps(response)
diff --git a/tests/inference-mock/requirements.txt b/tests/inference-mock/requirements.txt
deleted file mode 100644
index 7e10602..0000000
--- a/tests/inference-mock/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-flask