From 9d2255bbf1497b73c625465e878e0eb1a844f79e Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Tue, 27 May 2025 13:42:40 -0400
Subject: [PATCH 1/2] use validate notebooks job from ci actions repo

Signed-off-by: Emilio Garcia
---
 .github/workflows/instructlab-knowledge-e2e.yml | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/instructlab-knowledge-e2e.yml b/.github/workflows/instructlab-knowledge-e2e.yml
index 0223d47..ad318c8 100644
--- a/.github/workflows/instructlab-knowledge-e2e.yml
+++ b/.github/workflows/instructlab-knowledge-e2e.yml
@@ -35,11 +35,16 @@ jobs:
       run: |
         pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
         ipython kernel install --name "python3" --user
-    - name: Validate The Notebook
-      working-directory: ./notebooks/instructlab-knowledge
-      run: |
-        git clone https://github.com/instructlab/instructlab.git
-        python3 instructlab/scripts/validate_notebook.py ${{ matrix.notebooks_to_test }}
+    - name: Checkout "validate-notebooks" in-house CI action
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      with:
+        repository: instructlab/ci-actions
+        path: ci-actions
+        sparse-checkout: actions/validate-notebooks
+    - name: Validate Jupyter Notebooks
+      uses: ./ci-actions/actions/validate-notebooks
+      with:
+        path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
     - name: Start Inference Mock Server
       working-directory: ./tests/inference-mock
       run: |

From 5382c9e0d5819a55384322e04bed2b36356e4643 Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Mon, 2 Jun 2025 10:01:52 -0400
Subject: [PATCH 2/2] migrate inference mock to ci-actions

Signed-off-by: Emilio Garcia
---
 .../workflows/instructlab-knowledge-e2e.yml   | 24 +++----
 notebooks/instructlab-knowledge/ci-config.yml |  7 +++
 tests/inference-mock/app.py                   | 63 -------------------
 tests/inference-mock/qna/__init__.py          |  0
 tests/inference-mock/qna/qna.py               | 40 ------------
 tests/inference-mock/requirements.txt         |  1 -
 6 files changed, 17 insertions(+), 118 deletions(-)
 create mode 100644 notebooks/instructlab-knowledge/ci-config.yml
 delete mode 100644 tests/inference-mock/app.py
 delete mode 100644 tests/inference-mock/qna/__init__.py
 delete mode 100644 tests/inference-mock/qna/qna.py
 delete mode 100644 tests/inference-mock/requirements.txt

diff --git a/.github/workflows/instructlab-knowledge-e2e.yml b/.github/workflows/instructlab-knowledge-e2e.yml
index ad318c8..652ed16 100644
--- a/.github/workflows/instructlab-knowledge-e2e.yml
+++ b/.github/workflows/instructlab-knowledge-e2e.yml
@@ -9,10 +9,6 @@ on:
     branches:
       - "main"
 
-env:
-  # dependencies that need to be installed using pip for testing to work
-  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"
-
 jobs:
   end_to_end:
     strategy:
@@ -33,25 +29,25 @@ jobs:
         cache: pip
     - name: Install Testing Tools
       run: |
-        pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
+        pip install papermill nbformat ipykernel
         ipython kernel install --name "python3" --user
-    - name: Checkout "validate-notebooks" in-house CI action
+    - name: Checkout in-house CI actions
       uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       with:
-        repository: instructlab/ci-actions
+        repository: iamemilio/ci-actions
         path: ci-actions
+        ref: inference-mock
-        sparse-checkout: actions/validate-notebooks
+        sparse-checkout: |
+          actions/validate-notebooks
+          actions/inference-mock
     - name: Validate Jupyter Notebooks
       uses: ./ci-actions/actions/validate-notebooks
       with:
         path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
    - name: Start Inference Mock Server
-      working-directory: ./tests/inference-mock
-      run: |
-        pip install -r requirements.txt
-        nohup python app.py &
-        sleep 1
-        echo "Inference mock server started on port 11434"
+      uses: ./ci-actions/actions/inference-mock
+      with:
+        config: ./notebooks/instructlab-knowledge/ci-config.yml
     - name: Run End To End Tests
       working-directory: ./notebooks/instructlab-knowledge
       # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
diff --git a/notebooks/instructlab-knowledge/ci-config.yml b/notebooks/instructlab-knowledge/ci-config.yml
new file mode 100644
index 0000000..32251b8
--- /dev/null
+++ b/notebooks/instructlab-knowledge/ci-config.yml
@@ -0,0 +1,7 @@
+# Inference Mock Config
+
+debug: False
+matches:
+  - contains:
+      - 'I need you to generate three questions that must be answered only with information contained in this passage, and nothing else.'
+    response: '{"fact_single": "What are some common ways to assign rewards to partial answers?", "fact_single_answer": "There are three: prod, which takes the product of rewards across all steps; min, which selects the minimum reward over all steps; and last, which uses the reward from the final step.", "reasoning": "What is the best method for rewarding models?", "reasoning_answer": "That depends on whether the training data is prepared using MC rollout, human annotation, or model annotation.", "summary": "How does QWEN implement model reward?", "summary_answer": "Qwen computes the aggregate reward based on the entire partial reward trajectory. I also uses a method that feeds the performance reference model with partial answers, then only considering the final reward token."}'
diff --git a/tests/inference-mock/app.py b/tests/inference-mock/app.py
deleted file mode 100644
index ee568bb..0000000
--- a/tests/inference-mock/app.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from flask import Flask, request, Response, jsonify
-from werkzeug import exceptions
-from qna.qna import MockFactGenerator
-
-import logging
-
-# Globals
-app = Flask(__name__)
-port_number = 11434
-app.logger.setLevel(logging.DEBUG)
-fact_generator = MockFactGenerator(app.logger)
-
-def completion_response(content: str, model: str="gpt-3.5") -> dict:
-    response = {
-        "id": "chatcmpl-2nYZXNHxx1PeK1u8xXcE1Fqr1U6Ve",
-        "object": "chat.completion",
-        "created": "12345678",
-        "model": model,
-        "system_fingerprint": "fp_44709d6fcb",
-        "choices": [
-            {
-                "text": content,
-                "content": content,
-                "index": 0,
-                "logprobs": None,
-                "finish_reason": "length"
-            },
-        ],
-        "usage": {
-            "prompt_tokens": 10,
-            "completion_tokens": 20,
-            "total_tokens":30,
-        }
-    }
-
-    return response
-
-# Routes
-@app.route('/v1/completions', methods=['POST'])
-def mock_generate():
-    if not request.json or 'prompt' not in request.json:
-        issue = "prompt is empty or None"
-        app.logger.debug(issue)
-        raise exceptions.BadRequest(issue)
-
-    prompt_debug_str = request.json['prompt'][:90] + "..."
-    app.logger.debug(f"{request.method} {request.url} {request.json['model']} {prompt_debug_str}")
-    if request.json == None:
-        raise exceptions.BadRequest("Bad Request: request json empty")
-
-    prompt = request.json['prompt']
-
-    # handle prompt and generate correct response
-    completion = fact_generator.generatePair(prompt)
-    if not completion:
-        completion = ""
-
-    response = jsonify(completion_response(completion, request.json['model']))
-    app.logger.debug(f"response: {response}")
-    return response
-
-if __name__ == '__main__':
-    app.run(debug=True, port=port_number)
diff --git a/tests/inference-mock/qna/__init__.py b/tests/inference-mock/qna/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/inference-mock/qna/qna.py b/tests/inference-mock/qna/qna.py
deleted file mode 100644
index a9756f2..0000000
--- a/tests/inference-mock/qna/qna.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import logging
-import json
-import threading
-
-'''
-All possible responses must go here. The server will return one response per query, and iterate over them in a circular fashion.
-You can use the empty response template below when adding a new response to this list.
-
-Since the chunks are randomly selected, there is not a good way to associate the responses to the chunks at this time. That should
-not be necessary for basic testing anyway.
-'''
-response = {
-    "fact_single":"What are some common ways to assign rewards to partial answers?",
-    "fact_single_answer":"There are three: prod , which takes the product of rewards across all steps; min , which selects the minimum reward over all steps; and last, which uses the reward from the final step.",
-    "reasoning":"What is the best method for rewarding models?",
-    "reasoning_answer":"That depends on whether the training data is prepared using MC rollout, human annotation, or model annotation.",
-    "summary":"How does QWEN implement model reward?",
-    "summary_answer":"Qwen computes the aggregate reward based on the entire partial reward trajectory. I also uses a method that feeds the performance reference model with partial answers, then only considering the final reward token.",
-    }
-
-
-fact_prompt_match = "I need you to generate three questions that must be answered only with information contained in this passage, and nothing else."
-
-class MockFactGenerator:
-    """
-    MockFact Generator is a tool that mocks generating a specific sequence of facts.
-    """
-    def __init__(self, logger:logging.Logger):
-        self.logger = logger
-
-    def generatePair(self, prompt:str):
-        '''
-        generatePair returns a JSON string if the prompt was a Q&A generation prompt, otherwise it returns None.
-        '''
-
-        if fact_prompt_match not in prompt:
-            self.logger.debug("prompt is not for Q&A gen")
-            return None
-
-        return json.dumps(response)
diff --git a/tests/inference-mock/requirements.txt b/tests/inference-mock/requirements.txt
deleted file mode 100644
index 7e10602..0000000
--- a/tests/inference-mock/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-flask