
Commit a08ed1d

Refactor examples repo into deep-dives and quick-starts
The model-customization repo has deep dives for conversion, subset selection, chunking, and seed data creation. Quick starts include the existing instructlab-knowledge notebook.

Signed-off-by: Ali Maredia <[email protected]>
1 parent ee41913 · commit a08ed1d


52 files changed (+2113 / -5 lines)

.github/workflows/chunking.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/chunking"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Chunking Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/chunking" directory to end-to-end test here
        notebooks_to_test: ["docling-chunking.ipynb"]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/chunking/${{ matrix.notebooks_to_test }}"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/chunking
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
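The NOTE on the final step refers to papermill's parameter-injection convention: each target notebook needs at least one cell tagged "parameters" in its cell metadata. A quick local check of that tag, sketched with nbformat (the helper script and default notebook name are illustrative and not part of this commit):

import sys

import nbformat


def has_parameters_cell(path: str) -> bool:
    # papermill injects values into the cell whose metadata tags include "parameters"
    nb = nbformat.read(path, as_version=4)
    return any("parameters" in cell.metadata.get("tags", []) for cell in nb.cells)


if __name__ == "__main__":
    # default to the notebook this workflow tests; pass another path as the first argument
    notebook = sys.argv[1] if len(sys.argv) > 1 else "docling-chunking.ipynb"
    print(f"{notebook}: parameters cell found = {has_parameters_cell(notebook)}")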

.github/workflows/conversion.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/conversion"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Conversion End to End Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/conversion" directory to end-to-end test here
        notebooks_to_test: ["docling-conversion.ipynb"]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/conversion/${{ matrix.notebooks_to_test }}"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/conversion
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
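The final step is a plain papermill run from model-customization/data-processing/conversion. The same run can be reproduced through papermill's Python API, which is also where notebook parameters would be overridden; the commented-out parameter name below is hypothetical, since real names depend on the notebook's tagged "parameters" cell:

import papermill as pm

# Execute the notebook the same way the CI step does, writing the executed copy
# next to the original with a .tmp suffix.
pm.execute_notebook(
    "docling-conversion.ipynb",
    "docling-conversion.ipynb.tmp",
    kernel_name="python3",
    # parameters={"source_dir": "./data"},  # hypothetical override; see the notebook's parameters cell
)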

.github/workflows/instructlab-knowledge-e2e.yml

Lines changed: 2 additions & 2 deletions
@@ -46,7 +46,7 @@ jobs:
       - name: Validate Jupyter Notebooks
         uses: ./ci-actions/actions/validate-notebooks
         with:
-          path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
+          path: "./quick-starts/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
       - name: Start Inference Mock Server
         working-directory: ./tests/inference-mock
         run: |
@@ -55,6 +55,6 @@
           sleep 1
           echo "Inference mock server started on port 11434"
       - name: Run End To End Tests
-        working-directory: ./notebooks/instructlab-knowledge
+        working-directory: ./quick-starts/instructlab-knowledge
         # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
         run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
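Both end-to-end jobs start the mock server and then wait a fixed second before running papermill. If the mock were ever slow to bind port 11434, a small polling probe would be a more robust wait; the sketch below assumes the mock answers an OpenAI-compatible GET /v1/models, which has not been verified against tests/inference-mock:

import time
import urllib.error
import urllib.request


def wait_for_mock(url: str = "http://127.0.0.1:11434/v1/models", timeout_s: float = 30.0) -> None:
    # Poll until the endpoint returns HTTP 200 or the timeout expires.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if resp.status == 200:
                    print("Inference mock is up")
                    return
        except (urllib.error.URLError, OSError):
            time.sleep(0.5)
    raise RuntimeError(f"mock server did not respond at {url} within {timeout_s}s")


if __name__ == "__main__":
    wait_for_mock()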
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/seed-data-for-knowledge"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Seed Data for Knowledge Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/seed-data-for-knowledge" directory to end-to-end test here
        notebooks_to_test: ["seed-data-for-knowledge.ipynb"]
    runs-on: ubuntu-latest
    env:
      # customize the workflow here
      MODEL_ENDPOINT_URL: "http://127.0.0.1:11434/v1"
      MODEL_API_KEY: "none"
      MODEL_NAME: "Mixtral-8x7B" # must be OpenAI-compatible when using the inference mock
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/seed-data-for-knowledge/${{ matrix.notebooks_to_test }}"
      - name: Start Inference Mock Server
        working-directory: ./tests/inference-mock
        run: |
          pip install -r requirements.txt
          nohup python app.py &
          sleep 1
          echo "Inference mock server started on port 11434"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/seed-data-for-knowledge
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
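The job-level env block is how the seed-data notebook is pointed at the mock: MODEL_ENDPOINT_URL, MODEL_API_KEY, and MODEL_NAME describe any OpenAI-compatible endpoint. A minimal sketch of how a tagged "parameters" cell could consume them (the variable names, the openai dependency, and the prompt are assumptions, not taken from the notebook):

import os

from openai import OpenAI  # assumes the notebook already depends on the openai client package

# Read the connection details the workflow exports; the defaults mirror the CI values.
endpoint_url = os.environ.get("MODEL_ENDPOINT_URL", "http://127.0.0.1:11434/v1")
api_key = os.environ.get("MODEL_API_KEY", "none")
model_name = os.environ.get("MODEL_NAME", "Mixtral-8x7B")

client = OpenAI(base_url=endpoint_url, api_key=api_key)

# One round-trip against the configured endpoint (the mock in CI, a real server locally).
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Generate one seed question about the source document."}],
)
print(response.choices[0].message.content)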

.gitignore

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
 .DS_Store
-notebooks/instructlab-knowledge/workspaces/*
-!notebooks/instructlab-knowledge/workspaces/.gitkeep
+quick-starts/instructlab-knowledge/workspaces/*
+!quick-starts/instructlab-knowledge/workspaces/.gitkeep
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
./data/output
.ipynb_checkpoints

model-customization/data-processing/chunking/data/sample-docling-json/2022-nfl-rulebook.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

model-customization/data-processing/chunking/data/sample-docling-json/2023-nfl-rulebook.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

model-customization/data-processing/chunking/data/sample-docling-json/inference-time-scaling.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
