
Commit a08ed1d

Refactor examples repo into deep-dives and quick-starts
The model-customization repo has deep dives for conversion, subset selection, chunking, and seed data creation. Quick starts include the existing instructlab-knowledge notebook.

Signed-off-by: Ali Maredia <[email protected]>
1 parent ee41913 · commit a08ed1d


52 files changed (+2113 / -5 lines)

.github/workflows/chunking.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/chunking"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Chunking Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/chunking" directory to end-to-end test here
        notebooks_to_test: ["docling-chunking.ipynb"]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/chunking/${{ matrix.notebooks_to_test }}"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/chunking
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
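The NOTE on the final step refers to papermill's parameter-injection convention: each target notebook needs at least one cell tagged "parameters" in its cell metadata. A quick local check of that tag, sketched with nbformat (the helper script and default notebook name are illustrative and not part of this commit):

import sys

import nbformat


def has_parameters_cell(path: str) -> bool:
    # papermill injects values into the cell whose metadata tags include "parameters"
    nb = nbformat.read(path, as_version=4)
    return any("parameters" in cell.metadata.get("tags", []) for cell in nb.cells)


if __name__ == "__main__":
    # default to the notebook this workflow tests; pass another path as the first argument
    notebook = sys.argv[1] if len(sys.argv) > 1 else "docling-chunking.ipynb"
    print(f"{notebook}: parameters cell found = {has_parameters_cell(notebook)}")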

.github/workflows/conversion.yml

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/conversion"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Conversion End to End Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/conversion" directory to end-to-end test here
        notebooks_to_test: ["docling-conversion.ipynb"]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/conversion/${{ matrix.notebooks_to_test }}"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/conversion
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
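The final step is a plain papermill run from model-customization/data-processing/conversion. The same run can be reproduced through papermill's Python API, which is also where notebook parameters would be overridden; the commented-out parameter name below is hypothetical, since real names depend on the notebook's tagged "parameters" cell:

import papermill as pm

# Execute the notebook the same way the CI step does, writing the executed copy
# next to the original with a .tmp suffix.
pm.execute_notebook(
    "docling-conversion.ipynb",
    "docling-conversion.ipynb.tmp",
    kernel_name="python3",
    # parameters={"source_dir": "./data"},  # hypothetical override; see the notebook's parameters cell
)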

.github/workflows/instructlab-knowledge-e2e.yml

Lines changed: 2 additions & 2 deletions
@@ -46,7 +46,7 @@ jobs:
       - name: Validate Jupyter Notebooks
         uses: ./ci-actions/actions/validate-notebooks
         with:
-          path: "./notebooks/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
+          path: "./quick-starts/instructlab-knowledge/${{ matrix.notebooks_to_test }}"
       - name: Start Inference Mock Server
         working-directory: ./tests/inference-mock
         run: |
@@ -55,6 +55,6 @@
           sleep 1
           echo "Inference mock server started on port 11434"
       - name: Run End To End Tests
-        working-directory: ./notebooks/instructlab-knowledge
+        working-directory: ./quick-starts/instructlab-knowledge
         # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
         run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
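Both end-to-end jobs start the mock server and then wait a fixed second before running papermill. If the mock were ever slow to bind port 11434, a small polling probe would be a more robust wait; the sketch below assumes the mock answers an OpenAI-compatible GET /v1/models, which has not been verified against tests/inference-mock:

import time
import urllib.error
import urllib.request


def wait_for_mock(url: str = "http://127.0.0.1:11434/v1/models", timeout_s: float = 30.0) -> None:
    # Poll until the endpoint returns HTTP 200 or the timeout expires.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if resp.status == 200:
                    print("Inference mock is up")
                    return
        except (urllib.error.URLError, OSError):
            time.sleep(0.5)
    raise RuntimeError(f"mock server did not respond at {url} within {timeout_s}s")


if __name__ == "__main__":
    wait_for_mock()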
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
# This workflow is designed to test the notebooks in "model-customization/data-processing/seed-data-for-knowledge"
# This job can be copied for end-to-end tests of notebooks in other working directories

name: Seed Data for Knowledge Tests

# trigger on open pull requests only
on:
  pull_request:
    branches:
      - "main"

env:
  # dependencies that need to be installed using pip for testing to work
  TESTING_PIP_DEPENDENCIES: "papermill nbformat ipykernel"

jobs:
  end_to_end:
    strategy:
      matrix:
        # add notebooks within the "model-customization/data-processing/seed-data-for-knowledge" directory to end-to-end test here
        notebooks_to_test: ["seed-data-for-knowledge.ipynb"]
    runs-on: ubuntu-latest
    env:
      # customize the workflow here
      MODEL_ENDPOINT_URL: "http://127.0.0.1:11434/v1"
      MODEL_API_KEY: "none"
      MODEL_NAME: "Mixtral-8x7B" # must be OpenAI-compatible when using the inference mock
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: pip
      - name: Install Testing Tools
        run: |
          pip install ${{ env.TESTING_PIP_DEPENDENCIES }}
          ipython kernel install --name "python3" --user
      - name: Checkout "validate-notebooks" in-house CI action
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: instructlab/ci-actions
          path: ci-actions
          ref: v0.2.0
          sparse-checkout: actions/validate-notebooks
      - name: Validate Jupyter Notebooks
        uses: ./ci-actions/actions/validate-notebooks
        with:
          path: "./model-customization/data-processing/seed-data-for-knowledge/${{ matrix.notebooks_to_test }}"
      - name: Start Inference Mock Server
        working-directory: ./tests/inference-mock
        run: |
          pip install -r requirements.txt
          nohup python app.py &
          sleep 1
          echo "Inference mock server started on port 11434"
      - name: Run End To End Tests
        working-directory: ./model-customization/data-processing/seed-data-for-knowledge
        # NOTE: for this to work, cells with parameters need to be tagged as parameters in the target notebooks
        run: papermill ${{ matrix.notebooks_to_test }} ${{ matrix.notebooks_to_test }}.tmp
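The job-level env block is how the seed-data notebook is pointed at the mock: MODEL_ENDPOINT_URL, MODEL_API_KEY, and MODEL_NAME describe any OpenAI-compatible endpoint. A minimal sketch of how a tagged "parameters" cell could consume them (the variable names, the openai dependency, and the prompt are assumptions, not taken from the notebook):

import os

from openai import OpenAI  # assumes the notebook already depends on the openai client package

# Read the connection details the workflow exports; the defaults mirror the CI values.
endpoint_url = os.environ.get("MODEL_ENDPOINT_URL", "http://127.0.0.1:11434/v1")
api_key = os.environ.get("MODEL_API_KEY", "none")
model_name = os.environ.get("MODEL_NAME", "Mixtral-8x7B")

client = OpenAI(base_url=endpoint_url, api_key=api_key)

# One round-trip against the configured endpoint (the mock in CI, a real server locally).
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Generate one seed question about the source document."}],
)
print(response.choices[0].message.content)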

.gitignore

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
 .DS_Store
-notebooks/instructlab-knowledge/workspaces/*
-!notebooks/instructlab-knowledge/workspaces/.gitkeep
+quick-starts/instructlab-knowledge/workspaces/*
+!quick-starts/instructlab-knowledge/workspaces/.gitkeep
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
./data/output
.ipynb_checkpoints

model-customization/data-processing/chunking/data/sample-docling-json/2022-nfl-rulebook.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

model-customization/data-processing/chunking/data/sample-docling-json/2023-nfl-rulebook.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

model-customization/data-processing/chunking/data/sample-docling-json/inference-time-scaling.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
