Merged
9 changes: 5 additions & 4 deletions .github/actions/run-smoke/action.yml
@@ -29,7 +29,7 @@ runs:
run: |
python${{ inputs.python-version }} -m venv --upgrade-deps venv
. venv/bin/activate
-        pip install tox
+        pip install tox -c constraints-dev.txt

# flash-attn has a bug in the setup.py that causes pip to attempt
# installing it before torch is installed. This is a bug because their
@@ -41,7 +41,7 @@ runs:
run: |
source venv/bin/activate
# The list is taken from the pull request linked above
-        pip install torch packaging setuptools wheel psutil ninja
+        pip install torch packaging setuptools wheel psutil ninja -c constraints-dev.txt
Contributor:

I totally get that we have to do this step for flash-attn.

We have to use tox-current-env to enable this workaround, correct? I.e., because we can't build our venv in a single shot (which is what Tox normally does), we instead have to do a four-part environment build:

  1. install Tox
  2. install the dependencies for flash-attention
  3. install tox-current-env
  4. run full package installation (including FA build and local package build)

Contributor (author):

Yes. Is there some concern here? Not sure if the comment should be addressed in some form, or just acknowledged. :) Let me know!
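[Editor's note] The four-part build discussed above can be sketched end to end as a single script. This is a dry run: the run() helper (my addition, not part of the PR) only echoes each command, since the real steps need network access and a CUDA toolchain; drop the echo to execute for real.

```shell
# Dry-run sketch of the four-part environment build discussed above.
# run() only prints each command; change it to "$@" to actually execute.
run() { echo "+ $*"; }

# 1. create the venv and install Tox under the shared constraints
run python3 -m venv --upgrade-deps venv
run pip install tox -c constraints-dev.txt

# 2. pre-install the build dependencies flash-attn needs at setup time
run pip install torch packaging setuptools wheel psutil ninja -c constraints-dev.txt

# 3. install tox-current-env so tox reuses this venv instead of building its own
run pip install tox-current-env -c constraints-dev.txt

# 4. resolve the full dependency list, then install it (this builds flash-attn)
#    and finally install the package itself
run tox -e py3-smoke --print-deps-to-file=./deps.txt
run pip install -r ./deps.txt --no-build-isolation -c constraints-dev.txt
run pip install . -c constraints-dev.txt
```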


- name: "Install tox-current-env to reuse the venv with pre-installed build dependencies"
shell: bash
@@ -54,8 +54,9 @@ runs:
run: |
source venv/bin/activate
tox -e py3-smoke --print-deps-to-file=./deps.txt
-        pip install -r ./deps.txt --no-build-isolation
-        pip install .
+        pip_install="pip install -c constraints-dev.txt"
+        $pip_install -r ./deps.txt --no-build-isolation
+        $pip_install .
Contributor:

If we've installed the other project deps via $pip_install -r ./deps.txt --no-build-isolation, do we want to do a --no-deps installation here?

Contributor (author):


Not sure. I think the effect will be the same, since we indeed already pre-installed all dependencies (with constraints enforced), but I'd rather not add it if it's not required. Can you think of a specific scenario where omitting --no-deps could bite us?

Collaborator:


I think the effect is essentially the same.


- name: "Show disk utilization BEFORE tests"
shell: bash
35 changes: 35 additions & 0 deletions .github/workflows/constraints-update.yml
@@ -0,0 +1,35 @@
# Aligned with: https://github.com/instructlab/dev-docs/pull/198
name: Update constraints-dev.txt

on:
schedule:
- cron: '0 3 * * 1' # Every Monday at 03:00 UTC
workflow_dispatch:

jobs:
update-constraints:
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write

steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Checkout "update-constraints" in-house CI action
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: instructlab/ci-actions
path: ci-actions
# no tag that includes https://github.com/instructlab/ci-actions/pull/26, yet
ref: 88641ccaf122964eacdc1a82b18bda369b6f99bd # main
sparse-checkout: |
actions/update-constraints

- name: Update constraints
id: update-constraints
uses: ./ci-actions/actions/update-constraints
with:
gh-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}

35 changes: 18 additions & 17 deletions .github/workflows/e2e-nvidia-l40s-x4.yml
@@ -177,27 +177,28 @@ jobs:
- name: Install ilab
working-directory: ./instructlab
run: |
export CUDA_HOME="/usr/local/cuda"
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
export PATH="$PATH:$CUDA_HOME/bin"
python3.11 -m venv --upgrade-deps venv
. venv/bin/activate
nvidia-smi
python3.11 -m pip cache remove llama_cpp_python

CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install . -c constraints-dev.txt

# https://github.com/instructlab/instructlab/issues/1821
# install with Torch and build dependencies installed
python3.11 -m pip install packaging wheel setuptools-scm
python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt
PYTHON=python3.11 ./scripts/install-ilab-with-cuda.sh
Collaborator:

nice sneaking this in here :) we needed to switch to this anyway


- name: Update instructlab-training library
working-directory: ./training
run: |
. ../instructlab/venv/bin/activate
-        pip install .
-        pip install .[cuda]
+        # Patch out our own pin from the ilab repo constraints file
+        ilab_constraints=../instructlab/constraints-dev.txt
+        sed -i '/instructlab-training==/d' $ilab_constraints
+        # Since we reuse the virtual environment prepared using ilab
+        # constraints, we should stick to the same constraints when
+        # installing latest training.
+        #
+        # FIX: this is not ideal; a proper fix would require decoupling the
+        # two repos in CI: either by removing the job completely and relying
+        # on "sdk" (no ilab) test runs; or by preparing a separate
+        # constraints file that would consider both the requirements files
+        # for the training library AND for the ilab - so that they are
+        # consistent.
+        pip_install="pip install -c $ilab_constraints"
+        $pip_install .
+        $pip_install .[cuda]
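[Editor's note] The sed-based constraint patching in this step is easy to exercise in isolation. A minimal sketch, using a throwaway file with hypothetical pins standing in for the real ilab constraints-dev.txt:

```shell
# Stand-in constraints file (hypothetical pins, for illustration only)
demo=$(mktemp)
cat > "$demo" <<'EOF'
torch==2.6.0
instructlab-training==0.7.0
transformers==4.52.3
EOF

# Drop the repo's own instructlab-training pin, as the workflow does,
# so the locally built training library can be installed on top.
sed -i '/instructlab-training==/d' "$demo"

# The torch and transformers pins survive; the training pin is gone.
cat "$demo"
```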

- name: Check disk before tests
if: always()
@@ -380,7 +381,7 @@ jobs:
working-directory: ./training
run: |
python -m pip install --upgrade pip
-        pip install -r requirements-dev.txt
+        pip install -r requirements-dev.txt -c constraints-dev.txt

- name: Try to upload Phase 1 to s3
id: phase-1-upload-s3
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
@@ -80,7 +80,7 @@ jobs:
- name: Install tox
run: |
python -m pip install --upgrade pip
-        python -m pip install tox tox-gh
+        python -m pip install tox tox-gh -c constraints-dev.txt

- name: "${{ matrix.lint.name }}"
run: |
2 changes: 1 addition & 1 deletion .github/workflows/unit.yaml
@@ -72,7 +72,7 @@ jobs:
run: |
python -m venv --upgrade-deps venv
. venv/bin/activate
-        pip install tox
+        pip install tox -c constraints-dev.txt

- name: "Show disk utilization BEFORE tests"
if: always()
193 changes: 193 additions & 0 deletions constraints-dev.txt
@@ -0,0 +1,193 @@
accelerate==1.0.1 # via -r requirements-cuda.txt, -r requirements-rocm.txt, peft, trl
aiofiles==24.1.0 # via -r requirements.txt
aiohappyeyeballs==2.6.1 # via aiohttp
aiohttp==3.12.2 # via fsspec
aiosignal==1.3.2 # via aiohttp
annotated-types==0.7.0 # via pydantic
anyio==4.9.0 # via httpx, jupyter-server
argon2-cffi==23.1.0 # via jupyter-server
argon2-cffi-bindings==21.2.0 # via argon2-cffi
arrow==1.3.0 # via isoduration
astroid==3.3.10 # via pylint
asttokens==3.0.0 # via stack-data
async-lru==2.0.5 # via jupyterlab
attrs==25.3.0 # via aiohttp, jsonschema, referencing
babel==2.17.0 # via jupyterlab-server
beautifulsoup4==4.13.4 # via nbconvert
bitsandbytes==0.46.0 # via -r requirements-cuda.txt
bleach==6.2.0 # via nbconvert
cachetools==6.0.0 # via tox
certifi==2025.4.26 # via httpcore, httpx, requests
cffi==1.17.1 # via argon2-cffi-bindings
cfgv==3.4.0 # via pre-commit
chardet==5.2.0 # via tox
charset-normalizer==3.4.2 # via requests
colorama==0.4.6 # via tox
comm==0.2.2 # via ipykernel, ipywidgets
contourpy==1.3.2 # via matplotlib
cycler==0.12.1 # via matplotlib
datasets==3.6.0 # via -r requirements.txt, trl
debugpy==1.8.14 # via ipykernel
decorator==5.2.1 # via ipython
deepspeed==0.16.9 # via -r requirements-deepspeed.txt
defusedxml==0.7.1 # via nbconvert
dill==0.3.8 # via datasets, multiprocess, pylint
distlib==0.3.9 # via virtualenv
einops==0.8.1 # via deepspeed, flash-attn
executing==2.2.0 # via stack-data
fastjsonschema==2.21.1 # via nbformat
filelock==3.18.0 # via datasets, huggingface-hub, torch, tox, transformers, virtualenv
flash-attn==2.7.4.post1 # via -r requirements-cuda.txt, -r requirements-rocm.txt
fonttools==4.58.1 # via matplotlib
fqdn==1.5.1 # via jsonschema
frozenlist==1.6.0 # via aiohttp, aiosignal
fsspec==2025.3.0 # via datasets, huggingface-hub, torch
h11==0.16.0 # via httpcore
hf-xet==1.1.2 # via huggingface-hub
hjson==3.1.0 # via deepspeed
httpcore==1.0.9 # via httpx
httpx==0.28.1 # via jupyterlab
huggingface-hub==0.32.2 # via -r requirements-dev.txt, accelerate, datasets, peft, tokenizers, transformers
identify==2.6.12 # via pre-commit
idna==3.10 # via anyio, httpx, jsonschema, requests, yarl
instructlab-dolomite==0.2.0 # via -r requirements.txt
ipykernel==6.29.5 # via -r requirements-dev.txt, jupyter, jupyter-console, jupyterlab
ipython==9.2.0 # via -r requirements-dev.txt, ipykernel, ipywidgets, jupyter-console
ipython-pygments-lexers==1.1.1 # via ipython
ipywidgets==8.1.7 # via jupyter
isoduration==20.11.0 # via jsonschema
isort==6.0.1 # via -r requirements-dev.txt, pylint
jedi==0.19.2 # via ipython
jinja2==3.1.6 # via jupyter-server, jupyterlab, jupyterlab-server, nbconvert, torch
json5==0.12.0 # via jupyterlab-server
jsonpointer==3.0.0 # via jsonschema
jsonschema==4.24.0 # via jupyter-events, jupyterlab-server, nbformat
jsonschema-specifications==2025.4.1 # via jsonschema
jupyter==1.1.1 # via -r requirements-dev.txt
jupyter-client==8.6.3 # via ipykernel, jupyter-console, jupyter-server, nbclient
jupyter-console==6.6.3 # via jupyter
jupyter-core==5.8.1 # via ipykernel, jupyter-client, jupyter-console, jupyter-server, jupyterlab, nbclient, nbconvert, nbformat
jupyter-events==0.12.0 # via jupyter-server
jupyter-lsp==2.2.5 # via jupyterlab
jupyter-server==2.16.0 # via jupyter-lsp, jupyterlab, jupyterlab-server, notebook, notebook-shim
jupyter-server-terminals==0.5.3 # via jupyter-server
jupyterlab==4.4.3 # via jupyter, notebook
jupyterlab-pygments==0.3.0 # via nbconvert
jupyterlab-server==2.27.3 # via jupyterlab, notebook
jupyterlab-widgets==3.0.15 # via ipywidgets
kiwisolver==1.4.8 # via matplotlib
liger-kernel==0.5.10 # via -r requirements-cuda.txt
llvmlite==0.44.0 # via numba
markdown-it-py==3.0.0 # via rich
markupsafe==3.0.2 # via jinja2, nbconvert
matplotlib==3.10.3 # via -r requirements-dev.txt
matplotlib-inline==0.1.7 # via ipykernel, ipython
mccabe==0.7.0 # via pylint
mdurl==0.1.2 # via markdown-it-py
mistune==3.1.3 # via nbconvert
mpmath==1.3.0 # via sympy
msgpack==1.1.0 # via deepspeed
multidict==6.4.4 # via aiohttp, yarl
multiprocess==0.70.16 # via datasets
nbclient==0.10.2 # via nbconvert
nbconvert==7.16.6 # via jupyter, jupyter-server
nbformat==5.10.4 # via jupyter-server, nbclient, nbconvert
nest-asyncio==1.6.0 # via ipykernel
networkx==3.4.2 # via torch
ninja==1.11.1.4 # via deepspeed
nodeenv==1.9.1 # via pre-commit
notebook==7.4.3 # via jupyter
notebook-shim==0.2.4 # via jupyterlab, notebook
numba==0.61.2 # via -r requirements.txt
numpy==1.26.4 # via -r requirements-dev.txt, -r requirements.txt, accelerate, bitsandbytes, contourpy, datasets, deepspeed, matplotlib, numba, pandas, peft, transformers
nvidia-cublas-cu12==12.4.5.8 # via nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch
nvidia-cuda-cupti-cu12==12.4.127 # via torch
nvidia-cuda-nvrtc-cu12==12.4.127 # via torch
nvidia-cuda-runtime-cu12==12.4.127 # via torch
nvidia-cudnn-cu12==9.1.0.70 # via torch
nvidia-cufft-cu12==11.2.1.3 # via torch
nvidia-curand-cu12==10.3.5.147 # via torch
nvidia-cusolver-cu12==11.6.1.9 # via torch
nvidia-cusparse-cu12==12.3.1.170 # via nvidia-cusolver-cu12, torch
nvidia-cusparselt-cu12==0.6.2 # via torch
nvidia-ml-py==12.575.51 # via deepspeed
nvidia-nccl-cu12==2.21.5 # via torch
nvidia-nvjitlink-cu12==12.4.127 # via nvidia-cusolver-cu12, nvidia-cusparse-cu12, torch
nvidia-nvtx-cu12==12.4.127 # via torch
overrides==7.7.0 # via jupyter-server
packaging==25.0 # via -r requirements.txt, accelerate, datasets, deepspeed, huggingface-hub, ipykernel, jupyter-events, jupyter-server, jupyterlab, jupyterlab-server, matplotlib, nbconvert, peft, pyproject-api, tox, transformers
pandas==2.2.3 # via datasets
pandocfilters==1.5.1 # via nbconvert
parso==0.8.4 # via jedi
peft==0.15.2 # via -r requirements.txt
pexpect==4.9.0 # via ipython
pillow==11.2.1 # via matplotlib
platformdirs==4.3.8 # via jupyter-core, pylint, tox, virtualenv
pluggy==1.6.0 # via tox
pre-commit==4.2.0 # via -r requirements-dev.txt
prometheus-client==0.22.0 # via jupyter-server
prompt-toolkit==3.0.51 # via ipython, jupyter-console
propcache==0.3.1 # via aiohttp, yarl
psutil==7.0.0 # via accelerate, deepspeed, ipykernel, peft
ptyprocess==0.7.0 # via pexpect, terminado
pure-eval==0.2.3 # via stack-data
py-cpuinfo==9.0.0 # via -r requirements.txt, deepspeed
pyarrow==20.0.0 # via datasets
pycparser==2.22 # via cffi
pydantic==2.11.5 # via -r requirements.txt, deepspeed, pylint-pydantic
pydantic-core==2.33.2 # via pydantic
pygments==2.19.1 # via ipython, ipython-pygments-lexers, jupyter-console, nbconvert, rich
pylint==3.3.7 # via -r requirements-dev.txt, pylint-plugin-utils, pylint-pydantic
pylint-plugin-utils==0.8.2 # via pylint-pydantic
pylint-pydantic==0.3.5 # via -r requirements-dev.txt
pyparsing==3.2.3 # via matplotlib
pyproject-api==1.9.1 # via tox
python-dateutil==2.9.0.post0 # via arrow, jupyter-client, matplotlib, pandas
python-json-logger==3.3.0 # via jupyter-events
pytz==2025.2 # via pandas
pyyaml==6.0.2 # via -r requirements.txt, accelerate, datasets, huggingface-hub, jupyter-events, peft, pre-commit, transformers
pyzmq==26.4.0 # via ipykernel, jupyter-client, jupyter-console, jupyter-server
referencing==0.36.2 # via jsonschema, jsonschema-specifications, jupyter-events
regex==2024.11.6 # via transformers
requests==2.32.3 # via datasets, huggingface-hub, jupyterlab-server, transformers
rfc3339-validator==0.1.4 # via jsonschema, jupyter-events
rfc3986-validator==0.1.1 # via jsonschema, jupyter-events
rich==14.0.0 # via -r requirements.txt
rpds-py==0.25.1 # via jsonschema, referencing
ruff==0.11.11 # via -r requirements-dev.txt
safetensors==0.5.3 # via accelerate, instructlab-dolomite, peft, transformers
send2trash==1.8.3 # via jupyter-server
six==1.17.0 # via python-dateutil, rfc3339-validator
sniffio==1.3.1 # via anyio
soupsieve==2.7 # via beautifulsoup4
stack-data==0.6.3 # via ipython
sympy==1.13.1 # via torch
terminado==0.18.1 # via jupyter-server, jupyter-server-terminals
tinycss2==1.4.0 # via bleach
tokenizers==0.21.1 # via transformers
tomlkit==0.13.2 # via pylint
torch==2.6.0 # via -c constraints-dev.txt.in, -r requirements.txt, accelerate, bitsandbytes, deepspeed, flash-attn, instructlab-dolomite, liger-kernel, peft
tornado==6.5.1 # via ipykernel, jupyter-client, jupyter-server, jupyterlab, notebook, terminado
tox==4.26.0 # via -r requirements-dev.txt, tox-current-env
tox-current-env==0.0.16 # via -r requirements-dev.txt
tqdm==4.67.1 # via datasets, deepspeed, huggingface-hub, peft, transformers
traitlets==5.14.3 # via comm, ipykernel, ipython, ipywidgets, jupyter-client, jupyter-console, jupyter-core, jupyter-events, jupyter-server, jupyterlab, matplotlib-inline, nbclient, nbconvert, nbformat
transformers==4.52.3 # via -r requirements.txt, instructlab-dolomite, peft, trl
triton==3.2.0 # via liger-kernel, torch
trl==0.18.0 # via -r requirements.txt
types-python-dateutil==2.9.0.20250516 # via arrow
typing-extensions==4.13.2 # via anyio, beautifulsoup4, huggingface-hub, ipython, pydantic, pydantic-core, referencing, torch, typing-inspection
typing-inspection==0.4.1 # via pydantic
tzdata==2025.2 # via pandas
uri-template==1.3.0 # via jsonschema
urllib3==2.4.0 # via requests
virtualenv==20.31.2 # via pre-commit, tox
wcwidth==0.2.13 # via prompt-toolkit
webcolors==24.11.1 # via jsonschema
webencodings==0.5.1 # via bleach, tinycss2
websocket-client==1.8.0 # via jupyter-server
wheel==0.45.1 # via -r requirements.txt
widgetsnbextension==4.0.14 # via ipywidgets
xxhash==3.5.0 # via datasets
yarl==1.20.0 # via aiohttp
setuptools==80.9.0 # via jupyterlab
6 changes: 6 additions & 0 deletions constraints-dev.txt.in
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: Apache-2.0

# These are synchronized with instructlab repo; we have to keep them in sync at
# least until we no longer tie training repo CI with ilab repo through e2e jobs.
torch<2.7.0
vllm<0.9.0
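[Editor's note] The relationship between the two files: constraints-dev.txt.in carries the hand-written upper bounds, and the generated constraints-dev.txt must pin inside them (note the "-c constraints-dev.txt.in" marker on the torch line in the lockfile above). A quick shell check of that invariant, using the torch pin as an example; sort -V does version-aware ordering:

```shell
# Check that the generated pin stays inside the hand-written upper bound.
# Versions are taken from the two files above.
pin="2.6.0"     # torch==2.6.0 in constraints-dev.txt
bound="2.7.0"   # torch<2.7.0 in constraints-dev.txt.in

# sort -V orders versions numerically; the bound must sort last and the
# pin must not equal it, i.e. pin < bound.
highest=$(printf '%s\n' "$pin" "$bound" | sort -V | tail -n1)
if [ "$highest" = "$bound" ] && [ "$pin" != "$bound" ]; then
    echo "torch pin $pin satisfies <$bound"
else
    echo "torch pin $pin violates <$bound" >&2
    exit 1
fi
```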