Merge branch 'instructlab:main' into add-longbench

eshwarprasadS · web-flow · commit ba0ec6cffbf5 · 2025-05-28T11:59:42.000-04:00
diff --git a/.github/workflows/e2e-nvidia-l4-x1.yml b/.github/workflows/e2e-nvidia-l4-x1.yml
@@ -112,19 +112,36 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
 
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -v .
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
 
-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install -v packaging wheel setuptools-scm
-          python3.11 -m pip install -v .[cuda] -r requirements-vllm-cuda.txt
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt
         
       - name: Update instructlab-eval library
         working-directory: ./eval
diff --git a/.github/workflows/e2e-nvidia-l40s-x4.yml b/.github/workflows/e2e-nvidia-l40s-x4.yml
@@ -143,19 +143,36 @@ jobs:
         working-directory: ./instructlab
         run: |
           export CUDA_HOME="/usr/local/cuda"
-          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
           export PATH="$PATH:$CUDA_HOME/bin"
           python3.11 -m venv --upgrade-deps venv
           . venv/bin/activate
           nvidia-smi
           python3.11 -m pip cache remove llama_cpp_python
 
-          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install .
-
-          # https://github.com/instructlab/instructlab/issues/1821
-          # install with Torch and build dependencies installed
-          python3.11 -m pip install packaging wheel setuptools-scm
-          python3.11 -m pip install .[cuda] -r requirements-vllm-cuda.txt
+          pip_install="python3.11 -m pip install -v -c constraints-dev.txt"
+
+          # pre-install some build dependencies
+          $pip_install packaging wheel setuptools-scm
+
+          # flash-attn has a bug in the setup.py that causes pip to attempt installing it
+          # before torch is installed. This is a bug because their setup.py depends on
+          # importing the module, so it should have been listed in build_requires. Alas!
+          #
+          # See: https://github.com/Dao-AILab/flash-attention/pull/958
+          # Also: https://github.com/instructlab/instructlab/issues/1821
+          #
+          # first, pre-install flash-attn build dependencies
+          $pip_install torch packaging setuptools wheel psutil ninja
+
+          # now build flash-attn using the pre-installed build dependencies; this will
+          # guarantee that the build version of torch will match the runtime version of
+          # torch; otherwise, all kinds of problems may occur, like missing symbols when
+          # accessing C extensions and such
+          $pip_install flash-attn --no-build-isolation
+
+          CMAKE_ARGS="-DGGML_CUDA=on" $pip_install .
+          $pip_install .[cuda] -r requirements-vllm-cuda.txt
 
       - name: Update instructlab-eval library
         working-directory: ./eval
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -72,7 +72,7 @@ jobs:
         uses: ./.github/actions/free-disk-space
 
       - name: Setup Python 3.11
-        uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
           python-version: 3.11
           cache: pip
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -45,7 +45,6 @@ jobs:
       fail-fast: false
       matrix:
         python:
-          - "3.10"
           - "3.11"
         platform:
           - "ubuntu-latest"
@@ -79,7 +78,7 @@ jobs:
           brew install expect coreutils bash
 
       - name: Setup Python ${{ matrix.python }}
-        uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
         with:
           python-version: ${{ matrix.python }}
           cache: pip
diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml
@@ -7,6 +7,7 @@ config:
   code-block-style: false
   no-duplicate-header: false
   single-trailing-newline: false
+  descriptive-link-text: false
 globs:
   - "**/*.md"
 ignores:
diff --git a/.pylintrc b/.pylintrc
@@ -90,7 +90,7 @@ persistent=yes
 
 # Minimum Python version to use for version dependent checks. Will default to
 # the version used to run pylint.
-py-version=3.10
+py-version=3.11
 
 # Discover python modules and packages in the file system subtree.
 recursive=no
diff --git a/README.md b/README.md
@@ -90,14 +90,14 @@ A teacher model is used to generate new multiple choice questions based on the k
 
 ## Development
 
-> **⚠️ Note:** Must use Python version 3.10 or later.
+> **⚠️ Note:** Must use Python version 3.11 or later.
 
 ### Set up your dev environment
 
 The following tools are required:
 
 - [`git`](https://git-scm.com)
-- [`python`](https://www.python.org) (v3.10 or v3.11)
+- [`python`](https://www.python.org) (v3.11)
 - [`pip`](https://pypi.org/project/pip/) (v23.0+)
 - [`bash`](https://www.gnu.org/software/bash/) (v5+, for functional tests)
 
diff --git a/docs/ci.md b/docs/ci.md
@@ -0,0 +1,60 @@
+# CI for InstructLab Eval
+
+Before running any testing locally, ensure you have run `pip install -r requirements-dev.txt` in your environment.
+
+## Unit tests
+
+Unit tests are designed to test specific Eval components or features in isolation. Generally, new code should be adding or modifying unit tests.
+
+All unit tests currently live in the `tests/` directory and are run with [pytest](https://docs.pytest.org/) via [tox](https://tox.wiki/).
+
+To run the unit tests, you can run `tox -e unit` or `tox -e unitcov` if you want to generate coverage metrics as well.
+
+In CI, the tests are run with Python 3.11 on Ubuntu and macOS runners - you can see the details [here](https://github.com/instructlab/eval/blob/main/.github/workflows/test.yml).
+
+## Functional tests
+
+Functional tests are designed to test Eval components or features in tandem, but not necessarily as part of a complex workflow. New code may or may not need a functional test but should strive to implement one if possible.
+
+The functional test script is Shell-based and can be found at `scripts/functional-tests.sh`.
+
+To run the functional tests, you can run `tox -e functional`.
+
+In CI, the tests are run with Python 3.11 on Ubuntu and macOS runners - you can see the details [here](https://github.com/instructlab/eval/blob/main/.github/workflows/test.yml).
+
+## End-to-end (E2E) tests
+
+InstructLab Eval has several end-to-end jobs that run to ensure compatibility with the [InstructLab Core](https://github.com/instructlab/instructlab) project.
+You can see details about the types of jobs being run in the matrix below.
+
+For more details about the E2E scripts themselves, see [the InstructLab Core documentation](https://github.com/instructlab/instructlab/blob/main/docs/maintainers/ci.md#end-to-end-e2e-tests).
+
+### Current E2E Jobs
+
+| Name | T-Shirt Size | Runner Host | Instance Type | OS | GPU Type | Script | Flags | Runs when? | Slack/Discord reporting? |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| [`e2e-nvidia-l4-x1.yml`](https://github.com/instructlab/eval/blob/main/.github/workflows/e2e-nvidia-l4-x1.yml) | Medium | AWS | [`g6.8xlarge`](https://aws.amazon.com/ec2/instance-types/g6/) | CentOS Stream 9 | 1 x NVIDIA L4 w/ 24 GB VRAM | `e2e-ci.sh` | `m` | Pull Requests, Push to `main` or `release-*` branch | No |
+| [`e2e-nvidia-l40s-x4.yml`](https://github.com/instructlab/eval/blob/main/.github/workflows/e2e-nvidia-l40s-x4.yml) | Large | AWS | [`g6e.12xlarge`](https://aws.amazon.com/ec2/instance-types/g6e/) | CentOS Stream 9 | 4 x NVIDIA L40S w/ 48 GB VRAM (192 GB) | `e2e-ci.sh` | `l` | Manually by Maintainers, Automatically against `main` branch at 4PM UTC | Yes |
+
+### Discord/Slack reporting
+
+Some E2E jobs send their results to the channel `#e2e-ci-results` via the `Son of Jeeves` bot in both Discord and Slack. You can see which jobs currently have reporting via the "Current E2E Jobs" table above.
+
+In Slack, this has been implemented via [the official Slack GitHub Action](https://github.com/slackapi/slack-github-action?tab=readme-ov-file#technique-2-slack-app).
+In Discord, we use [sarisia/actions-status-discord](https://github.com/sarisia/actions-status-discord) and the built-in channel webhooks feature.
+
+### Triggering an E2E job via GitHub Web UI
+
+The E2E jobs that can be launched manually take an input field that
+specifies the PR number or git branch to run them against. If you run them
+against a PR, they will automatically post a comment to the PR when the tests
+begin and end so it's easier for those involved in the PR to follow the results.
+
+1. Visit the [Actions tab](https://github.com/instructlab/eval/actions).
+2. Click on one of the E2E workflows on the left side of the page.
+3. Click on the `Run workflow` button on the right side of the page.
+4. Enter a branch name or a PR number in the input field.
+5. Click the green `Run workflow` button.
+
+> [!NOTE]
+> Only users with "Write" permissions to the repo can run CI jobs manually.
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,7 +12,7 @@ authors = [
 description = "Evaluation"
 readme = "README.md"
 license = {text = "Apache-2.0"}
-requires-python = ">=3.9"
+requires-python = ">=3.11"
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Environment :: Console",
@@ -22,8 +22,6 @@ classifiers = [
     "Operating System :: POSIX :: Linux",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: Implementation :: CPython",
diff --git a/tox.ini b/tox.ini
@@ -103,4 +103,3 @@ passenv =
 [gh]
 python =
     3.11 = py311-{unitcov, functional}
-    3.10 = py310-{unitcov, functional}