Skip to content

Commit a5c6449

Browse files
Merge branch 'akshath/issue-1056-v2' of github.com:akshathmangudi/lighteval into akshath/issue-1056-v2
2 parents 3138c7c + a5caa1b commit a5c6449

File tree

9 files changed

+568
-106
lines changed

9 files changed

+568
-106
lines changed
Lines changed: 103 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,109 +1,112 @@
1-
name: PR Style Bot
1+
# name: PR Style Bot
22

3-
on:
4-
issue_comment:
5-
types: [created]
3+
# on:
4+
# issue_comment:
5+
# types: [created]
66

7-
permissions:
8-
contents: write
9-
pull-requests: write
7+
# permissions:
8+
# contents: write
9+
# pull-requests: write
1010

11-
jobs:
12-
run-style-bot:
13-
if: >
14-
contains(github.event.comment.body, '@bot /style') &&
15-
github.event.issue.pull_request != null
11+
# jobs:
12+
# run-style-bot:
13+
# if: >
14+
# contains(github.event.comment.body, '@bot /style') &&
15+
# github.event.issue.pull_request != null &&
16+
# github.event.comment.user.login == 'paulinebm'
1617

17-
runs-on: ubuntu-latest
18+
# runs-on: ubuntu-latest
1819

19-
steps:
20-
- name: Extract PR details
21-
id: pr_info
22-
uses: actions/github-script@v6
23-
with:
24-
script: |
25-
const prNumber = context.payload.issue.number;
26-
const { data: pr } = await github.rest.pulls.get({
27-
owner: context.repo.owner,
28-
repo: context.repo.repo,
29-
pull_number: prNumber
30-
});
20+
# steps:
21+
# - name: Extract PR details
22+
# id: pr_info
23+
# uses: actions/github-script@v6
24+
# with:
25+
# script: |
26+
# const prNumber = context.payload.issue.number;
27+
# const { data: pr } = await github.rest.pulls.get({
28+
# owner: context.repo.owner,
29+
# repo: context.repo.repo,
30+
# pull_number: prNumber
31+
# });
3132

32-
// We capture both the branch ref and the "full_name" of the head repo
33-
// so that we can check out the correct repository & branch (including forks).
34-
core.setOutput("prNumber", prNumber);
35-
core.setOutput("headRef", pr.head.ref);
36-
core.setOutput("headRepoFullName", pr.head.repo.full_name);
37-
- name: Check out PR branch
38-
uses: actions/checkout@v3
39-
env:
40-
HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
41-
HEADREF: ${{ steps.pr_info.outputs.headRef }}
42-
with:
43-
# Instead of checking out the base repo, use the contributor's repo name
44-
repository: ${{ env.HEADREPOFULLNAME }}
45-
ref: ${{ env.HEADREF }}
46-
# You may need fetch-depth: 0 for being able to push
47-
fetch-depth: 0
48-
token: ${{ secrets.GITHUB_TOKEN }}
33+
# // We capture both the branch ref and the "full_name" of the head repo
34+
# // so that we can check out the correct repository & branch (including forks).
35+
# core.setOutput("prNumber", prNumber);
36+
# core.setOutput("headRef", pr.head.ref);
37+
# core.setOutput("headRepoFullName", pr.head.repo.full_name);
38+
# - name: Check out PR branch
39+
# uses: actions/checkout@v3
40+
# env:
41+
# HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
42+
# HEADREF: ${{ steps.pr_info.outputs.headRef }}
43+
# with:
44+
# # Instead of checking out the base repo, use the contributor's repo name
45+
# repository: ${{ env.HEADREPOFULLNAME }}
46+
# ref: ${{ env.HEADREF }}
47+
# # You may need fetch-depth: 0 for being able to push
48+
# fetch-depth: 0
49+
# token: ${{ secrets.GITHUB_TOKEN }}
4950

50-
- name: Debug
51-
env:
52-
HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
53-
HEADREF: ${{ steps.pr_info.outputs.headRef }}
54-
PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
55-
run: |
56-
echo "PR number: ${{ env.PRNUMBER }}"
57-
echo "Head Ref: ${{ env.HEADREF }}"
58-
echo "Head Repo Full Name: ${{ env.HEADREPOFULLNAME }}"
51+
# - name: Debug
52+
# env:
53+
# HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
54+
# HEADREF: ${{ steps.pr_info.outputs.headRef }}
55+
# PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
56+
# run: |
57+
# echo "PR number: ${{ env.PRNUMBER }}"
58+
# echo "Head Ref: ${{ env.HEADREF }}"
59+
# echo "Head Repo Full Name: ${{ env.HEADREPOFULLNAME }}"
5960

60-
- name: Set up Python
61-
uses: actions/setup-python@v4
62-
- name: Install dependencies
63-
run: |
64-
pip install .[quality]
61+
# - name: Set up Python
62+
# uses: actions/setup-python@v4
63+
# - name: Install dependencies
64+
# env:
65+
# TEST: ${{ secrets.TEST }}
66+
# run: |
67+
# pip install .[quality]
6568

66-
- name: Run ruff format --check
67-
run: |
68-
ruff format .
69-
ruff check --fix .
70-
- name: Commit and push changes
71-
id: commit_and_push
72-
env:
73-
HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
74-
HEADREF: ${{ steps.pr_info.outputs.headRef }}
75-
PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
76-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
77-
run: |
78-
echo "HEADREPOFULLNAME: ${{ env.HEADREPOFULLNAME }}, HEADREF: ${{ env.HEADREF }}"
79-
# Configure git with the Actions bot user
80-
git config user.name "github-actions[bot]"
81-
git config user.email "github-actions[bot]@users.noreply.github.com"
82-
# Make sure your 'origin' remote is set to the contributor's fork
83-
git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${{ env.HEADREPOFULLNAME }}.git"
84-
# If there are changes after running style/quality, commit them
85-
if [ -n "$(git status --porcelain)" ]; then
86-
git add .
87-
git commit -m "Apply style fixes"
88-
# Push to the original contributor's forked branch
89-
git push origin HEAD:${{ env.HEADREF }}
90-
echo "changes_pushed=true" >> $GITHUB_OUTPUT
91-
else
92-
echo "No changes to commit."
93-
echo "changes_pushed=false" >> $GITHUB_OUTPUT
94-
fi
95-
- name: Comment on PR with workflow run link
96-
if: steps.commit_and_push.outputs.changes_pushed == 'true'
97-
uses: actions/github-script@v6
98-
with:
99-
script: |
100-
const prNumber = parseInt(process.env.prNumber, 10);
101-
const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
102-
await github.rest.issues.createComment({
103-
owner: context.repo.owner,
104-
repo: context.repo.repo,
105-
issue_number: prNumber,
106-
body: `Style fixes have been applied. [View the workflow run here](${runUrl}).`
107-
});
108-
env:
109-
prNumber: ${{ steps.pr_info.outputs.prNumber }}
69+
# - name: Run ruff format --check
70+
# run: |
71+
# ruff format .
72+
# ruff check --fix .
73+
# - name: Commit and push changes
74+
# id: commit_and_push
75+
# env:
76+
# HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
77+
# HEADREF: ${{ steps.pr_info.outputs.headRef }}
78+
# PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
79+
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
80+
# run: |
81+
# echo "HEADREPOFULLNAME: ${{ env.HEADREPOFULLNAME }}, HEADREF: ${{ env.HEADREF }}"
82+
# # Configure git with the Actions bot user
83+
# git config user.name "github-actions[bot]"
84+
# git config user.email "github-actions[bot]@users.noreply.github.com"
85+
# # Make sure your 'origin' remote is set to the contributor's fork
86+
# git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${{ env.HEADREPOFULLNAME }}.git"
87+
# # If there are changes after running style/quality, commit them
88+
# if [ -n "$(git status --porcelain)" ]; then
89+
# git add .
90+
# git commit -m "Apply style fixes"
91+
# # Push to the original contributor's forked branch
92+
# git push origin HEAD:${{ env.HEADREF }}
93+
# echo "changes_pushed=true" >> $GITHUB_OUTPUT
94+
# else
95+
# echo "No changes to commit."
96+
# echo "changes_pushed=false" >> $GITHUB_OUTPUT
97+
# fi
98+
# - name: Comment on PR with workflow run link
99+
# if: steps.commit_and_push.outputs.changes_pushed == 'true'
100+
# uses: actions/github-script@v6
101+
# with:
102+
# script: |
103+
# const prNumber = parseInt(process.env.prNumber, 10);
104+
# const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
105+
# await github.rest.issues.createComment({
106+
# owner: context.repo.owner,
107+
# repo: context.repo.repo,
108+
# issue_number: prNumber,
109+
# body: `Style fixes have been applied. [View the workflow run here](${runUrl}).`
110+
# });
111+
# env:
112+
# prNumber: ${{ steps.pr_info.outputs.prNumber }}

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ you need, or, here's an overview of some *popular benchmarks*:
7777
- **Turkic**: TUMLU (9 Turkic languages)
7878
- **Chinese**: CMMLU, CEval, AGIEval
7979
- **Russian**: RUMMLU, Russian SQuAD
80+
- **Kyrgyz**: Kyrgyz LLM Benchmark
8081
- **And many more...**
8182

8283
### 🧠 **Core Language Understanding**
@@ -124,7 +125,7 @@ Lighteval offers the following entry points for model evaluation:
124125
Didn't find what you need? You can always build your own custom model API by following [this guide](https://huggingface.co/docs/lighteval/main/en/evaluating-a-custom-model)
125126
- `lighteval custom`: Evaluate custom models (can be anything)
126127

127-
Here's a **quick command** to evaluate using the *Accelerate backend*:
128+
Here's a **quick command** to evaluate using a remote inference service:
128129

129130
```shell
130131
lighteval eval "hf-inference-providers/openai/gpt-oss-20b" gpqa:diamond
@@ -186,7 +187,7 @@ If you're adding a **new feature**, please *open an issue first*.
186187
If you open a PR, don't forget to **run the styling**!
187188

188189
```bash
189-
pip install -e .[dev]
190+
pip install -e ".[dev]"
190191
pre-commit install
191192
pre-commit run --all-files
192193
```

docs/source/available-tasks.mdx

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Browse and inspect tasks available in LightEval.
44
<iframe
5-
src="https://openevals-benchmark-finder.hf.space"
5+
src="https://openevals-open-benchmark-index.hf.space"
66
frameborder="0"
77
width="850"
88
height="450"
@@ -16,6 +16,18 @@ List all tasks:
1616
lighteval tasks list
1717
```
1818

19+
Extract task details:
20+
21+
```bash
22+
lighteval tasks dump
23+
```
24+
25+
Store the task details in a JSON file:
26+
27+
```bash
28+
lighteval tasks dump > tasks.json
29+
```
30+
1931
### Inspect specific tasks
2032

2133
Inspect a task to view its config, metrics, and requirements:

docs/source/index.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ and see how your models stack up.
99

1010
### 🚀 **Multi-Backend Support**
1111
Evaluate your models using the most popular and efficient inference backends:
12-
- `eval`: Use [inspect-ai](https://inspect.aisi.org.uk/) as backend to evaluate and inspect your models ! (prefered way)
12+
- `eval`: Use [inspect-ai](https://inspect.aisi.org.uk/) as backend to evaluate and inspect your models! (preferred way)
1313
- `transformers`: Evaluate models on CPU or one or more GPUs using [🤗
1414
Accelerate](https://github.com/huggingface/transformers)
1515
- `nanotron`: Evaluate models in distributed settings using [⚡️

src/lighteval/main_inspect.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,4 +565,4 @@ def bundle(log_dir: str, output_dir: str, overwrite: bool = True, repo_id: str |
565565
"tiny_benchmarks",
566566
]
567567
model = "hf-inference-providers/meta-llama/Llama-3.1-8B-Instruct:nebius"
568-
eval(models=[model], tasks=task)
568+
eval(models=[model], tasks=tasks[0])

0 commit comments

Comments
 (0)