diff --git a/.github/workflows/pr_style_bot.yaml b/.github/workflows/pr_style_bot.yaml
new file mode 100644
index 000000000..35d7b9597
--- /dev/null
+++ b/.github/workflows/pr_style_bot.yaml
@@ -0,0 +1,87 @@
+name: PR Style Bot
+
+on:
+  issue_comment:
+    types: [created]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  run-style-bot:
+    if: >
+      contains(github.event.comment.body, '@bot /style') &&
+      github.event.issue.pull_request != null
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Extract PR details
+        id: pr_info
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const prNumber = context.payload.issue.number;
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: prNumber
+            });
+
+            // We capture both the branch ref and the "full_name" of the head repo
+            // so that we can check out the correct repository & branch (including forks).
+            core.setOutput("prNumber", prNumber);
+            core.setOutput("headRef", pr.head.ref);
+            core.setOutput("headRepoFullName", pr.head.repo.full_name);
+      - name: Check out PR branch
+        uses: actions/checkout@v3
+        with:
+          repository: ${{ steps.pr_info.outputs.headRepoFullName }}
+          ref: ${{ steps.pr_info.outputs.headRef }}
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+
+      - name: Install dependencies
+        run: pip install .[quality]
+
+      - name: Apply style fixes
+        run: |
+          ruff format .
+          ruff check --fix .
+
+      - name: Commit and push changes
+        id: commit_and_push
+        env:
+          HEADREPOFULLNAME: ${{ steps.pr_info.outputs.headRepoFullName }}
+          HEADREF: ${{ steps.pr_info.outputs.headRef }}
+          PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${{ env.HEADREPOFULLNAME }}.git"
+          if [ -n "$(git status --porcelain)" ]; then
+            git add .
+            git commit -m "Apply style fixes"
+            git push origin HEAD:${{ env.HEADREF }}
+            echo "changes_pushed=true" >> $GITHUB_OUTPUT
+          else
+            echo "changes_pushed=false" >> $GITHUB_OUTPUT
+          fi
+      - name: Comment on PR
+        if: steps.commit_and_push.outputs.changes_pushed == 'true'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const prNumber = ${{ steps.pr_info.outputs.prNumber }};
+            const runUrl = `${process.env.GITHUB_SERVER_URL}/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`;
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: `Style fixes have been applied. [View the workflow run here](${runUrl}).`
+            });
diff --git a/docs/source/adding-a-new-metric.mdx b/docs/source/adding-a-new-metric.mdx
index f78963cd2..f81bf986b 100644
--- a/docs/source/adding-a-new-metric.mdx
+++ b/docs/source/adding-a-new-metric.mdx
@@ -155,7 +155,6 @@ from lighteval.tasks.lighteval_task import LightevalTaskConfig
 
 task = LightevalTaskConfig(
     name="my_custom_task",
-    suite=["community"],
     metric=[my_custom_metric],  # Use your custom metric here
     prompt_function=my_prompt_function,
     hf_repo="my_dataset",
diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py
index 1d3c2f0f8..694a76fe5 100644
--- a/src/lighteval/__main__.py
+++ b/src/lighteval/__main__.py
@@ -71,6 +71,7 @@
 app.command(rich_help_panel="Evaluation Backends")(lighteval.main_custom.custom)
 app.command(rich_help_panel="Evaluation Backends")(lighteval.main_sglang.sglang)
 app.command(rich_help_panel="Evaluation Backends")(lighteval.main_inspect.eval)
+app.command(rich_help_panel="EvaluationUtils")(lighteval.main_inspect.bundle)
 app.add_typer(
     lighteval.main_endpoint.app,
     name="endpoint",
diff --git a/src/lighteval/main_inspect.py b/src/lighteval/main_inspect.py
index b1cf3215a..a8402df82 100644
--- a/src/lighteval/main_inspect.py
+++ b/src/lighteval/main_inspect.py
@@ -30,6 +30,7 @@
 from inspect_ai import Epochs, Task, task
 from inspect_ai import eval_set as inspect_ai_eval_set
 from inspect_ai.dataset import hf_dataset
+from inspect_ai.log import bundle_log_dir
 from inspect_ai.scorer import exact
 from inspect_ai.solver import generate, system_message
 from pytablewriter import MarkdownTableWriter
@@ -519,6 +520,13 @@ def eval(  # noqa C901
     print("run 'inspect view' to view the results")
 
 
+def bundle(log_dir: str, output_dir: str, overwrite: bool = True, repo_id: str | None = None, public: bool = False):
+    bundle_log_dir(log_dir=log_dir, output_dir=output_dir, overwrite=overwrite)
+
+    if repo_id is not None:
+        push_to_hub(output_dir, repo_id, public=public)
+
+
 if __name__ == "__main__":
     task = "lighteval|gsm8k|5,lighteval|gsm8k|1,lighteval|gsm8k|0"
     task = "lighteval|agieval|0"