Skip to content

Commit 38aa208

Browse files
Auto build moderator (#717)
1 parent 192122a commit 38aa208

File tree

4 files changed

+61
-4
lines changed

4 files changed

+61
-4
lines changed

.github/workflows/post-dependabot.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
# Rebuilds the moderator demo's pickled model on Dependabot pull requests whose
# title mentions scikit-learn, then commits the regenerated pipeline.dat back to
# the pull-request branch. (Presumably needed because the pickled pipeline is
# tied to the scikit-learn version that produced it - confirm.)
name: Dependabot post-update (rebuild moderator model)
on:
  pull_request_target:
    # Activity types are filtered with `types`; there is no `action` key here.
    types: [opened, synchronize, reopened]
    branches:
      - 'master'

concurrency:
  # One run per pull request: a newer push cancels the rebuild still in flight.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

# Deny everything by default; the job below opts in to what it needs.
permissions: {}
jobs:
  post-update:
    permissions:
      pull-requests: read  # for gh pr checkout
      contents: write  # to push code in repo (stefanzweifel/git-auto-commit-action)

    # This job runs on pull_request_target, checks out the pull-request branch
    # and executes code from it with a write-scoped token, so this guard is
    # what restricts it to Dependabot's scikit-learn updates.
    if: github.actor == 'dependabot[bot]' && contains(github.event.pull_request.title, 'scikit-learn')
    runs-on: ubuntu-latest
    steps:
      - name: Generate token
        id: generate_token
        uses: tibdex/github-app-token@v1
        with:
          app_id: ${{ secrets.BOT_APP_ID }}
          private_key: ${{ secrets.BOT_PRIVATE_KEY }}
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # Check out with the app token generated above rather than the
          # default GITHUB_TOKEN.
          token: ${{ steps.generate_token.outputs.token }}
      - name: Login
        run: |
          echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
      - name: Checkout pull request branch
        run: |
          gh pr checkout ${{ github.event.pull_request.number }}
      - name: Setup Python
        uses: actions/setup-python@v4
        # Action inputs must be nested under `with`.
        # NOTE(review): no python-version is pinned here - confirm the runner
        # default is the interpreter the model should be built with.
        with:
          cache: 'pip'
          cache-dependency-path: '**/requirements*.txt'
      - name: Download train.csv
        # Original file can be found at: https://www.kaggle.com/code/piumallick/toxic-comments-sentiment-analysis/input?select=train.csv.zip
        env:
          # NOTE(review): assumes the download URL is stored as a repository
          # secret with this name - confirm (use `vars.` instead if it is a
          # configuration variable).
          MODERATOR_TRAINING_URL: ${{ secrets.MODERATOR_TRAINING_URL }}
        # --fail makes an HTTP error fail the step instead of saving the error
        # page as train.csv; --location follows redirects.
        run: curl --fail --location --output train.csv "$MODERATOR_TRAINING_URL"
      - name: Rebuild model
        # NOTE(review): no step in this workflow installs the project's Python
        # dependencies before this runs - confirm they are available.
        run: PYTHONPATH='demos/moderator' python -m moderator.build_model train.csv
      - name: Commit and push if needed
        uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: Rebuild moderator model
          # Only the regenerated model is committed; train.csv is left out.
          file_pattern: 'demos/moderator/model/pipeline.dat'
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
import sys
2+
from pathlib import Path
3+
14
from moderator.consts import PROJ_ROOT
25
from moderator.model.pipeline import build_model
36

47

if __name__ == '__main__':
    # The training CSV path is taken from the first command-line argument.
    # Fail with a usage message instead of an IndexError traceback when it is
    # missing; any extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <path-to-train.csv>')

    dataset_path = Path(sys.argv[1])
    # The trained pipeline is written into the project's `model` directory.
    model_path = PROJ_ROOT / 'model'
    build_model(dataset_path, model_path)

demos/moderator/moderator/model/pipeline.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import pickle
2+
from pathlib import Path
3+
14
import pandas as pd
25

3-
import pickle
46
from sklearn.feature_extraction.text import TfidfVectorizer
57
from sklearn.linear_model import LogisticRegression
68
from sklearn.multioutput import MultiOutputClassifier
@@ -38,11 +40,12 @@ def build_pipeline():
3840
return pipeline
3941

4042

def build_model(dataset_path: Path, model_path: Path) -> None:
    """Train the pipeline on a dataset and pickle it to disk.

    Args:
        dataset_path: Location of the training data, passed to ``read_data``.
        model_path: Directory that receives the pickled pipeline as
            ``pipeline.dat``. It is created if it does not exist yet.
    """
    train, targets = read_data(dataset_path)

    pipeline = build_pipeline()
    pipeline.fit(train, targets)

    # Make sure the target directory exists so that opening the output file
    # cannot fail with FileNotFoundError on a fresh checkout.
    model_path.mkdir(parents=True, exist_ok=True)
    output_path = model_path / "pipeline.dat"
    with output_path.open("wb") as f:
        pickle.dump(pipeline, f)
-2.76 MB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../moderator/model/pipeline.dat

0 commit comments

Comments
 (0)