Commit 8a4bc8e

new presets and multiple bug fixes (#247)
* add presets
* lots of small fixes, updates and changes
* proper testing
* add transformers configs to presets
* set scoring_f1 target metric
* remove comments
* fix embedder hashing
* fix torch scorers `report_to`
* fix pipeline refitting
* fix presets
* fix `clear_cache` for torch models
* add extra exception handler into llm generator
* fix transformers heavy preset
* fix no-hpo
* remove rerank from medium
* upd classic heavy
* remove unnecessary event loop closing
* fix sklearn scorer cache clearing
* try to optimize bert scorer
* upd `report_to` in bert
* Update optimizer_config.schema.json
* fix typo
* try to fix typing
* fix callbacks test
* run formatter
* try to fix typing errors

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent a8eac23 commit 8a4bc8e

35 files changed  +412 −79 lines changed

autointent/_callbacks/tensorboard.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def __init__(self) -> None:
         Raises an ImportError if neither are installed.
         """
         try:
-            from torch.utils.tensorboard import SummaryWriter  # type: ignore[attr-defined]
+            from torch.utils.tensorboard import SummaryWriter

             self.writer = SummaryWriter
         except ImportError:
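The docstring in this hunk says an ImportError is raised only if neither backend is available. A minimal sketch of that guarded-import pattern; the tensorboardX fallback is an assumption, since only the torch branch is visible in the diff:

import logging

try:
    from torch.utils.tensorboard import SummaryWriter

    writer_cls = SummaryWriter
except ImportError:
    try:
        # Assumed fallback backend; not shown in the hunk above.
        from tensorboardX import SummaryWriter  # type: ignore[no-redef]

        writer_cls = SummaryWriter
    except ImportError as exc:
        msg = "Neither torch.utils.tensorboard nor tensorboardX is installed."
        logging.getLogger(__name__).error(msg)
        raise ImportError(msg) from exc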

autointent/_dump_tools/unit_dumpers.py

Lines changed: 4 additions & 4 deletions
@@ -228,12 +228,12 @@ def load(path: Path, **kwargs: Any) -> PeftModel:  # noqa: ANN401, ARG004
         if (path / "ptuning").exists():
             # prompt learning model
             ptuning_path = path / "ptuning"
-            model = AutoModelForSequenceClassification.from_pretrained(ptuning_path / "base_model")  # type: ignore[no-untyped-call]
+            model = AutoModelForSequenceClassification.from_pretrained(ptuning_path / "base_model")
             return PeftModel.from_pretrained(model, ptuning_path / "peft")
         if (path / "lora").exists():
             # merged lora model
             lora_path = path / "lora"
-            return AutoModelForSequenceClassification.from_pretrained(lora_path)  # type: ignore[no-untyped-call,no-any-return]
+            return AutoModelForSequenceClassification.from_pretrained(lora_path)  # type: ignore[no-any-return]
         msg = f"Invalid PeftModel directory structure at {path}. Expected 'ptuning' or 'lora' subdirectory."
         raise ValueError(msg)
@@ -252,7 +252,7 @@ def dump(obj: PreTrainedModel, path: Path, exists_ok: bool) -> None:

     @staticmethod
     def load(path: Path, **kwargs: Any) -> PreTrainedModel:  # noqa: ANN401, ARG004
-        return AutoModelForSequenceClassification.from_pretrained(path)  # type: ignore[no-untyped-call,no-any-return]
+        return AutoModelForSequenceClassification.from_pretrained(path)  # type: ignore[no-any-return]

     @classmethod
     def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
@@ -269,7 +269,7 @@ def dump(obj: PreTrainedTokenizer | PreTrainedTokenizerFast, path: Path, exists_

     @staticmethod
     def load(path: Path, **kwargs: Any) -> PreTrainedTokenizer | PreTrainedTokenizerFast:  # noqa: ANN401, ARG004
-        return AutoTokenizer.from_pretrained(path)  # type: ignore[no-any-return]
+        return AutoTokenizer.from_pretrained(path)  # type: ignore[no-any-return,no-untyped-call]

     @classmethod
     def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
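For context, the PEFT load path above dispatches on the dump-directory layout: a "ptuning" subdirectory holds a base model plus a prompt-learning adapter, while "lora" holds an already-merged model. A standalone sketch of the same dispatch, using only the directory names from the hunk and plain transformers/peft API:

from pathlib import Path

from peft import PeftModel
from transformers import AutoModelForSequenceClassification


def load_classifier(path: Path):
    """Sketch of the dump-directory dispatch shown in the hunk above."""
    if (path / "ptuning").exists():
        # prompt-learning checkpoint: base model and PEFT adapter saved separately
        base = AutoModelForSequenceClassification.from_pretrained(path / "ptuning" / "base_model")
        return PeftModel.from_pretrained(base, path / "ptuning" / "peft")
    if (path / "lora").exists():
        # LoRA weights were merged into the base model before dumping
        return AutoModelForSequenceClassification.from_pretrained(path / "lora")
    msg = f"Invalid PeftModel directory structure at {path}. Expected 'ptuning' or 'lora' subdirectory."
    raise ValueError(msg)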

autointent/_pipeline/_pipeline.py

Lines changed: 2 additions & 0 deletions
@@ -380,9 +380,11 @@ def _refit(self, context: Context) -> None:

         context.data_handler.prepare_for_refit()

+        scoring_module.clear_cache()
         scoring_module.fit(*scoring_module.get_train_data(context))
         scores = scoring_module.predict(context.data_handler.train_utterances(1))

+        decision_module.clear_cache()
         decision_module.fit(scores, context.data_handler.train_labels(1), context.data_handler.tags)

     def predict_with_metadata(self, utterances: list[str]) -> InferencePipelineOutput:
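The two added `clear_cache()` calls make the refit step drop each module's cached state before fitting again on the prepared data. A rough sketch of the pattern; apart from `clear_cache`, `fit`, and `predict`, the names below are illustrative and not autointent's actual interfaces:

# Illustrative refit flow; ScoringModule / DecisionModule are stand-ins.
def refit(scoring_module, decision_module, train_texts, train_labels):
    scoring_module.clear_cache()   # drop cached embeddings / fitted state from optimization
    scoring_module.fit(train_texts, train_labels)
    scores = scoring_module.predict(train_texts)

    decision_module.clear_cache()  # same for the decision head
    decision_module.fit(scores, train_labels)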
@@ -1,7 +1,6 @@
-# TODO add sklearn RandomForestClassifier
 search_space:
   - node_type: scoring
-    target_metric: scoring_roc_auc
+    target_metric: scoring_f1
     search_space:
       - module_name: knn
         k:
@@ -13,24 +12,15 @@ search_space:
         k:
           low: 1
           high: 20
-      - module_name: description_bi
-        temperature:
-          low: 0.01
-          high: 10
-          log: true
-      - module_name: description_cross
-        temperature:
-          low: 0.01
-          high: 10
-          log: true
-      - module_name: rerank
-        k:
-          low: 10
-          high: 40
-        m:
-          low: 1
-          high: 10
-        weights: [uniform, distance, closest]
+      - module_name: catboost
+        depth: [3, 6, 10]
+        features_type: ["text", "embedding", "both"]
+      - module_name: sklearn
+        clf_name: [RandomForestClassifier]
+        n_estimators: [200, 300, 500]
+        max_depth: [50, 100, 150]
+        max_features: [sqrt, log2]
+        n_jobs: [8]
   - node_type: decision
     target_metric: decision_accuracy
     search_space:
@@ -44,5 +34,7 @@ search_space:
       - module_name: adaptive
 hpo_config:
   sampler: tpe
-  n_trials: 128 # dont know yet if its good
-  n_startup_trials: 32
+  n_trials: 55
+  n_startup_trials: 20
+embedder_config:
+  model_name: intfloat/multilingual-e5-large-instruct
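The `sklearn` scoring entry above is simply a grid over standard scikit-learn constructor arguments. To make the grid concrete, one sampled combination corresponds to (plain scikit-learn API, shown only as an illustration):

from sklearn.ensemble import RandomForestClassifier

# One point from the preset's grid:
# n_estimators=200, max_depth=50, max_features="sqrt", n_jobs=8
clf = RandomForestClassifier(n_estimators=200, max_depth=50, max_features="sqrt", n_jobs=8)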
Lines changed: 9 additions & 4 deletions
@@ -1,6 +1,6 @@
 search_space:
   - node_type: scoring
-    target_metric: scoring_roc_auc
+    target_metric: scoring_f1
     search_space:
       - module_name: knn
         k:
@@ -20,7 +20,12 @@ search_space:
           low: 0.1
           high: 0.9
       - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
 hpo_config:
-  sampler: random
-  n_trials: 128 # dont know yet if its good
-  n_startup_trials: 32
+  sampler: tpe
+  n_trials: 20
+  n_startup_trials: 10
+embedder_config:
+  model_name: intfloat/multilingual-e5-large-instruct
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+search_space:
+  - node_type: scoring
+    target_metric: scoring_f1
+    search_space:
+      - module_name: knn
+        k:
+          low: 1
+          high: 20
+        weights: [uniform, distance, closest]
+      - module_name: linear
+      - module_name: mlknn
+        k:
+          low: 1
+          high: 20
+      - module_name: catboost
+      - module_name: sklearn
+        clf_name: [RandomForestClassifier]
+        n_estimators: [150]
+        max_depth: [100]
+        n_jobs: [8]
+  - node_type: decision
+    target_metric: decision_accuracy
+    search_space:
+      - module_name: threshold
+        thresh:
+          low: 0.1
+          high: 0.9
+      - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
+hpo_config:
+  sampler: tpe
+  n_trials: 20
+  n_startup_trials: 10
+embedder_config:
+  model_name: intfloat/multilingual-e5-large-instruct
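Each preset is a plain YAML document with a top-level `search_space` list plus `hpo_config` and, for the classic presets, `embedder_config`, so it can be inspected with PyYAML. A minimal sketch; the path below is a placeholder for any of the preset files added in this commit, and how autointent itself consumes presets is not shown here:

import yaml  # PyYAML

# Placeholder path; point this at one of the preset files from this commit.
with open("path/to/preset.yaml") as f:
    preset = yaml.safe_load(f)

# Each node lists the modules whose hyperparameters will be searched.
for node in preset["search_space"]:
    modules = [entry["module_name"] for entry in node["search_space"]]
    print(node["node_type"], node["target_metric"], modules)

print(preset["hpo_config"])  # e.g. {'sampler': 'tpe', 'n_trials': 20, 'n_startup_trials': 10}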

autointent/_presets/nn-heavy.yaml

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+search_space:
+  - node_type: scoring
+    target_metric: scoring_f1
+    search_space:
+      - module_name: cnn
+        dropout:
+          low: 0.1
+          high: 0.3
+        batch_size: [32, 64, 128]
+        learning_rate:
+          low: 5.0e-4
+          high: 1.0e-2
+          log: True
+        num_train_epochs: [60]
+        embed_dim: [64, 96, 128]
+        kernel_sizes: [[3, 4, 5]]
+        num_filters: [64, 96, 128]
+      - module_name: rnn
+        dropout:
+          low: 0.1
+          high: 0.3
+        batch_size: [32, 64, 128]
+        learning_rate:
+          low: 5.0e-4
+          high: 1.0e-2
+          log: True
+        num_train_epochs: [60]
+        embed_dim: [64, 96, 128]
+        hidden_dim: [128, 256, 512]
+  - node_type: decision
+    target_metric: decision_accuracy
+    search_space:
+      - module_name: threshold
+        thresh:
+          low: 0.1
+          high: 0.9
+      - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
+hpo_config:
+  sampler: tpe
+  n_trials: 55
+  n_startup_trials: 20
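The `hpo_config` section (sampler: tpe, n_trials: 55, n_startup_trials: 20) reads like a thin wrapper over an Optuna TPE study. A hedged sketch of the equivalent plain-Optuna setup, with the `cnn` ranges above plugged in and a dummy objective; how autointent actually builds its study is not shown in this diff:

import optuna


def objective(trial: optuna.Trial) -> float:
    # Ranges taken from the cnn entry above; the objective body is a placeholder.
    dropout = trial.suggest_float("dropout", 0.1, 0.3)
    lr = trial.suggest_float("learning_rate", 5e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    return dropout + lr + batch_size  # stand-in for the real scoring_f1 metric


study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(n_startup_trials=20),
)
study.optimize(objective, n_trials=55)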

autointent/_presets/nn-medium.yaml

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+search_space:
+  - node_type: scoring
+    target_metric: scoring_f1
+    search_space:
+      - module_name: cnn
+        dropout:
+          low: 0.1
+          high: 0.3
+        batch_size: [32, 64, 128]
+        learning_rate:
+          low: 5.0e-4
+          high: 1.0e-2
+          log: True
+        num_train_epochs: [60]
+        embed_dim: [64]
+        kernel_sizes: [[3, 4, 5]]
+        num_filters: [64]
+      - module_name: rnn
+        dropout:
+          low: 0.1
+          high: 0.3
+        batch_size: [32, 64, 128]
+        learning_rate:
+          low: 5.0e-4
+          high: 1.0e-2
+          log: True
+        num_train_epochs: [60]
+        embed_dim: [64]
+        hidden_dim: [128]
+  - node_type: decision
+    target_metric: decision_accuracy
+    search_space:
+      - module_name: threshold
+        thresh:
+          low: 0.1
+          high: 0.9
+      - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
+hpo_config:
+  sampler: tpe
+  n_trials: 55
+  n_startup_trials: 20
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+search_space:
+  - node_type: scoring
+    target_metric: scoring_f1
+    search_space:
+      - module_name: bert
+        classification_model_config:
+          - model_name: microsoft/deberta-v3-large
+        num_train_epochs: [30]
+        batch_size: [32, 64]
+        learning_rate:
+          low: 1.0e-5
+          high: 1.0e-4
+          log: True
+  - node_type: decision
+    target_metric: decision_accuracy
+    search_space:
+      - module_name: threshold
+        thresh:
+          low: 0.1
+          high: 0.9
+      - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
+hpo_config:
+  sampler: tpe
+  n_trials: 40
+  n_startup_trials: 20
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+search_space:
+  - node_type: scoring
+    target_metric: scoring_f1
+    search_space:
+      - module_name: bert
+        classification_model_config:
+          - model_name: microsoft/deberta-v3-small
+        num_train_epochs: [30]
+        batch_size: [32, 64, 128]
+        learning_rate:
+          low: 1.0e-5
+          high: 1.0e-4
+          log: True
+  - node_type: decision
+    target_metric: decision_accuracy
+    search_space:
+      - module_name: threshold
+        thresh:
+          low: 0.1
+          high: 0.9
+      - module_name: argmax
+      - module_name: jinoos
+      - module_name: tunable
+      - module_name: adaptive
+hpo_config:
+  sampler: tpe
+  n_trials: 40
+  n_startup_trials: 20
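One of the fixes in the commit message is the `report_to` handling for the torch/bert scorers. With Hugging Face transformers, keeping the Trainer from auto-reporting to wandb or TensorBoard is done via `TrainingArguments(report_to="none")`. A minimal sketch with values loosely mirroring the bert preset above; this shows the training arguments only, not the project's actual scorer code:

from transformers import TrainingArguments

# report_to="none" disables integration loggers (wandb, tensorboard, ...)
# during trainer-based scoring; output_dir is a placeholder.
args = TrainingArguments(
    output_dir="./bert-scorer",
    num_train_epochs=30,
    per_device_train_batch_size=32,
    learning_rate=1e-5,
    report_to="none",
)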
