Skip to content

Commit 7b1a0d2

Browse files
Darinochka, voorhs, github-actions[bot]
authored
feat: add DISABLE_EMISSIONS_TRACKING (#191)
* feat: add DISABLE_EMISSIONS_TRACKING
* try to fix docs error
* Update optimizer_config.schema.json
* another attempt
* Update optimizer_config.schema.json
* i give up for now
* Update optimizer_config.schema.json

---------

Co-authored-by: voorhs <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 812940c commit 7b1a0d2

File tree

17 files changed

+47
-34
lines changed

17 files changed

+47
-34
lines changed

autointent/configs/_transformers.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,10 @@ def get_prompt_type(self, prompt_type: TaskTypeEnum | None) -> str | None: # no
115115

116116

117117
class CrossEncoderConfig(HFModelConfig):
118-
model_name: str = Field("cross-encoder/ms-marco-MiniLM-L-6-v2", description="Name of the hugging face model.")
118+
model_name: str = Field("cross-encoder/ms-marco-MiniLM-L6-v2", description="Name of the hugging face model.")
119119
train_head: bool = Field(
120120
False, description="Whether to train the head of the model. If False, LogReg will be trained."
121121
)
122+
tokenizer_config: TokenizerConfig = Field(
123+
default_factory=lambda: TokenizerConfig(max_length=512)
124+
) # this is because sentence-transformers doesn't allow you to customize tokenizer settings properly

autointent/modules/scoring/_dnnc/dnnc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class DNNCScorer(BaseScorer):
3636
utterances = ["what is your name?", "how are you?"]
3737
labels = [0, 1]
3838
scorer = DNNCScorer(
39-
cross_encoder_config="cross-encoder/ms-marco-MiniLM-L-6-v2",
39+
cross_encoder_config="cross-encoder/ms-marco-MiniLM-L6-v2",
4040
embedder_config="sergeyzh/rubert-tiny-turbo",
4141
k=5,
4242
)

autointent/nodes/emissions_tracker.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import logging
5+
import os
56

67
from codecarbon import EmissionsTracker as CodeCarbonTracker # type: ignore[import-untyped]
78
from codecarbon.output import EmissionsData # type: ignore[import-untyped]
@@ -20,22 +21,31 @@ def __init__(self, project_name: str, measure_power_secs: int = 1) -> None:
2021
measure_power_secs: How often to measure power consumption in seconds.
2122
"""
2223
self._logger = logger
23-
self.tracker = CodeCarbonTracker(project_name=project_name, measure_power_secs=measure_power_secs)
24+
self._enabled = int(os.getenv("TRACK_EMISSIONS", "0"))
25+
if self._enabled:
26+
self.tracker = CodeCarbonTracker(project_name=project_name, measure_power_secs=measure_power_secs)
27+
else:
28+
self._logger.info("Emissions tracking is disabled via TRACK_EMISSIONS environment variable")
29+
self.tracker = None
2430

2531
def start_task(self, task_name: str) -> None:
2632
"""Start tracking emissions for a specific task.
2733
2834
Args:
2935
task_name: Name of the task to track emissions for.
3036
"""
31-
self.tracker.start_task(task_name)
37+
if self._enabled:
38+
self.tracker.start_task(task_name)
3239

3340
def stop_task(self) -> dict[str, float]:
3441
"""Stop tracking emissions and return the emissions data.
3542
3643
Returns:
3744
Dictionary containing emissions metrics.
3845
"""
46+
if not self._enabled:
47+
return {}
48+
3949
emissions_data = self.tracker.stop_task()
4050
_ = self.tracker.stop()
4151
return self._process_metrics(emissions_data)

docs/optimizer_config.schema.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"additionalProperties": false,
55
"properties": {
66
"model_name": {
7-
"default": "cross-encoder/ms-marco-MiniLM-L-6-v2",
7+
"default": "cross-encoder/ms-marco-MiniLM-L6-v2",
88
"description": "Name of the hugging face model.",
99
"title": "Model Name",
1010
"type": "string"
@@ -424,11 +424,11 @@
424424
"cross_encoder_config": {
425425
"$ref": "#/$defs/CrossEncoderConfig",
426426
"default": {
427-
"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2",
427+
"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2",
428428
"batch_size": 32,
429429
"device": null,
430430
"tokenizer_config": {
431-
"max_length": null,
431+
"max_length": 512,
432432
"padding": true,
433433
"truncation": true
434434
},

tests/_transformers/test_nli_transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def data_handler():
1313

1414

1515
def test_nli_transformer_predict_without_trained_head(data_handler):
16-
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", "train_head": True})
16+
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2", "train_head": True})
1717
with pytest.raises(ValueError, match="Classifier is not trained yet"):
1818
model.predict(data_handler.train_utterances(0))
1919

@@ -48,7 +48,7 @@ def check_ranking(ranked, labels):
4848

4949

5050
def test_nli_transformer_predict_with_train_head(data_handler):
51-
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", "train_head": True})
51+
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2", "train_head": True})
5252
texts = data_handler.train_utterances(0)
5353
labels = data_handler.train_labels(0)
5454
model.fit(texts, labels)
@@ -60,7 +60,7 @@ def test_nli_transformer_predict_with_train_head(data_handler):
6060

6161

6262
def test_nli_transformer_predict_default(data_handler):
63-
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", "train_head": False})
63+
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2", "train_head": False})
6464
texts = data_handler.train_utterances(0)
6565
labels = data_handler.train_labels(0)
6666
predicted = model.predict(build_pairs(texts))
@@ -71,7 +71,7 @@ def test_nli_transformer_predict_default(data_handler):
7171

7272

7373
def test_nli_transformer_predict_default_with_fit(data_handler):
74-
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", "train_head": False})
74+
model = Ranker(cross_encoder_config={"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2", "train_head": False})
7575
texts = data_handler.train_utterances(0)
7676
labels = data_handler.train_labels(0)
7777
model.fit(texts, labels)

tests/assets/configs/description.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
embedder_config:
1515
- model_name: sentence-transformers/all-MiniLM-L6-v2
1616
cross_encoder_config:
17-
- model_name: cross-encoder/ms-marco-MiniLM-L-6-v2
17+
- model_name: cross-encoder/ms-marco-MiniLM-L6-v2
1818
encoder_type: [cross, bi]
1919
- node_type: decision
2020
target_metric: decision_accuracy

tests/assets/configs/full_training.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ embedder_config:
3030
use_cache: true
3131
cross_encoder_config:
3232
batch_size: 32
33-
model_name: cross-encoder/ms-marco-MiniLM-L-6-v2
33+
model_name: cross-encoder/ms-marco-MiniLM-L6-v2

tests/assets/configs/multiclass.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
- module_name: linear
1616
- module_name: dnnc
1717
cross_encoder_config:
18-
- model_name: cross-encoder/ms-marco-MiniLM-L-6-v2
18+
- model_name: cross-encoder/ms-marco-MiniLM-L6-v2
1919
train_head: true
2020
- avsolatorio/GIST-small-Embedding-v0
2121
k: [1, 3]
@@ -25,7 +25,7 @@
2525
m: [ 2, 3 ]
2626
use_crosencoder_scores: [true, false]
2727
cross_encoder_config:
28-
- cross-encoder/ms-marco-MiniLM-L-6-v2
28+
- cross-encoder/ms-marco-MiniLM-L6-v2
2929
- module_name: sklearn
3030
clf_name: [RandomForestClassifier]
3131
n_estimators: [5, 10]

tests/assets/configs/multilabel.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
use_crosencoder_scores: [true, false]
2222
m: [ 2, 3 ]
2323
cross_encoder_config:
24-
- model_name: cross-encoder/ms-marco-MiniLM-L-6-v2
24+
- model_name: cross-encoder/ms-marco-MiniLM-L6-v2
2525
- module_name: sklearn
2626
clf_name: [RandomForestClassifier]
2727
n_estimators: [5, 10]

tests/configs/test_combined_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def valid_optimizer_config():
1717
{
1818
"module_name": "dnnc",
1919
"cross_encoder_config": [
20-
{"model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", "train_head": True},
20+
{"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2", "train_head": True},
2121
{"model_name": "avsolatorio/GIST-small-Embedding-v0", "train_head": False},
2222
],
2323
"k": [1, 3],
@@ -63,7 +63,7 @@ def test_invalid_optimizer_config_missing_field():
6363
"node_type": "scoring",
6464
# Missing "target_metric"
6565
"search_space": [
66-
{"module_name": "dnnc", "cross_encoder_name": ["cross-encoder/ms-marco-MiniLM-L-6-v2"], "k": [1, 3]}
66+
{"module_name": "dnnc", "cross_encoder_name": ["cross-encoder/ms-marco-MiniLM-L6-v2"], "k": [1, 3]}
6767
],
6868
}
6969
]
@@ -80,7 +80,7 @@ def test_invalid_optimizer_config_wrong_type():
8080
"search_space": [
8181
{
8282
"module_name": "dnnc",
83-
"cross_encoder_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", # Should be a list
83+
"cross_encoder_name": "cross-encoder/ms-marco-MiniLM-L6-v2", # Should be a list
8484
"k": "wrong_type", # Should be a list of integers
8585
"train_head": "true", # Should be a boolean, not a string
8686
}

0 commit comments

Comments
 (0)