Skip to content

Commit 7b42113

Browse files
authored
Push details without converting fields to str (#572)
* push details as true datatype
* commit
1 parent bd578a8 commit 7b42113

File tree

4 files changed

+5 -4 lines changed

4 files changed

+5 -4 lines changed

src/lighteval/logging/evaluation_tracker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def save(self) -> None:
198198
details_datasets: dict[str, Dataset] = {}
199199
for task_name, task_details in self.details_logger.details.items():
200200
# Create a dataset from the dictionary - we force cast to str to avoid formatting problems for nested objects
201-
dataset = Dataset.from_list([{k: str(v) for k, v in asdict(detail).items()} for detail in task_details])
201+
dataset = Dataset.from_list([asdict(detail) for detail in task_details])
202202

203203
# We don't keep 'id' around if it's there
204204
column_names = dataset.column_names

src/lighteval/logging/info_loggers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ class Detail:
201201
num_effective_few_shots: int = 0
202202
num_asked_few_shots: int = 0
203203
predictions: list = field(default_factory=list)
204+
prediction_logits: list = field(default_factory=list)
204205
input_tokens: list = field(default_factory=list)
205206
cont_tokens: list = field(default_factory=list)
206207
truncated: list = field(default_factory=list)

src/lighteval/models/model_output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GenerativeResponse(ModelResponse):
6363
logits: Optional[list[float]] = None # Generated text logits
6464

6565
def get_result_for_eval(self):
66-
return self.result if self.logits is None else (self.result, self.logits)
66+
return self.result
6767

6868

6969
@dataclass

tests/logging/test_evaluation_tracker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def test_results_logging(mock_evaluation_tracker: EvaluationTracker):
9999
@pytest.mark.evaluation_tracker(save_details=True)
100100
def test_details_logging(mock_evaluation_tracker, mock_datetime):
101101
task_details = {
102-
"task1": [DetailsLogger.CompiledDetail(truncated=10, padded=5)],
103-
"task2": [DetailsLogger.CompiledDetail(truncated=20, padded=10)],
102+
"task1": [DetailsLogger.CompiledDetail(hashes=None, truncated=10, padded=5)],
103+
"task2": [DetailsLogger.CompiledDetail(hashes=None, truncated=20, padded=10)],
104104
}
105105
mock_evaluation_tracker.details_logger.details = task_details
106106

0 commit comments

Comments
 (0)