Skip to content

Commit 9f0c439

Browse files
maxxgx and leoll2 authored
Relax constraints for subset split (#1065)
Co-authored-by: Leonardo Lai <[email protected]>
1 parent cdf13d7 commit 9f0c439

File tree

2 files changed

+4
-9
lines changed

2 files changed

+4
-9
lines changed

libs/configuration_tools/src/geti_configuration_tools/training_configuration.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
# Copyright (C) 2022-2025 Intel Corporation
22
# LIMITED EDGE SOFTWARE DISTRIBUTION LICENSE
3-
import math
43
from typing import Any
54

65
from geti_types import ID, PersistentEntity
@@ -42,13 +41,9 @@ class SubsetSplit(BaseModel):
4241
def validate_subsets(self) -> "SubsetSplit":
4342
if (self.training + self.validation + self.test) != 100:
4443
raise ValueError("Sum of subsets should be equal to 100")
45-
# check that all subsets have at least one item if dataset_size is provided
46-
if self.dataset_size is not None:
47-
validation_size = math.floor(self.dataset_size * self.validation / 100)
48-
test_size = math.floor(self.dataset_size * self.test / 100)
49-
train_size = self.dataset_size - validation_size - test_size
50-
if train_size < 1 or validation_size < 1 or test_size < 1:
51-
raise ValueError("Each subset must be at least contain one item")
44+
# check that all subsets can have at least one item
45+
if self.dataset_size is not None and self.dataset_size < 3:
46+
raise ValueError("The dataset is too small to assign at least one item to each subset")
5247
return self
5348

5449

libs/configuration_tools/tests/test_training_configuration.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ def test_validate_subsets(self) -> None:
254254
"training": 50,
255255
"validation": 49,
256256
"test": 1,
257-
"dataset_size": 10,
257+
"dataset_size": 2,
258258
}
259259
}
260260
}

0 commit comments

Comments
 (0)