Skip to content

Commit 3cd955c

Browse files
committed
knn and forest config updates
1 parent 984aab1 commit 3cd955c

File tree

5 files changed

+56
-5
lines changed

5 files changed

+56
-5
lines changed

configs/spmd/large_scale/forest.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd forest classification parameters": {
5+
"algorithm": {
6+
"estimator": "RandomForestClassifier"
7+
}
8+
},
9+
"synthetic data": {
10+
"data": [
11+
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } },
12+
{ "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
13+
]
14+
}
15+
},
16+
"TEMPLATES": {
17+
"basicstats": {
18+
"SETS": [
19+
"sklearnex spmd implementation",
20+
"large scale 2k parameters",
21+
"synthetic data",
22+
"spmd forest classification parameters"
23+
]
24+
}
25+
}
26+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd forest classification parameters": {
5+
"algorithm": {
6+
"estimator": "RandomForestClassifier"
7+
}
8+
},
9+
"synthetic data": {
10+
"data": [
11+
{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
12+
]
13+
}
14+
},
15+
"TEMPLATES": {
16+
"basicstats": {
17+
"SETS": [
18+
"sklearnex spmd implementation",
19+
"large scale strong 2k parameters",
20+
"synthetic data",
21+
"spmd forest classification parameters"
22+
]
23+
}
24+
}
25+
}

configs/spmd/large_scale/knn.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
22 22
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
23 23
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 550000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
24 24
{ "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } },
25-
{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 1000000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
25+
{ "source": "make_classification", "split_kwargs": { "train_size": 200000, "test_size": 200000 }, "generation_kwargs": { "n_samples": 400000, "n_features": 10, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }
26 26
]
27 27
}
28 28
},
},
@@ -32,7 +32,7 @@
32 32
"common knn parameters",
33 33
"synthetic classification data",
34 34
"sklearnex spmd implementation",
35-
"large scale default parameters",
35+
"large scale 2k parameters",
36 36
"spmd knn cls parameters"
37 37
]
38 38
}
}

sklbench/benchmarks/sklearn_estimator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,8 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
525 525
result_template = enrich_result(result_template, bench_case)
526 526
if "assume_finite" in context_params:
527 527
result_template["assume_finite"] = context_params["assume_finite"]
528-
if hasattr(estimator_instance, "get_params"):
529-
estimator_params = estimator_instance.get_params()
528+
#if hasattr(estimator_instance, "get_params"):
529+
# estimator_params = estimator_instance.get_params()
530 530
# note: "handle" is not JSON-serializable
531 531
if "handle" in estimator_params:
532 532
del estimator_params["handle"]

sklbench/datasets/transformer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def split_and_transform_data(bench_case, data, data_description):
181 181
"format": data_format,
182 182
"order": data_order,
183 183
"dtype": data_dtype,
184-
"samples": converted_data.shape[0],
184+
"samples (per rank)": converted_data.shape[0],
185 185
}
186 186
if len(converted_data.shape) == 2 and converted_data.shape[1] > 1:
187 187
data_description[subset_name]["features"] = converted_data.shape[1]

0 commit comments

Comments
 (0)