Skip to content

Commit 8025719

Browse files
committed
Add DBSCAN large-scale support and tune logreg details (max_iter 30 -> 16, log iteration counts)
1 parent 3cd955c commit 8025719

File tree

4 files changed

+67
-1
lines changed

4 files changed

+67
-1
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{
2+
"INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd dbscan parameters": {
5+
"algorithm": {
6+
"estimator": "DBSCAN",
7+
"estimator_methods": {
8+
"training": "fit"
9+
}
10+
},
11+
"data": {
12+
"dtype": "float64"
13+
}
14+
},
15+
"synthetic dataset": {
16+
"data": [
17+
{ "source": "make_blobs", "generation_kwargs": { "n_samples": 400000, "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
18+
]
19+
}
20+
},
21+
"TEMPLATES": {
22+
"dbscan": {
23+
"SETS": [
24+
"common dbscan parameters",
25+
"synthetic dataset",
26+
"sklearnex spmd implementation",
27+
"large scale strong parameters",
28+
"spmd dbscan parameters"
29+
]
30+
}
31+
}
32+
}

configs/spmd/large_scale/large_scale.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,24 @@
2727
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
2828
}
2929
},
30+
"large scale <64 parameters": {
31+
"data": {
32+
"dtype": "float64",
33+
"distributed_split": "None"
34+
},
35+
"bench": {
36+
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
37+
}
38+
},
39+
"large scale >64 parameters": {
40+
"data": {
41+
"dtype": "float64",
42+
"distributed_split": "None"
43+
},
44+
"bench": {
45+
"mpi_params": {"n": [768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
46+
}
47+
},
3048
"large scale strong 2k parameters": {
3149
"data": {
3250
"dtype": "float64",
@@ -36,6 +54,15 @@
3654
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
3755
}
3856
},
57+
"large scale strong <64 parameters": {
58+
"data": {
59+
"dtype": "float64",
60+
"distributed_split": "rank_based"
61+
},
62+
"bench": {
63+
"mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
64+
}
65+
},
3966
"large scale impi parameters": {
4067
"data": {
4168
"dtype": "float64",

configs/spmd/large_scale/logreg_strong.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"algorithm":{
66
"estimator": "LogisticRegression",
77
"estimator_methods": { "inference": "predict" },
8-
"estimator_params": { "max_iter": 30 }
8+
"estimator_params": { "max_iter": 16 }
99
}
1010
},
1111
"synthetic data": {

sklbench/utils/measurement.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,16 @@ def measure_time(
7272
)
7373
times = []
7474
func_return_value = None
75+
inners, iters = [], []
7576
while len(times) < n_runs:
7677
if enable_itt and itt_is_available:
7778
itt.resume()
7879
t0 = timeit.default_timer()
7980
func_return_value = func(*args, **kwargs)
8081
t1 = timeit.default_timer()
82+
if hasattr(func.__self__, "_n_inner_iter"):
83+
inners.append(func.__self__._n_inner_iter)
84+
iters.append(func.__self__.n_iter_)
8185
if enable_itt and itt_is_available:
8286
itt.pause()
8387
times.append(t1 - t0)
@@ -88,6 +92,9 @@ def measure_time(
8892
f"exceeded time limit ({time_limit} seconds)"
8993
)
9094
break
95+
from mpi4py import MPI
96+
if MPI.COMM_WORLD.Get_rank() == 0:
97+
logger.debug("iters across n runs: " + str(iters) + ", inner iters across n runs: " + str(inners))
9198
logger.debug(times)
9299
#mean, std = box_filter(times)
93100
#if std / mean > std_mean_ratio:

0 commit comments

Comments
 (0)