bf16

md-shafiul-alam · md-shafiul-alam · commit 817710b83d09 · 2024-09-24T00:05:42.000-07:00
diff --git a/configs/regular/bf16/dbscan.json b/configs/regular/bf16/dbscan.json
@@ -0,0 +1,41 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common dbscan parameters": {
+            "algorithm": {
+                "estimator": "DBSCAN",
+                "estimator_params": {
+                    "eps": "[SPECIAL_VALUE]distances_quantile:0.01",
+                    "min_samples": 5,
+                    "metric": "euclidean"
+                }
+            },
+            "data": {
+                "dtype": ["float32"]
+            }
+        },
+        "sklearn dbscan parameters": {
+            "algorithm": {
+                "estimator_params": {
+                    "algorithm": "brute",
+                    "n_jobs": "[SPECIAL_VALUE]physical_cpus"
+                }
+            }
+        },
+        "synthetic dataset": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 10, "centers": 10 }, "algorithm": { "estimator_params": { "eps": 5, "min_samples": 5 } } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn dbscan": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common dbscan parameters",
+                "sklearn dbscan parameters",
+                "synthetic dataset"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/ensemble.json b/configs/regular/bf16/ensemble.json
@@ -0,0 +1,45 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common ensemble params": {
+            "algorithm": {
+                "estimator_params": {
+                    "n_estimators": 200,
+                    "max_depth": 16,
+                    "max_samples": 1.0,
+                    "min_samples_split": 5,
+                    "min_samples_leaf": 2,
+                    "min_impurity_decrease": 0.0,
+                    "bootstrap": true,
+                    "random_state": 42
+                }
+            }
+        },
+        "sklearn ensemble classifier params": {
+            "algorithm": {
+                "estimator": ["RandomForestClassifier", "ExtraTreesClassifier"],
+                "estimator_params": {
+                    "criterion": "gini",
+                    "max_features": "sqrt",
+                    "max_leaf_nodes": null,
+                    "n_jobs": "[SPECIAL_VALUE]physical_cpus"
+                }
+            }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 501000, "n_features": 10, "n_classes": 2 },  "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn ensemble classification": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common ensemble params",
+                "sklearn ensemble classifier params",
+                "synthetic data"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/kmeans.json b/configs/regular/bf16/kmeans.json
@@ -0,0 +1,40 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common kmeans parameters": {
+            "algorithm": {
+                "estimator": "KMeans",
+                "estimator_params": {
+                    "n_clusters": "[SPECIAL_VALUE]auto",
+                    "n_init": 1,
+                    "max_iter": 30,
+                    "tol": 1e-3,
+                    "random_state": 42
+                },
+                "estimator_methods": { "inference": "predict" }
+            },
+            "data": {
+                "dtype": ["float32", "float64"],
+                "preprocessing_kwargs": { "normalize": true }
+            }
+        },
+        "sklearn kmeans parameters": {
+            "algorithm": { "estimator_params": { "init": "k-means++", "algorithm": "lloyd" } }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000,  "n_features": 100, "centers": 100 }, "algorithm": { "estimator_params": { "n_clusters": 100, "max_iter": 100 } } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn kmeans": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common kmeans parameters",
+                "sklearn kmeans parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/knn.json b/configs/regular/bf16/knn.json
@@ -0,0 +1,34 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common knn parameters": {
+            "algorithm": {
+                "estimator_params": {
+                    "n_neighbors": [10, 100],
+                    "weights": "uniform"
+                }
+            },
+            "data": {
+                "preprocessing_kwargs": { "normalize": true }
+            }
+        },
+        "sklearn knn parameters": {
+            "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } }
+        },
+        "synthetic classification data": {
+            "data": [
+                { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 50000 },   "generation_kwargs": {  "n_samples": 550000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn brute knn clsf": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common knn parameters",
+                "sklearn knn parameters",
+                "synthetic classification data"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/linear_model.json b/configs/regular/bf16/linear_model.json
@@ -0,0 +1,29 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "synthetic data": {
+            "data": [
+                { "source": "make_regression", "generation_kwargs": { "n_samples": 3005000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 3000000, "test_size": 5000 } }
+            ]
+        },
+        "common linear parameters": {
+            "algorithm": {
+                "estimator": "LinearRegression",
+                "estimator_params": { "fit_intercept": true, "copy_X": true }
+            }
+        },
+        "sklearn linear parameters": {
+            "algorithm": { "estimator_params": { "n_jobs": "[SPECIAL_VALUE]physical_cpus" } }
+        }
+    },
+    "TEMPLATES": {
+        "sklearn linear": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common linear parameters",
+                "sklearn linear parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/logreg.json b/configs/regular/bf16/logreg.json
@@ -0,0 +1,42 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "common logreg parameters": {
+            "algorithm": {
+                "estimator": "LogisticRegression",
+                "estimator_methods": { "inference": "predict" },
+                "estimator_params": {
+                    "penalty": "l2",
+                    "tol": 1e-4,
+                    "C": 1.0,
+                    "l1_ratio": null,
+                    "max_iter": 200
+                }
+            }
+        },
+        "sklearn logreg parameters": {
+            "algorithm": {
+                "estimator_params": {
+                    "solver": "lbfgs",
+                    "n_jobs": "[SPECIAL_VALUE]physical_cpus",
+                    "random_state": 42
+                }
+            }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 5001000, "n_features": 10, "n_classes": 2 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn logreg": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "common logreg parameters",
+                "sklearn logreg parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
diff --git a/configs/regular/bf16/pca.json b/configs/regular/bf16/pca.json
@@ -0,0 +1,33 @@
+{
+    "INCLUDE": ["../common/sklearn.json"],
+    "PARAMETERS_SETS": {
+        "pca parameters": {
+            "algorithm": {
+                "estimator": "PCA",
+                "estimator_params": {
+                    "n_components": 3,
+                    "copy": true,
+                    "whiten": false,
+                    "svd_solver": "covariance_eigh",
+                    "tol": 0.0,
+                    "iterated_power": 15,
+                    "random_state": 42
+                }
+            }
+        },
+        "synthetic data": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000,  "n_features": 10, "centers": 1 } }
+            ]
+        }
+    },
+    "TEMPLATES": {
+        "sklearn pca": {
+            "SETS": [
+                "sklearn-ex[gpu] implementations",
+                "pca parameters",
+                "synthetic data"
+            ]
+        }
+    }
+}
diff --git a/configs/spmd/large_scale/linear_model.json b/configs/spmd/large_scale/linear_model.json
@@ -10,7 +10,7 @@
         "synthetic data": {
             "data": [
                 { "source": "make_regression", "generation_kwargs": { "n_samples": 30005000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 30000000, "test_size": 5000 } },
-		{ "source": "make_regression", "generation_kwargs": { "n_samples": 305000,  "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }
+                { "source": "make_regression", "generation_kwargs": { "n_samples": 305000,  "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }
             ]
         }
     },
diff --git a/configs/spmd/large_scale/logreg.json b/configs/spmd/large_scale/logreg.json
@@ -10,7 +10,7 @@
 	},
         "synthetic data": {
             "data": [
-		{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
+                { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
                 { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 101000, "n_features": 1000, "n_classes": 2 } }
             ]
         }

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@`
`10`	`10`	`"synthetic data": {`
`11`	`11`	`"data": [`
`12`	`12`	`{ "source": "make_regression", "generation_kwargs": { "n_samples": 30005000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 30000000, "test_size": 5000 } },`
`13`		`- { "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }`
	`13`	`+ { "source": "make_regression", "generation_kwargs": { "n_samples": 305000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 300000, "test_size": 5000 } }`
`14`	`14`	`]`
`15`	`15`	`}`
`16`	`16`	`},`