|
| 1 | +# Copyright (C) 2020 Intel Corporation |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: MIT |
| 4 | + |
| 5 | +import argparse |
| 6 | +from bench import measure_function_time, parse_args, load_data, print_output |
| 7 | +from sklearn.model_selection import train_test_split |
| 8 | + |
# Command-line options specific to this benchmark. Further options (for
# example the `seed` read below) are not declared here and are presumably
# added by bench.parse_args -- confirm against that helper.
parser = argparse.ArgumentParser(
    description='scikit-learn train_test_split benchmark'
)
parser.add_argument(
    '--train-size', type=float, default=0.75,
    help='Size of training subset',
)
parser.add_argument(
    '--test-size', type=float, default=0.25,
    help='Size of testing subset',
)
# `store_true` flags default to False when absent from the command line.
parser.add_argument(
    '--do-not-shuffle', action='store_true',
    help='Do not perform data shuffle before splitting',
)
parser.add_argument(
    '--include-y', action='store_true',
    help='Include label (Y) in splitting',
)
parser.add_argument(
    '--rng', default=None,
    choices=('MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH',
             'MCG31', 'MCG59', 'MRG32K3A', 'PHILOX4X32X10',
             'NONDETERM', None),
    help='Random numbers generator for shuffling '
         '(only for IDP scikit-learn)',
)
params = parse_args(parser)

# Load generated data; only the first two returned values are used here.
X, y, _, _ = load_data(params)

# Positional arrays handed to train_test_split: X alone, or X together with y.
arrays = (X, y) if params.include_y else (X, )

# Keyword arguments for train_test_split. Key order is kept stable because
# the same dict is also reported through print_output as `alg_params`.
tts_params = dict(
    train_size=params.train_size,
    test_size=params.test_size,
    shuffle=not params.do_not_shuffle,
    random_state=params.seed,
)

# `rng` is forwarded only when explicitly requested; per the option's help
# text it is only meaningful for IDP scikit-learn.
if params.rng is not None:
    tts_params['rng'] = params.rng

# First element of the returned pair is reported as the time below; the
# second element is discarded.
elapsed, _ = measure_function_time(train_test_split, *arrays, **tts_params,
                                   params=params)

columns = (
    'batch', 'arch', 'prefix', 'function', 'threads', 'dtype', 'size',
    'time',
)

print_output(
    library='sklearn',
    algorithm='train_test_split',
    stages=['training'],
    columns=columns,
    params=params,
    functions=['train_test_split'],
    times=[elapsed],
    accuracies=[None],
    accuracy_type=None,
    data=[X],
    alg_params=tts_params,
)
0 commit comments