diff --git a/vb_suite/hdfstore_bench.py b/vb_suite/hdfstore_bench.py index 8f66cc04a5ec9..dd466e7fd5dec 100644 --- a/vb_suite/hdfstore_bench.py +++ b/vb_suite/hdfstore_bench.py @@ -19,9 +19,9 @@ def remove(f): # get from a store setup1 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000)}, index=index) remove(f) store = HDFStore(f) @@ -36,9 +36,9 @@ def remove(f): # write to a store setup2 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000)}, index=index) remove(f) store = HDFStore(f) @@ -52,12 +52,12 @@ def remove(f): # get from a store (mixed) setup3 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000), - 'string1' : ['foo'] * 100000, - 'bool1' : [True] * 100000, - 'int1' : np.random.randint(0, 1000000, size=100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000), + 'string1' : ['foo'] * 25000, + 'bool1' : [True] * 25000, + 'int1' : np.random.randint(0, 250000, size=25000)}, index=index) remove(f) store = HDFStore(f) @@ -73,12 +73,12 @@ def remove(f): # write to a store (mixed) setup4 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000), - 'string1' : ['foo'] * 100000, - 'bool1' : [True] * 100000, - 'int1' : np.random.randint(0, 1000000, size=100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000), + 'string1' : ['foo'] * 25000, + 'bool1' : [True] * 25000, + 'int1' : np.random.randint(0, 250000, size=25000)}, index=index) remove(f) store = HDFStore(f) @@ -92,12 +92,12 @@ def remove(f): # get from a table (mixed) setup5 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000), - 'string1' : ['foo'] * 100000, - 'bool1' : [True] * 100000, - 'int1' : np.random.randint(0, 1000000, size=100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000), + 'string1' : ['foo'] * 25000, + 'bool1' : [True] * 25000, + 'int1' : np.random.randint(0, 250000, size=25000)}, index=index) remove(f) @@ -114,12 +114,12 @@ def remove(f): # write to a table (mixed) setup6 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000), - 'string1' : ['foo'] * 100000, - 'bool1' : [True] * 100000, - 'int1' : np.random.randint(0, 100000, size=100000)}, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000), + 'string1' : ['foo'] * 25000, + 'bool1' : [True] * 25000, + 'int1' : np.random.randint(0, 25000, size=25000)}, index=index) remove(f) store = HDFStore(f) @@ -133,9 +133,9 @@ def remove(f): # select from a table setup7 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000) }, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000) }, index=index) remove(f) @@ -152,9 +152,9 @@ def remove(f): # write to a table setup8 = common_setup + """ -index = [rands(10) for _ in xrange(100000)] -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000) }, +index = [rands(10) for _ in xrange(25000)] +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000) }, index=index) remove(f) store = HDFStore(f) @@ -168,7 +168,7 @@ def remove(f): # get from a table (wide) setup9 = common_setup + """ -df = DataFrame(np.random.randn(100000,200)) +df = DataFrame(np.random.randn(25000,100)) remove(f) store = HDFStore(f) @@ -184,7 +184,7 @@ def remove(f): # write to a table (wide) setup10 = common_setup + """ -df = DataFrame(np.random.randn(100000,200)) +df = DataFrame(np.random.randn(25000,100)) remove(f) store = HDFStore(f) @@ -195,16 +195,15 @@ def remove(f): start_date=start_date) #---------------------------------------------------------------------- -# get from a table (wide) (indexed) +# get from a table (wide) setup11 = common_setup + """ -index = date_range('1/1/2000', periods = 100000) -df = DataFrame(np.random.randn(100000,200), index = index) +index = date_range('1/1/2000', periods = 25000) +df = DataFrame(np.random.randn(25000,100), index = index) remove(f) store = HDFStore(f) store.append('df11',df) -store.create_table_index('df11') """ query_store_table_wide = Benchmark( @@ -213,18 +212,17 @@ def remove(f): #---------------------------------------------------------------------- -# query from a table (indexed) +# query from a table setup12 = common_setup + """ -index = date_range('1/1/2000', periods = 100000) -df = DataFrame({'float1' : randn(100000), - 'float2' : randn(100000) }, +index = date_range('1/1/2000', periods = 25000) +df = DataFrame({'float1' : randn(25000), + 'float2' : randn(25000) }, index=index) remove(f) store = HDFStore(f) store.append('df12',df) -store.create_table_index('df12') """ query_store_table = Benchmark( @@ -235,8 +233,8 @@ def remove(f): # select from a panel table setup13 = common_setup + """ -p = Panel(randn(20, 1000, 1000), items= [ 'Item%03d' % i for i in xrange(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(1000) ]) +p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in xrange(20) ], + major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(25) ]) remove(f) store = HDFStore(f) @@ -252,8 +250,8 @@ def remove(f): # write to a panel table setup14 = common_setup + """ -p = Panel(randn(20, 1000, 1000), items= [ 'Item%03d' % i for i in xrange(20) ], - major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(1000) ]) +p = Panel(randn(20, 1000, 25), items= [ 'Item%03d' % i for i in xrange(20) ], + major_axis=date_range('1/1/2000', periods=1000), minor_axis = [ 'E%03d' % i for i in xrange(25) ]) remove(f) store = HDFStore(f) @@ -262,3 +260,18 @@ def remove(f): write_store_table_panel = Benchmark( "store.append('p2',p)", setup14, cleanup="store.close()", start_date=start_date) + +#---------------------------------------------------------------------- +# write to a table (data_columns) + +setup15 = common_setup + """ +df = DataFrame(np.random.randn(25000,10),columns = [ 'C%03d' % i for i in xrange(10) ]) + +remove(f) +store = HDFStore(f) +""" + +write_store_table_dc = Benchmark( + "store.append('df15',df,data_columns=True)", setup15, cleanup="store.close()", + start_date=start_date) +