@@ -46,13 +46,12 @@ def create_dataset(n_rows=1000, extras=False, has_missing=True, random_seed=2001
4646 random .choice (['A' , 'B_b' , 'C_c_c' ]), # Strings with underscores to test reverse_dummies()
4747 random .choice (['A' , 'B' , 'C' , np .NaN ]) if has_missing else random .choice (['A' , 'B' , 'C' ]), # None
4848 random .choice (['A' , 'B' , 'C' , 'D' ]) if extras else random .choice (['A' , 'B' , 'C' ]), # With a new string value
49- random .choice ([12 , 43 , - 32 ]), # Number in the column name
5049 random .choice (['A' , 'B' , 'C' ]), # What is going to become the categorical column
5150 random .choice (['A' , 'B' , 'C' , np .nan ]), # Categorical with missing values
5251 random .choice ([1 , 2 , 3 ]) # Ordinal integers
5352 ] for row in range (n_rows )]
5453
55- df = pd .DataFrame (ds , columns = ['float' , 'float_edge' , 'unique_int' , 'unique_str' , 'invariant' , 'underscore' , 'none' , 'extra' , 321 , 'categorical' , 'na_categorical' , 'categorical_int' ])
54+ df = pd .DataFrame (ds , columns = ['float' , 'float_edge' , 'unique_int' , 'unique_str' , 'invariant' , 'underscore' , 'none' , 'extra' , 'categorical' , 'na_categorical' , 'categorical_int' ])
5655 df ['categorical' ] = pd .Categorical (df ['categorical' ], categories = ['A' , 'B' , 'C' ])
5756 df ['na_categorical' ] = pd .Categorical (df ['na_categorical' ], categories = ['A' , 'B' , 'C' ])
5857 df ['categorical_int' ] = pd .Categorical (df ['categorical_int' ], categories = [1 , 2 , 3 ])
0 commit comments