Skip to content

Commit c3b05fc

Browse files
committed
synthesised categorical variable without encoding for SD2011 dataset
1 parent 56706a2 commit c3b05fc

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

main.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from synthpop import Synthpop
2+
import pandas as pd
3+
import pyreadr
4+
5+
6+
def synSD2011():
    """Synthesise a four-column subset of the SD2011 data set.

    Reads the ``SD2011`` object from ``SD2011.rda`` (path relative to the
    working directory), keeps the ``age``, ``unempdur``, ``income`` and
    ``sex`` columns, fits a ``Synthpop`` model with ``sex`` declared as a
    category and the other three as floats, and generates as many synthetic
    rows as the input subset has.

    The loaded frame, the subset's dtypes and the head of the synthetic
    frame are printed along the way.

    Returns:
        The object returned by ``Synthpop.generate`` (the synthetic data).
    """
    df0 = pyreadr.read_r("SD2011.rda")['SD2011']
    print(df0)

    # Take an explicit copy so the fitter works on an independent frame
    # rather than on a slice of df0.
    df = df0[['age', 'unempdur', 'income', 'sex']].copy()

    # Column name -> synthpop dtype label. 'sex' is passed as a category
    # without any manual encoding beforehand.
    dtype_map = {
        "age": "float",
        "unempdur": "float",
        "income": "float",
        "sex": "category",
    }

    print(df.dtypes)

    spop = Synthpop()
    spop.fit(df, dtype_map)

    synth_df = spop.generate(len(df))

    print(synth_df.head())
    return synth_df

0 commit comments

Comments
 (0)