33import pyreadr
44
55def synBar ():
6- df = pd .read_csv ("bar_pass_prediction.csv" )[[ 'sex' , 'race1' , 'ugpa' , 'bar' ]]
6+ df = pd .read_csv ("bar_pass_prediction.csv" )
77 print (df .dtypes )
8- dtype_map = {'sex' : 'float' , 'race1' : 'category' , 'ugpa' : 'float' , 'bar' : 'category' }
8+ dtype_map = {}
99
10- for (k ,v ) in dtype_map .items ():
11-
12- if v == 'category' :
13- df = df .astype ({k : "category" })
10+ for k in df .dtypes .keys ():
11+ match df .dtypes [k ]:
12+ case 'float64' :
13+ dtype_map [k ] = 'float'
14+ case 'category' :
15+ dtype_map [k ] = 'category'
16+ df = df .astype ({k : "category" })
17+ case _:
18+ dtype_map [k ]= 'category'
19+ df = df .astype ({k : "category" })
1420
1521 print (df .dtypes )
1622 spop = Synthpop ()
@@ -28,22 +34,35 @@ def synBar():
2834def synSD2011 ():
2935 df0 = pyreadr .read_r ("SD2011.rda" )['SD2011' ]
3036 #pd.read_csv("bar_pass_prediction.csv")
31- print (df0 .dtypes )
32- df = df0 [[ 'age' , 'unempdur' , 'income' , 'sex' ]] #df0[['sex', 'race1', 'ugpa', 'bar']]
33- print (df .isna ().sum ())
37+ # print(df0.dtypes)
38+ df = df0 #df0[['sex', 'race1', 'ugpa', 'bar']]
39+ # print(df.isna().sum())
3440 #df.to_excel("inputData.xlsx")
3541 dtype_map = {
36- "age" :"float" ,
37- "unempdur" :"float" ,
38- "income" :"float" ,
39- "sex" :"category"
42+ # "age":"float",
43+ # "unempdur":"float",
44+ # "income":"float",
45+ # "sex":"category"
4046 }
47+
48+ for k in df .dtypes .keys ():
49+ match df .dtypes [k ]:
50+ case 'float64' :
51+ dtype_map [k ] = 'float'
52+ case 'category' :
53+ dtype_map [k ] = 'category'
54+ df = df .astype ({k : "category" })
55+ case _:
56+ dtype_map [k ]= 'category'
57+ df = df .astype ({k : "category" })
58+
59+ print (dtype_map )
4160 #{'sex': 'float', 'race1': 'category', 'ugpa': 'float', 'bar': 'category'}
4261 # for (k,v) in dtype_map.items():
4362 # if v == 'category':
4463 # df[k] = df[k].astype('category')
4564
46- print ( df . dtypes )
65+
4766 r = df .dtypes .keys ()
4867 spop = Synthpop ()
4968 spop .fit (df ,dtype_map )
@@ -53,4 +72,4 @@ def synSD2011():
5372 print (synth_df .head ())
5473
5574
56- synSD2011 ()
75+ synBar ()
0 commit comments