@@ -37,19 +37,24 @@ def learn_rf(train_features, train_labels, test_features, test_labels):
3737 # Use the forest's predict method on the test data
3838 print ("Predicting ..." )
3939 y_pred = rf .predict (test_features )
40- #y_pred = np.array(np.where(y_pred > 0.5, 1, 0), dtype=int)
4140 return calculate_scores (y_true = test_labels , y_pred = y_pred )
4241
4342
def learn_SVM(train_features, train_labels, test_features, test_labels):
    """Fit an SVM classifier on standardized features and score it on the test set.

    A ``StandardScaler`` is fitted on the training features only; the same
    fitted scaler is then applied to the test features, so no statistics are
    estimated from the test data.

    Args:
        train_features: Feature matrix used to fit the scaler and the SVM.
        train_labels: Labels matching ``train_features``.
        test_features: Feature matrix to predict on.
        test_labels: Ground-truth labels passed on as ``y_true``.

    Returns:
        Whatever ``calculate_scores(y_true=..., y_pred=...)`` returns
        (that helper is defined elsewhere in this module).
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    # max_iter bounds the number of solver iterations; random_state fixes the seed.
    clf = SVC(random_state=42, max_iter=1000)
    scaler = StandardScaler()

    print("Scaling feature matrix ...")
    train_features = scaler.fit_transform(train_features)
    print(f'Scaler mean for first 10 features: {scaler.mean_[:10]}')

    print("Fitting SVM ...")
    clf.fit(train_features, train_labels)

    print("Predicting ...")
    # transform() reuses the statistics learned from the training data.
    test_features = scaler.transform(test_features)
    print(f'Scaler mean for first 10 features: {scaler.mean_[:10]}')
    y_pred = clf.predict(test_features)

    # The predictions are passed to the scorer as returned by predict(),
    # with no thresholding step.
    return calculate_scores(y_true=test_labels, y_pred=y_pred)
5459
5560
@@ -101,4 +106,4 @@ def _shuffle_labels(node_labels):
101106 np .random .shuffle (labels )
102107 for i in range (len (node_labels )):
103108 node_labels [node_ids [i ]] = labels [i ]
104- return node_labels
109+ return node_labels
0 commit comments