-
Notifications
You must be signed in to change notification settings - Fork 49
Open
Description
Hi I just ran the codes for assignment 4 part 2A:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
def salary_predictions():
def is_management(node):
managementSalary = node[1]['ManagementSalary']
if managementSalary == 0:
return 0
elif managementSalary == 1:
return 1
else:
return None
df = pd.DataFrame(index=G.nodes())
df['clustering'] = pd.Series(nx.clustering(G))
df['degree'] = pd.Series(G.degree())
df['degree_centrality'] = pd.Series(nx.degree_centrality(G))
df['closeness'] = pd.Series(nx.closeness_centrality(G))
df['betweeness'] = pd.Series(nx.betweenness_centrality(G, normalized=True))
df['pr'] = pd.Series(nx.pagerank(G))
df['is_management'] = pd.Series([is_management(node) for node in G.nodes(data=True)])
df_train = df[~pd.isnull(df['is_management'])]
df_test = df[pd.isnull(df['is_management'])]
features = ['clustering', 'degree', 'degree_centrality', 'closeness', 'betweeness', 'pr']
X_train = df_train[features]
Y_train = df_train['is_management']
X_test = df_test[features]
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
clf = MLPClassifier(hidden_layer_sizes = [10, 5], alpha = 5,
random_state = 0, solver='lbfgs', verbose=0)
clf.fit(X_train_scaled, Y_train)
test_proba = clf.predict_proba(X_test_scaled)[:, 1]
return pd.Series(test_proba,X_test.index)
But I met the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'tuple'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In [17], line 84
82 test_proba = clf.predict_proba(X_test_scaled)[:, 1]
83 return pd.Series(test_proba,X_test.index)
---> 84 salary_predictions()
Cell In [17], line 77, in salary_predictions()
75 X_test = df_test[features]
76 scaler = MinMaxScaler()
---> 77 X_train_scaled = scaler.fit_transform(X_train)
78 X_test_scaled = scaler.transform(X_test)
79 clf = MLPClassifier(hidden_layer_sizes = [10, 5], alpha = 5,
80 random_state = 0, solver='lbfgs', verbose=0)
File /opt/conda/lib/python3.9/site-packages/sklearn/base.py:867, in TransformerMixin.fit_transform(self, X, y, **fit_params)
863 # non-optimized default implementation; override when a better
864 # method is possible for a given clustering algorithm
865 if y is None:
866 # fit method of arity 1 (unsupervised transformation)
--> 867 return self.fit(X, **fit_params).transform(X)
868 else:
869 # fit method of arity 2 (supervised transformation)
870 return self.fit(X, y, **fit_params).transform(X)
File /opt/conda/lib/python3.9/site-packages/sklearn/preprocessing/_data.py:420, in MinMaxScaler.fit(self, X, y)
418 # Reset internal state before fitting
419 self._reset()
--> 420 return self.partial_fit(X, y)
File /opt/conda/lib/python3.9/site-packages/sklearn/preprocessing/_data.py:457, in MinMaxScaler.partial_fit(self, X, y)
451 raise TypeError(
452 "MinMaxScaler does not support sparse input. "
453 "Consider using MaxAbsScaler instead."
454 )
456 first_pass = not hasattr(self, "n_samples_seen_")
--> 457 X = self._validate_data(
458 X,
459 reset=first_pass,
460 dtype=FLOAT_DTYPES,
461 force_all_finite="allow-nan",
462 )
464 data_min = np.nanmin(X, axis=0)
465 data_max = np.nanmax(X, axis=0)
File /opt/conda/lib/python3.9/site-packages/sklearn/base.py:577, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
575 raise ValueError("Validation should be done on X, y or both.")
576 elif not no_val_X and no_val_y:
--> 577 X = check_array(X, input_name="X", **check_params)
578 out = X
579 elif no_val_X and not no_val_y:
File /opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py:856, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
854 array = array.astype(dtype, casting="unsafe", copy=False)
855 else:
--> 856 array = np.asarray(array, order=order, dtype=dtype)
857 except ComplexWarning as complex_warning:
858 raise ValueError(
859 "Complex data not supported\n{}\n".format(array)
860 ) from complex_warning
File /opt/conda/lib/python3.9/site-packages/pandas/core/generic.py:2070, in NDFrame.__array__(self, dtype)
2069 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
-> 2070 return np.asarray(self._values, dtype=dtype)
ValueError: setting an array element with a sequence.
Metadata
Metadata
Assignees
Labels
No labels