Skip to content

Error when running assignment4 codes #1

@JessieJiang1011

Description

@JessieJiang1011

Hi I just ran the codes for assignment 4 part 2A:

  from sklearn.svm import SVC
  from sklearn.neural_network import MLPClassifier
  from sklearn.preprocessing import MinMaxScaler
  def salary_predictions():
      def is_management(node):
          managementSalary = node[1]['ManagementSalary']
          if managementSalary == 0:
              return 0
          elif managementSalary == 1:
              return 1
          else:
              return None

    df = pd.DataFrame(index=G.nodes())
    df['clustering'] = pd.Series(nx.clustering(G))
    df['degree'] = pd.Series(G.degree())
    df['degree_centrality'] = pd.Series(nx.degree_centrality(G))
    df['closeness'] = pd.Series(nx.closeness_centrality(G))
    df['betweeness'] = pd.Series(nx.betweenness_centrality(G, normalized=True))
    df['pr'] = pd.Series(nx.pagerank(G))
    df['is_management'] = pd.Series([is_management(node) for node in G.nodes(data=True)])
    df_train = df[~pd.isnull(df['is_management'])]
    df_test = df[pd.isnull(df['is_management'])]
    features = ['clustering', 'degree', 'degree_centrality', 'closeness', 'betweeness', 'pr']
    X_train = df_train[features]
    Y_train = df_train['is_management']
    X_test = df_test[features]
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    clf = MLPClassifier(hidden_layer_sizes = [10, 5], alpha = 5,
                       random_state = 0, solver='lbfgs', verbose=0)
    clf.fit(X_train_scaled, Y_train)
    test_proba = clf.predict_proba(X_test_scaled)[:, 1]
    return pd.Series(test_proba,X_test.index)

But I met the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'tuple'

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In [17], line 84
     82     test_proba = clf.predict_proba(X_test_scaled)[:, 1]
     83     return pd.Series(test_proba,X_test.index)
---> 84 salary_predictions()

Cell In [17], line 77, in salary_predictions()
     75 X_test = df_test[features]
     76 scaler = MinMaxScaler()
---> 77 X_train_scaled = scaler.fit_transform(X_train)
     78 X_test_scaled = scaler.transform(X_test)
     79 clf = MLPClassifier(hidden_layer_sizes = [10, 5], alpha = 5,
     80                    random_state = 0, solver='lbfgs', verbose=0)

File /opt/conda/lib/python3.9/site-packages/sklearn/base.py:867, in TransformerMixin.fit_transform(self, X, y, **fit_params)
    863 # non-optimized default implementation; override when a better
    864 # method is possible for a given clustering algorithm
    865 if y is None:
    866     # fit method of arity 1 (unsupervised transformation)
--> 867     return self.fit(X, **fit_params).transform(X)
    868 else:
    869     # fit method of arity 2 (supervised transformation)
    870     return self.fit(X, y, **fit_params).transform(X)

File /opt/conda/lib/python3.9/site-packages/sklearn/preprocessing/_data.py:420, in MinMaxScaler.fit(self, X, y)
    418 # Reset internal state before fitting
    419 self._reset()
--> 420 return self.partial_fit(X, y)

File /opt/conda/lib/python3.9/site-packages/sklearn/preprocessing/_data.py:457, in MinMaxScaler.partial_fit(self, X, y)
    451     raise TypeError(
    452         "MinMaxScaler does not support sparse input. "
    453         "Consider using MaxAbsScaler instead."
    454     )
    456 first_pass = not hasattr(self, "n_samples_seen_")
--> 457 X = self._validate_data(
    458     X,
    459     reset=first_pass,
    460     dtype=FLOAT_DTYPES,
    461     force_all_finite="allow-nan",
    462 )
    464 data_min = np.nanmin(X, axis=0)
    465 data_max = np.nanmax(X, axis=0)

File /opt/conda/lib/python3.9/site-packages/sklearn/base.py:577, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
    575     raise ValueError("Validation should be done on X, y or both.")
    576 elif not no_val_X and no_val_y:
--> 577     X = check_array(X, input_name="X", **check_params)
    578     out = X
    579 elif no_val_X and not no_val_y:

File /opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py:856, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    854         array = array.astype(dtype, casting="unsafe", copy=False)
    855     else:
--> 856         array = np.asarray(array, order=order, dtype=dtype)
    857 except ComplexWarning as complex_warning:
    858     raise ValueError(
    859         "Complex data not supported\n{}\n".format(array)
    860     ) from complex_warning

File /opt/conda/lib/python3.9/site-packages/pandas/core/generic.py:2070, in NDFrame.__array__(self, dtype)
   2069 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
-> 2070     return np.asarray(self._values, dtype=dtype)

ValueError: setting an array element with a sequence.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions