|
1 | 1 | import logging |
2 | 2 | import networkx as nx |
3 | | -from sklearn.ensemble import RandomForestClassifier |
| 3 | +from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier |
4 | 4 | from sklearn.model_selection import train_test_split |
5 | 5 | from sklearn.metrics import accuracy_score |
| 6 | +from sklearn.preprocessing import StandardScaler |
6 | 7 |
|
7 | 8 | class VulnerabilityScanner: |
def __init__(self):
    """Set up empty scan state, the soft-voting ensemble and the feature scaler.

    Both base estimators are seeded. ``train_model`` already splits the data
    with a fixed ``random_state``; seeding the estimators as well makes the
    accuracy it reports reproducible for the same input data.
    """
    self.scan_results = []

    # NOTE: VotingClassifier fits *clones* of the estimators it is given, so
    # rf_model / gb_model themselves are never fitted; the fitted copies are
    # held by ensemble_model after training.
    self.rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    self.gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
    self.ensemble_model = VotingClassifier(
        estimators=[('rf', self.rf_model), ('gb', self.gb_model)],
        voting='soft',
    )

    # Populated by load_data().
    self.data = None
    self.labels = None

    # Fitted when training data is preprocessed.
    self.scaler = StandardScaler()
13 | 17 |
|
14 | 18 | def scan(self, target): |
15 | 19 | logging.info(f"Scanning target: {target}") |
@@ -88,24 +92,28 @@ def ensure_compatibility(self, existing_data, new_component_data): |
88 | 92 | } |
89 | 93 | return compatible_data |
90 | 94 |
|
def preprocess_data(self, data, *, fit=True):
    """Scale ``data`` with the instance's scaler.

    Args:
        data: Feature matrix accepted by ``self.scaler``.
        fit: When true (the default, matching the previous behaviour), the
            scaler statistics are re-learned from ``data`` before scaling.
            Pass ``fit=False`` for inference-time data so that it is scaled
            with the statistics learned earlier instead of its own.

    Returns:
        The scaled feature matrix as returned by the scaler.
    """
    if fit:
        return self.scaler.fit_transform(data)
    # Re-use previously learned statistics; do not touch the scaler state.
    return self.scaler.transform(data)
| 97 | + |
def load_data(self, data, labels):
    """Store preprocessed training features and their labels.

    Args:
        data: Feature matrix; it is passed through ``preprocess_data``
            before being stored on ``self.data``.
        labels: Target values, stored unchanged on ``self.labels``.

    Raises:
        ValueError: If ``data`` and ``labels`` are both sized and contain a
            different number of samples.
    """
    if data is not None and labels is not None:
        try:
            mismatch = len(data) != len(labels)
        except TypeError:
            # Unsized input: leave validation to the scaler / splitter.
            mismatch = False
        if mismatch:
            raise ValueError(
                "Data and labels must contain the same number of samples "
                f"(got {len(data)} and {len(labels)})."
            )

    # Validation happens first so a bad call leaves the stored state untouched.
    self.data = self.preprocess_data(data)
    self.labels = labels
94 | 101 |
|
def train_model(self):
    """Fit the ensemble on an 80/20 split of the loaded data.

    Returns:
        The accuracy score of the fitted ensemble on the held-out 20%.

    Raises:
        ValueError: If data or labels have not been loaded yet.
    """
    nothing_loaded = self.data is None or self.labels is None
    if nothing_loaded:
        raise ValueError("Data and labels must be loaded before training the model.")

    # NOTE(review): self.data was already scaled on the full dataset when it
    # was loaded, so the held-out rows influenced the scaler statistics.
    features_train, features_test, labels_train, labels_test = train_test_split(
        self.data,
        self.labels,
        test_size=0.2,
        random_state=42,
    )

    self.ensemble_model.fit(features_train, labels_train)
    held_out_predictions = self.ensemble_model.predict(features_test)
    return accuracy_score(labels_test, held_out_predictions)
104 | 111 |
|
def predict(self, new_data):
    """Predict labels for ``new_data`` with the trained ensemble.

    The input is scaled with the statistics the scaler has already learned.
    It must not be re-fitted here: doing so would overwrite the training
    statistics, and a single sample would be scaled to all zeros.

    Args:
        new_data: Feature matrix with the same columns as the training data.

    Returns:
        The ensemble's predictions for ``new_data``.

    Raises:
        ValueError: If the ensemble has not been trained yet.
    """
    # The ensemble object always exists after __init__, so a None check alone
    # can never detect an untrained model; a fitted VotingClassifier exposes
    # ``estimators_``. hasattr() is also False for None.
    if not hasattr(self.ensemble_model, "estimators_"):
        raise ValueError("Model must be trained before making predictions.")

    # transform only -- never fit on prediction-time data.
    preprocessed_data = self.scaler.transform(new_data)
    return self.ensemble_model.predict(preprocessed_data)
110 | 118 |
|
111 | 119 | # For detailed plans on future implementations, please refer to the `future_implementations_plan.md` file. |
0 commit comments