
Commit 7bcefbc

Improved efficiency
1 parent 91830fd commit 7bcefbc

3 files changed: +135 −94 lines changed


model/train_model.py

Lines changed: 28 additions & 1 deletion
@@ -1,6 +1,9 @@
 import pandas as pd
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import StandardScaler
 from sklearn.model_selection import train_test_split
+import numpy as np
+from sklearn.model_selection import cross_val_score
 from sklearn.metrics import accuracy_score
 import pickle

@@ -15,6 +18,15 @@
 # Label Failures (1 if CPU > 90% or Memory > 80%)
 data['failure'] = (data['value_cpu'] > 0.9) | (data['value_mem'] > 80)

+# Get feature importances
+importances = model.feature_importances_
+feature_names = X.columns
+
+# Select only the most important features
+important_features = np.argsort(importances)[-10:]  # Keep top 10 features
+X_train = X_train.iloc[:, important_features]
+X_test = X_test.iloc[:, important_features]
+
 # Features & Labels
 X = data[['value_cpu', 'value_mem']]
 y = data['failure'].astype(int)
@@ -23,9 +35,24 @@
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

 # Train Model
-model = RandomForestClassifier(n_estimators=100, random_state=42)
+model = RandomForestClassifier(
+    n_estimators=200,       # Increase trees for stability
+    max_depth=10,           # Limit tree depth to reduce complexity
+    min_samples_split=10,   # Minimum samples required to split an internal node
+    min_samples_leaf=5,     # Minimum samples per leaf to prevent small splits
+    max_features="sqrt",    # Use sqrt of features to reduce correlation
+    random_state=42
+)
 model.fit(X_train, y_train)

+
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train)
+X_test = scaler.transform(X_test)
+
+cv_scores = cross_val_score(model, X_train, y_train, cv=5)
+print(f"✅ Cross-Validation Accuracy: {np.mean(cv_scores):.4f}")
+
 # Evaluate Model
 y_pred = model.predict(X_test)
 print("Accuracy:", accuracy_score(y_test, y_pred))

scripts/fetch_metrics.py

Lines changed: 32 additions & 51 deletions
@@ -1,58 +1,39 @@
-import os
 import requests
 import pandas as pd
+import os
 from datetime import datetime

 PROMETHEUS_URL = "http://localhost:9090/api/v1/query"

-def fetch_metric(metric_query, metric_name):
-    """Fetch metrics from Prometheus with error handling."""
-    try:
-        response = requests.get(PROMETHEUS_URL, params={'query': metric_query}, timeout=5)
-        response.raise_for_status()  # Raise an error if request fails
-        data = response.json()
-
-        if 'data' not in data or 'result' not in data['data']:
-            print(f"⚠️ No data found for {metric_name}")
-            return pd.DataFrame(columns=['timestamp', metric_name])  # Empty DataFrame
-
-        results = []
-        for item in data['data']['result']:
-            try:
-                timestamp = datetime.utcfromtimestamp(float(item['value'][0])).strftime('%Y-%m-%d %H:%M:%S')
-                value = float(item['value'][1])
-                results.append({'timestamp': timestamp, metric_name: value})
-            except (ValueError, IndexError):
-                print(f"⚠️ Skipping invalid data point in {metric_name}: {item}")
-
-        return pd.DataFrame(results)
-
-    except requests.exceptions.RequestException as e:
-        print(f"❌ Error fetching {metric_name}: {e}")
-        return pd.DataFrame(columns=['timestamp', metric_name])
-
-# Ensure the 'data' directory exists
-output_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data"))
-os.makedirs(output_dir, exist_ok=True)
-
-# Fetch Metrics with correct queries
-cpu_usage = fetch_metric('rate(container_cpu_usage_seconds_total[1m])', 'cpu_usage')  # CPU as rate
-memory_usage = fetch_metric('container_memory_usage_bytes', 'memory_usage')  # Memory in bytes
-
-# Convert Memory Usage to MB
-if not memory_usage.empty:
-    memory_usage['memory_usage'] = memory_usage['memory_usage'] / (1024 * 1024)  # Convert to MB
-
-# Save to CSV if data exists
-if not cpu_usage.empty:
-    cpu_usage.to_csv(os.path.join(output_dir, "cpu_usage.csv"), index=False)
-    print("✅ CPU usage saved to data/cpu_usage.csv")
-else:
-    print("⚠️ No CPU usage data to save.")
-
-if not memory_usage.empty:
-    memory_usage.to_csv(os.path.join(output_dir, "memory_usage.csv"), index=False)
-    print("✅ Memory usage saved to data/memory_usage.csv")
-else:
-    print("⚠️ No memory usage data to save.")
+# Define metrics to fetch
+METRICS = {
+    "cpu_usage": "container_cpu_usage_seconds_total",
+    "memory_usage": "container_memory_usage_bytes",
+    "disk_io": "node_disk_io_time_seconds_total",
+    "network_rx": "node_network_receive_bytes_total",
+    "network_tx": "node_network_transmit_bytes_total",
+}
+
+SAVE_DIR = "../data"
+os.makedirs(SAVE_DIR, exist_ok=True)
+
+def fetch_metric(metric_name):
+    """Fetches a single metric from Prometheus and returns a DataFrame."""
+    response = requests.get(PROMETHEUS_URL, params={"query": metric_name})
+    data = response.json()
+
+    results = []
+    for item in data.get("data", {}).get("result", []):
+        timestamp = datetime.utcfromtimestamp(float(item["value"][0])).strftime("%Y-%m-%d %H:%M:%S")
+        value = float(item["value"][1])
+        results.append({"timestamp": timestamp, "value": value})
+
+    return pd.DataFrame(results)
+
+# Fetch all metrics
+for metric_key, query in METRICS.items():
+    df = fetch_metric(query)
+    save_path = os.path.join(SAVE_DIR, f"{metric_key}.csv")
+    df.to_csv(save_path, index=False)
+    print(f"✅ {metric_key} data saved to {save_path}")
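
For reference, Prometheus's instant-query endpoint (/api/v1/query) answers with a JSON vector whose result entries each carry a [unix_timestamp, value_string] pair, which is what fetch_metric above flattens into DataFrame rows. The snippet below illustrates that mapping with a made-up sample payload instead of a live query.

from datetime import datetime

import pandas as pd

# Hypothetical sample payload shaped like a Prometheus instant-query response.
sample_response = {
    "status": "success",
    "data": {
        "resultType": "vector",
        "result": [
            {"metric": {"instance": "node1"}, "value": [1700000000.0, "123456789"]},
            {"metric": {"instance": "node2"}, "value": [1700000000.0, "987654321"]},
        ],
    },
}

# Same flattening as fetch_metric: one row per series, keeping timestamp and value.
rows = []
for item in sample_response.get("data", {}).get("result", []):
    ts = datetime.utcfromtimestamp(float(item["value"][0])).strftime("%Y-%m-%d %H:%M:%S")
    rows.append({"timestamp": ts, "value": float(item["value"][1])})

print(pd.DataFrame(rows))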

scripts/train_model.py

Lines changed: 75 additions & 42 deletions
@@ -1,63 +1,96 @@
 import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import accuracy_score, classification_report
-import os
+import numpy as np
 import joblib
 import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, confusion_matrix
+from imblearn.over_sampling import SMOTE
+from xgboost import XGBClassifier
+
+# Load dataset
+data = pd.read_csv("data/merged_data.csv")

-# Load Processed Data
-df = pd.read_csv("data/processed_metrics.csv")
+# Check if target column exists
+if "target" not in data.columns:
+    raise KeyError("❌ 'target' column not found in the dataset!")

-# Drop unnecessary columns
-df = df.drop(columns=["timestamp"], errors="ignore")
+# Remove non-numeric columns and separate features/target
+X = data.drop(columns=["timestamp", "target"], errors="ignore")
+y = data["target"]

-# Ensure "failure" column exists
-if "failure" not in df.columns:
-    raise ValueError("Error: 'failure' column not found in processed_metrics.csv!")
+# Standardize features
+scaler = StandardScaler()
+X_scaled = scaler.fit_transform(X)

-# Define Features (X) and Target (y)
-X = df.drop(columns=["failure"])
-y = df["failure"]
+# Handle class imbalance using SMOTE
+smote = SMOTE(random_state=42)
+X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

-# Train-Test Split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Split into train & test sets
+X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
+
+# Train model with class weighting
+
+model = XGBClassifier(
+    n_estimators=500,
+    max_depth=10,
+    learning_rate=0.01,
+    subsample=0.8,
+    colsample_bytree=0.8,
+    scale_pos_weight=1,
+    use_label_encoder=False,
+    eval_metric="logloss"
+)

-# Train a Random Forest Model
-model = RandomForestClassifier(n_estimators=100, random_state=42)
 model.fit(X_train, y_train)

 # Predictions
 y_pred = model.predict(X_test)

-# Model Evaluation
-accuracy = accuracy_score(y_test, y_pred)
-print(f"✅ Model Accuracy: {accuracy:.4f}")
-print("📊 Classification Report:\n", classification_report(y_test, y_pred))
+# Evaluate model
+train_acc = model.score(X_train, y_train)
+test_acc = model.score(X_test, y_test)
+cv_acc = np.mean(cross_val_score(model, X_resampled, y_resampled, cv=5))

-# Ensure models directory exists
-os.makedirs("models", exist_ok=True)
+# Confusion matrix
+cm = confusion_matrix(y_test, y_pred)

-# Save the Model
-joblib.dump(model, "models/failure_predictor.pkl")
-print("✅ Model saved as models/failure_predictor.pkl")
+# Feature importance
+feature_importances = model.feature_importances_
+sorted_indices = np.argsort(feature_importances)[::-1]
+top_features = X.columns[sorted_indices]

+# Print results
+print("\n📊 MODEL PERFORMANCE METRICS")
+print("────────────────────────────────")
+print(f"🏋️ Training Accuracy: {train_acc:.4f}")
+print(f"🛠️ Test Accuracy: {test_acc:.4f}")
+print(f"🎯 Cross-Validation Accuracy: {cv_acc:.4f}")

-# Get feature importance
-importances = model.feature_importances_
-features = X.columns
+# Print classification report
+print("\n📜 Classification Report:\n", classification_report(y_test, y_pred))

-# Plot
-plt.figure(figsize=(10,5))
-plt.barh(features, importances, color="skyblue")
-plt.xlabel("Importance Score")
-plt.ylabel("Feature Name")
-plt.title("Feature Importance in Failure Prediction Model")
-plt.show()
+# Print confusion matrix
+print("\n🖼️ Confusion Matrix:")
+print(cm)
+
+# Show top features
+print("\n🔍 Top 5 Most Important Features:")
+for i in range(min(5, len(top_features))):
+    print(f" {i+1}. {top_features[i]} ({feature_importances[sorted_indices[i]]:.4f})")

-# Check training accuracy
-train_pred = model.predict(X_train)
-train_acc = accuracy_score(y_train, train_pred)
+# Save trained model
+joblib.dump(model, "models/failure_predictor.pkl")
+print("\n✅ Model saved successfully!")
+
+# Plot confusion matrix
+plt.figure(figsize=(6, 5))
+sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["No Failure", "Failure"], yticklabels=["No Failure", "Failure"])
+plt.xlabel("Predicted")
+plt.ylabel("Actual")
+plt.title("Confusion Matrix")
+plt.show()

-print(f"🏋️ Training Accuracy: {train_acc:.2f}")
-print(f"🛠️ Test Accuracy: {accuracy_score(y_test, y_pred):.2f}")
