Skip to content

Commit acd51f6

Browse files
committed
Changed fetch metrics
1 parent d254548 commit acd51f6

File tree

2 files changed

+17
-33
lines changed

2 files changed

+17
-33
lines changed

scripts/fetch_metrics.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,9 @@
1212
"network_rx": "node_network_receive_bytes_total",
1313
"network_tx": "node_network_transmit_bytes_total",
1414
}
15-
1615
SAVE_DIR = "../data"
1716
os.makedirs(SAVE_DIR, exist_ok=True)
18-
1917
def fetch_metric(metric_name):
20-
"""Fetches a single metric from Prometheus and returns a DataFrame."""
2118
response = requests.get(PROMETHEUS_URL, params={"query": metric_name})
2219
data = response.json()
2320

@@ -28,7 +25,7 @@ def fetch_metric(metric_name):
2825
value = float(item["value"][1])
2926
results.append({"timestamp": timestamp, metric_name: value})
3027
except Exception as e:
31-
print(f"Error processing {metric_name}: {e}")
28+
print(f"Error with {metric_name}: {e}")
3229

3330
df = pd.DataFrame(results)
3431

@@ -37,14 +34,14 @@ def fetch_metric(metric_name):
3734

3835
return df
3936

40-
# Fetch all metrics
37+
#fetching all
4138
all_data = None
4239

4340
for metric_key, query in METRICS.items():
4441
df = fetch_metric(query)
4542

4643
if df.empty:
47-
print(f"⚠️ Warning: No data for {metric_key}, skipping merge.")
44+
print(f"⚠️No Data, skipping merge.")
4845
continue
4946

5047
if all_data is None:
@@ -55,13 +52,11 @@ def fetch_metric(metric_name):
5552
print("Before merge, df columns:", list(df.columns))
5653

5754
all_data = pd.merge(all_data, df, on="timestamp", how="outer")
58-
59-
# Save collected data
6055
if all_data is not None and not all_data.empty:
6156
save_path = os.path.join(SAVE_DIR, "merged_data.csv")
6257
all_data.to_csv(save_path, index=False)
63-
print(f"Merged data saved to {save_path}")
58+
print(f"Merged data saved to {save_path}")
6459
print(all_data.head()) # Preview first few rows
6560
else:
66-
print("⚠️ No data was fetched, skipping save.")
61+
print("⚠️ No data was fetched.")
6762

scripts/train_model1.py

Lines changed: 12 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -11,75 +11,65 @@
1111
from imblearn.over_sampling import BorderlineSMOTE
1212
from xgboost import XGBClassifier
1313

14-
# ✅ Load Dataset
1514
CSV_PATH = "/home/pavithra/k8s-failure-prediction/data/merged_data.csv"
1615
df = pd.read_csv(CSV_PATH)
1716

18-
# ✅ Preprocessing
1917
df.columns = df.columns.str.strip().str.replace(r'\s+', '_', regex=True).str.lower()
2018
df["timestamp"] = pd.to_datetime(df["timestamp"])
2119
df.set_index("timestamp", inplace=True)
2220

23-
# ✅ Feature Engineering
2421
for col in df.columns:
2522
df[f"{col}_avg"] = df[col].rolling(window=5, min_periods=1).mean()
26-
27-
# ✅ Target Variable
2823
df["target"] = (df["container_restart_count"].diff().fillna(0) > 1).astype(int)
2924
df.drop(columns=["container_restart_count"], inplace=True)
3025

31-
# ✅ Prepare Data
3226
X = df.drop(columns=["target"])
3327
y = df["target"]
34-
35-
# ✅ Handle Class Imbalance
28+
# to handle the imbalance
3629
if y.value_counts().min() >= 5:
3730
smote = BorderlineSMOTE(sampling_strategy='auto', random_state=42)
3831
X_resampled, y_resampled = smote.fit_resample(X, y)
3932
else:
4033
X_resampled, y_resampled = X, y
4134

42-
# ✅ Train-Test Split
35+
#splitting
4336
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
4437

45-
# ✅ Reduce Overfitting (Final Fix)
38+
#to reduce overfitting
4639
rf = RandomForestClassifier(
47-
n_estimators=300, # More trees
48-
max_depth=10, # Reduce tree depth
49-
min_samples_split=20, # More samples needed per split
50-
min_samples_leaf=10, # Prevent small branches
40+
n_estimators=300,
41+
max_depth=10,
42+
min_samples_split=20,
43+
min_samples_leaf=10,
5144
bootstrap=True,
5245
random_state=42
5346
)
5447

55-
# ✅ Ensemble Model (Random Forest + XGBoost)
5648
xgb = XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=7, subsample=0.8, colsample_bytree=0.8, random_state=42)
5749
rf.fit(X_train, y_train)
5850
xgb.fit(X_train, y_train)
5951

60-
# ✅ Predictions
52+
#predict
6153
y_pred_rf = rf.predict(X_test)
6254
y_pred_xgb = xgb.predict(X_test)
6355

64-
# ✅ Combine Predictions (Soft Voting)
56+
#combining them
6557
y_pred_ensemble = (y_pred_rf + y_pred_xgb) // 2
66-
67-
# ✅ Evaluate Model
6858
train_acc = rf.score(X_train, y_train) * 100
6959
test_acc = accuracy_score(y_test, y_pred_ensemble) * 100
7060
print(f"\n🎯 Train Accuracy: {train_acc:.2f} %")
7161
print(f"🎯 Test Accuracy: {test_acc:.2f} %")
7262
print("\n🔹 Classification Report:\n", classification_report(y_test, y_pred_ensemble))
7363

74-
# ✅ Save Model
64+
7565
MODEL_PATH = "../models/k8s_failure_model.pkl"
7666
joblib.dump(rf, MODEL_PATH)
7767
model = joblib.load("models/k8s_failure_model.pkl")
7868
print("The features in model are\n")
7969
print(model.feature_names_in_)
8070
print(f"\n✅ Model saved at {MODEL_PATH}")
8171

82-
# 🔥 Confusion Matrix Plot
72+
#confusion matrix to be plotted
8373
cm = confusion_matrix(y_test, y_pred_ensemble)
8474
plt.figure(figsize=(6, 4))
8575
sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", xticklabels=["No Failure", "Failure"], yticklabels=["No Failure", "Failure"])
@@ -88,10 +78,9 @@
8878
plt.ylabel("Actual")
8979
plt.show()
9080

91-
# 🔥 Feature Importance Plot
81+
#feature importance to be plotted
9282
feature_importances = pd.DataFrame({'Feature': X_train.columns, 'Importance': rf.feature_importances_})
9383
feature_importances = feature_importances.sort_values(by='Importance', ascending=False).head(15)
94-
9584
plt.figure(figsize=(10, 6))
9685
sns.barplot(x='Importance', y='Feature', data=feature_importances, palette="viridis")
9786
plt.title("Top 15 Important Features")

0 commit comments

Comments
 (0)