Skip to content

Commit 50612dd

Browse files
authored
feat: Add preprocessing pipeline and model script for Medical Appointment No-Shows
This PR adds the preprocessing pipeline and machine learning model script for predicting medical appointment no-shows using Random Forest classifier with proper feature scaling and encoding.
1 parent 8b24b72 commit 50612dd

File tree

1 file changed

+98
-0
lines changed
  • projects/detection/Medical_Appointment_No_Shows

1 file changed

+98
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Medical Appointment No-Shows Prediction Model
2+
This module implements a machine learning model to predict whether a patient
3+
will miss their medical appointment, using a preprocessing pipeline and model training.
4+
"""
5+
6+
import pandas as pd
7+
import numpy as np
8+
from sklearn.preprocessing import StandardScaler, LabelEncoder
9+
from sklearn.model_selection import train_test_split
10+
from sklearn.ensemble import RandomForestClassifier
11+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
12+
13+
14+
class AppointmentPreprocessor:
    """Preprocessing pipeline for medical appointment data.

    Imputes missing numeric values with the column mean, label-encodes
    ``object`` columns and standardises ``int64``/``float64`` columns. The
    encoders and the scaler are stored on the instance so that data seen
    later can be transformed with the same fit via
    ``preprocess(data, fit=False)``.
    """

    def __init__(self):
        # Single scaler applied to all numeric columns at once.
        self.scaler = StandardScaler()
        # One LabelEncoder per categorical column, keyed by column name.
        self.label_encoders = {}

    def preprocess(self, data, fit=True):
        """Preprocess medical appointment data.

        Args:
            data (pd.DataFrame): Raw appointment data. The frame passed in is
                not modified; a new frame is returned.
            fit (bool): When True (the default), the encoders and the scaler
                are (re)fitted on ``data`` before transforming it. When
                False, the previously fitted encoders and scaler are reused,
                which is what held-out / inference data needs so that it is
                encoded and scaled exactly like the training data.

        Returns:
            pd.DataFrame: Preprocessed data ready for model training.

        Raises:
            ValueError: If ``fit`` is False and ``data`` contains a
                categorical column for which no encoder has been fitted.
                With ``fit=False`` the underlying scikit-learn transformers
                may also raise (for example on labels never seen during
                fitting, or on an unfitted scaler).
        """
        # Handle missing values: numeric columns get their column mean.
        # fillna returns a new frame, so the assignments below do not touch
        # the caller's DataFrame.
        data = data.fillna(data.mean(numeric_only=True))

        # Encode categorical variables. Values are cast to str first, so a
        # missing value in an object column becomes its own category.
        categorical_cols = data.select_dtypes(include=['object']).columns
        for col in categorical_cols:
            values = data[col].astype(str)
            if fit:
                if col not in self.label_encoders:
                    self.label_encoders[col] = LabelEncoder()
                data[col] = self.label_encoders[col].fit_transform(values)
            else:
                if col not in self.label_encoders:
                    raise ValueError(
                        f"No fitted encoder for column {col!r}; "
                        "call preprocess(..., fit=True) on training data first."
                    )
                data[col] = self.label_encoders[col].transform(values)

        # Scale numerical features.
        # NOTE(review): label-encoded columns are integer-typed at this point
        # and are therefore normally selected and scaled as well -- confirm
        # this is intended.
        numerical_cols = data.select_dtypes(include=['int64', 'float64']).columns
        # Skip scaling when nothing qualifies: the scaler rejects an input
        # with zero feature columns.
        if len(numerical_cols) > 0:
            scale = self.scaler.fit_transform if fit else self.scaler.transform
            data[numerical_cols] = scale(data[numerical_cols])

        return data
45+
46+
47+
class AppointmentNoShowModel:
    """Machine Learning model for predicting appointment no-shows.

    Thin wrapper around a ``RandomForestClassifier`` that exposes training,
    prediction and a small set of evaluation metrics.
    """

    def __init__(self, n_estimators=100, random_state=42):
        """Create the classifier and an accompanying preprocessor.

        Args:
            n_estimators (int): Number of trees passed to the random forest.
                Defaults to 100.
            random_state (int): Seed passed to the random forest. Defaults
                to 42.
        """
        self.model = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
        )
        # Kept for callers to use; train/predict/evaluate in this class do
        # not apply it, so features must already be preprocessed.
        self.preprocessor = AppointmentPreprocessor()

    def train(self, X_train, y_train):
        """Train the model.

        Args:
            X_train: Training features
            y_train: Training labels
        """
        self.model.fit(X_train, y_train)

    def predict(self, X_test):
        """Make predictions.

        Args:
            X_test: Test features

        Returns:
            predictions: Predicted labels
        """
        return self.model.predict(X_test)

    def evaluate(self, X_test, y_test):
        """Evaluate model performance.

        Args:
            X_test: Test features
            y_test: Test labels

        Returns:
            dict: Performance metrics under the keys ``'accuracy'``,
            ``'precision'``, ``'recall'`` and ``'f1'``. Precision, recall
            and F1 are computed with ``average='weighted'``.
        """
        predictions = self.predict(X_test)

        metrics = {
            'accuracy': accuracy_score(y_test, predictions),
            'precision': precision_score(y_test, predictions, average='weighted'),
            'recall': recall_score(y_test, predictions, average='weighted'),
            'f1': f1_score(y_test, predictions, average='weighted'),
        }

        return metrics
94+
95+
96+
def _main():
    """Script entry point: print a short start-up message (example usage)."""
    print('Medical Appointment No-Shows Prediction Model initialized.')


if __name__ == '__main__':
    _main()

0 commit comments

Comments
 (0)