Disease_Prediction-Based-On-Report-ML-/med.py at main · sushmidha06/Disease_Prediction-Based-On-Report-ML- · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from flask import Flask, render_template, request, jsonify
import fitz  # PyMuPDF for PDF processing
import pickle
import joblib
import numpy as np
import os
from enum import Enum

# Flask uses the importing module's name (the ``__name__`` dunder) to locate
# templates and static files. The previous single-underscore ``_name_`` is an
# undefined name and raised NameError as soon as the module was imported.
app = Flask(__name__)

# Directory layout for the serialized artifacts: everything lives under
# MODEL_DIR, with one sub-directory per artifact family.
MODEL_DIR = "models"
CLASSIFICATION_DIR, DISEASE_DIR, SCALER_DIR = (
    os.path.join(MODEL_DIR, subdir)
    for subdir in ("classification", "disease", "scalers")
)

# Enum for model types
class DiseaseType(Enum):
    """Diseases for which a dedicated model/scaler pair may be loaded.

    Each member's value is the lowercase slug used to build the
    ``<value>_model.pkl`` and ``<value>_scaler.pkl`` file names at load
    time, and it is also the key under which the loaded objects are stored
    in the ``models`` and ``scalers`` dictionaries.
    """
    DIABETES = "diabetes"
    KIDNEY = "kidney"
    LIVER = "liver"
    HEART = "heart"
    HYPERTENSION = "hypertension"

# Define preprocess_text function to prevent attribute errors
def preprocess_text(text):
    """Normalize *text* by lower-casing it and trimming surrounding whitespace."""
    lowered = text.lower()
    return lowered.strip()

# Load whichever classification models are present on disk. Each model is
# stored under its file name without the extension (e.g. "meta_model").
classification_models = {}
for model_name in ("meta_model.pkl", "random_forest_model.pkl", "simicheck.pkl"):
    model_path = os.path.join(CLASSIFICATION_DIR, model_name)
    if not os.path.exists(model_path):
        # A missing file is skipped rather than treated as an error.
        continue
    with open(model_path, "rb") as file:
        classification_models[model_name.partition(".")[0]] = joblib.load(file)

# Load the per-disease predictor and its feature scaler. A disease is only
# registered when BOTH files exist, so the two dictionaries are filled in
# lockstep by this loop.
models = {}
scalers = {}
for disease in DiseaseType:
    model_path = os.path.join(DISEASE_DIR, f"{disease.value}_model.pkl")
    scaler_path = os.path.join(SCALER_DIR, f"{disease.value}_scaler.pkl")
    if not (os.path.exists(model_path) and os.path.exists(scaler_path)):
        continue
    with open(model_path, "rb") as file:
        models[disease.value] = joblib.load(file)
    with open(scaler_path, "rb") as file:
        scalers[disease.value] = joblib.load(file)

# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Extract the plain text of every page of a PDF.

    Args:
        file: Binary file-like object whose ``read()`` returns the PDF bytes.

    Returns:
        The text of all pages concatenated in page order, or a string of
        the form ``"Error: <message>"`` if the PDF could not be opened or
        read. Failures are reported in-band rather than raised.
    """
    try:
        # Open the document as a context manager so it is closed even when
        # reading a page fails; previously the document was never closed.
        with fitz.open(stream=file.read(), filetype="pdf") as doc:
            return "".join(page.get_text("text") for page in doc)
    except Exception as e:
        # Best-effort contract kept for existing callers: no exception
        # escapes, the failure is described in the returned string.
        return f"Error: {str(e)}"

# Function to classify disease type
def classify_disease(features):
    """Predict a label for *features* with the "meta_model" classifier.

    The features are reshaped into a single-row 2-D array before being
    passed to the model's ``predict``. Returns the first element of the
    prediction, or the string ``"Unknown"`` when no "meta_model" entry was
    loaded into ``classification_models``.
    """
    features_array = np.array(features).reshape(1, -1)
    if "meta_model" not in classification_models:
        return "Unknown"
    meta_model = classification_models["meta_model"]
    return meta_model.predict(features_array)[0]

# Function to predict disease outcome
def predict_disease(features, disease_type):
    """Run the model registered for *disease_type* on *features*.

    The features are wrapped in a one-row batch, passed through the
    matching scaler's ``transform``, and then through the model's
    ``predict``. Returns the first element of the prediction, or the
    string ``"Unknown disease type"`` when either the model or the scaler
    for *disease_type* is not loaded.
    """
    has_artifacts = disease_type in models and disease_type in scalers
    if not has_artifacts:
        return "Unknown disease type"

    scaled_row = scalers[disease_type].transform([features])
    return models[disease_type].predict(scaled_row)[0]

@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page

def _parse_features(text):
    """Return every whitespace-separated numeric token of *text* as a float.

    A token qualifies when it consists only of digits after removing at
    most one ``.`` (so signs and exponents are not accepted).
    """
    features = []
    for token in text.split():
        if not token.replace('.', '', 1).isdigit():
            continue
        try:
            features.append(float(token))
        except ValueError:
            # str.isdigit() also accepts characters such as superscript
            # digits (e.g. "10³") that float() rejects. Skip those tokens
            # instead of failing the whole request.
            continue
    return features


def _to_native(value):
    """Convert a NumPy scalar to the equivalent built-in Python value."""
    # Predictions may be NumPy scalars such as numpy.int64, which the JSON
    # encoder cannot serialize. Other values are passed through unchanged.
    return value.item() if isinstance(value, np.generic) else value


@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """Handle a PDF upload and return the disease prediction as JSON.

    Expects a multipart form field named ``file``. The PDF text is
    extracted, its numeric tokens become the feature vector, the feature
    vector is classified into a disease type, and the disease-specific
    prediction is computed.

    Returns:
        A JSON response with ``extracted_text``, ``disease_type`` and
        ``prediction`` on success, or a JSON ``error`` message with HTTP
        status 400 when no file was sent, the file name is empty, or fewer
        than five numeric values were found in the text.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    extracted_text = extract_text_from_pdf(file)
    features = _parse_features(extracted_text)
    if len(features) < 5:
        return jsonify({"error": "Not enough data extracted for prediction"}), 400

    disease_type = classify_disease(features)
    prediction = predict_disease(features, disease_type)

    # Convert only at the JSON boundary so the lookups above still see the
    # values exactly as the models returned them.
    return jsonify({
        "extracted_text": extracted_text,
        "disease_type": _to_native(disease_type),
        "prediction": _to_native(prediction)
    })

# Start the server only when this file is executed directly, not when it is
# imported. The previous guard compared the undefined name ``_name_`` with
# '_main_' and raised NameError instead of starting the app.
if __name__ == '__main__':
    # NOTE: debug=True turns on Flask's debug mode, which is meant for local
    # development only.
    app.run(debug=True)