Add FastAPI app with Docker, CI/CD, and linter

sumeyaaaa · sumeyaaaa · commit a229e9c88460 · 2025-07-01T16:12:40.000+03:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -23,7 +23,10 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+          pip install flake8  # make sure flake8 is installed
 
-      - name: Run tests
-        run: pytest || exit 0
+      - name: Run flake8 linter
+        run: flake8 .
 
+      - name: Run tests
+        run: pytest
diff --git a/.gitignore b/.gitignore
@@ -23,7 +23,7 @@ ENV/
 
 # Jupyter Notebook checkpoints
 .ipynb_checkpoints/
-
+mlruns
 # Data files (don’t track raw or output data)
 *.csv
 *.tsv
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,8 +1,13 @@
-version: "3.9"
-
+# docker-compose.yml (no version)
 services:
   api:
     build: .
     ports:
       - "8000:8000"
-    restart: always
+    volumes:
+     - ./mlruns:/app/mlruns
+     - ./mlruns.db:/app/mlruns.db
+    environment:
+     - MLFLOW_TRACKING_URI=http://host.docker.internal:5000
+
+    working_dir: /app
diff --git a/dockerfile b/dockerfile
@@ -1,24 +1,18 @@
-# Use a slim Python base image
-FROM python:3.9-slim
+FROM python:3.10-slim
 
-# Set working directory
 WORKDIR /app
 
-# Install system dependencies (optional: helps with ML libraries)
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    && rm -rf /var/lib/apt/lists/*
-
-# Copy and install Python dependencies
+# Copy requirements and install deps first for caching
 COPY requirements.txt .
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy all code and mlruns folder
+COPY . /app
 
-# Copy source code
-COPY src ./src
+# You can also explicitly copy mlruns if it's outside project root
+# COPY mlruns /app/mlruns
 
-# Expose FastAPI port
+# Expose port 8000 (optional)
 EXPOSE 8000
 
-# Start the API
 CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/notebooks/task 1 and 2/load_EDA.ipynb b/notebooks/task 1 and 2/load_EDA.ipynb
@@ -596,6 +596,31 @@
     "output_path = r\"C:\\Users\\ABC\\Desktop\\10Acadamy\\Week 5\\Credit-Risk-Probability-Model\\data\\processed\\nan_null.xlsx\"\n",
     "df.to_excel(output_path, index=False)\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c5366e18",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tracking URI: file:///C:/Users/ABC/Desktop/10Acadamy/Week 5/Credit-Risk-Probability-Model/mlruns\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "import mlflow\n",
+    "\n",
+    "mlruns_path = Path(\"C:/Users/ABC/Desktop/10Acadamy/Week 5/Credit-Risk-Probability-Model/mlruns\").absolute()\n",
+    "mlflow.set_tracking_uri(f\"file:///{mlruns_path.as_posix()}\")\n",
+    "\n",
+    "print(\"Tracking URI:\", mlflow.get_tracking_uri())\n",
+    "\n"
+   ]
   }
  ],
  "metadata": {
diff --git a/notebooks/task-6/assigning.ipynb b/notebooks/task-6/assigning.ipynb
@@ -0,0 +1,195 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ae6b5e80",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ABC\\AppData\\Local\\Temp\\ipykernel_16292\\506102746.py:12: FutureWarning: ``mlflow.tracking.client.MlflowClient.transition_model_version_stage`` is deprecated since 2.9.0. Model registry stages will be removed in a future major release. To learn more about the deprecation of model registry stages, see our migration guide here: https://mlflow.org/docs/latest/model-registry.html#migrating-from-stages\n",
+      "  client.transition_model_version_stage(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<ModelVersion: aliases=[], creation_timestamp=1751283921891, current_stage='Staging', deployment_job_state=None, description=None, last_updated_timestamp=1751283921936, metrics=None, model_id=None, name='best_model', params=None, run_id='<your_run_id>', run_link=None, source='runs:/<your_run_id>/model', status='READY', status_message=None, tags={}, user_id=None, version=1>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from mlflow.tracking import MlflowClient\n",
+    "\n",
+    "client = MlflowClient()\n",
+    "\n",
+    "run_id = \"<your_run_id>\"  # Run where model was logged\n",
+    "model_uri = f\"runs:/{run_id}/model\"\n",
+    "\n",
+    "model_details = client.create_registered_model(\"best_model\")  # Create model in registry if not exists\n",
+    "mv = client.create_model_version(\"best_model\", model_uri, run_id)\n",
+    "\n",
+    "# Transition to staging\n",
+    "client.transition_model_version_stage(\n",
+    "    name=\"best_model\",\n",
+    "    version=mv.version,\n",
+    "    stage=\"Staging\"\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5a4642cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[<ModelVersion: aliases=[], creation_timestamp=1751336471270, current_stage='Staging', deployment_job_state=None, description=None, last_updated_timestamp=1751336471924, metrics=[], model_id='m-4eb2843bda5d4e60b07f3a7e2dbc78cf', name='credit-risk-model', params={}, run_id='4d40cba2b1ef4491813380e7a40eb4f9', run_link=None, source='models:/m-4eb2843bda5d4e60b07f3a7e2dbc78cf', status='READY', status_message=None, tags={}, user_id=None, version=1>]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ABC\\AppData\\Local\\Temp\\ipykernel_12312\\1572118651.py:9: FutureWarning: ``mlflow.tracking.client.MlflowClient.get_latest_versions`` is deprecated since 2.9.0. Model registry stages will be removed in a future major release. To learn more about the deprecation of model registry stages, see our migration guide here: https://mlflow.org/docs/latest/model-registry.html#migrating-from-stages\n",
+      "  versions = client.get_latest_versions(\"credit-risk-model\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mlflow\n",
+    "from mlflow.tracking import MlflowClient\n",
+    "\n",
+    "# Set tracking URI to point to your local mlruns folder\n",
+    "mlflow.set_tracking_uri(\"file:///C:/Users/ABC/Desktop/10Acadamy/Week 5/Credit-Risk-Probability-Model/src/model/mlruns\")\n",
+    "\n",
+    "client = MlflowClient()\n",
+    "try:\n",
+    "    versions = client.get_latest_versions(\"credit-risk-model\")\n",
+    "    print(versions)\n",
+    "except Exception as e:\n",
+    "    print(\"Model not found or no versions:\", e)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "e43e31b8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ABC\\AppData\\Local\\Temp\\ipykernel_12312\\77176695.py:3: FutureWarning: ``mlflow.tracking.client.MlflowClient.get_latest_versions`` is deprecated since 2.9.0. Model registry stages will be removed in a future major release. To learn more about the deprecation of model registry stages, see our migration guide here: https://mlflow.org/docs/latest/model-registry.html#migrating-from-stages\n",
+      "  versions = client.get_latest_versions(\"credit-risk-model\")\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model versions found: [<ModelVersion: aliases=[], creation_timestamp=1751336471270, current_stage='Staging', deployment_job_state=None, description=None, last_updated_timestamp=1751336471924, metrics=[], model_id='m-4eb2843bda5d4e60b07f3a7e2dbc78cf', name='credit-risk-model', params={}, run_id='4d40cba2b1ef4491813380e7a40eb4f9', run_link=None, source='models:/m-4eb2843bda5d4e60b07f3a7e2dbc78cf', status='READY', status_message=None, tags={}, user_id=None, version=1>]\n",
+      "Loading model from: C:\\Users\\ABC\\Desktop\\10Acadamy\\Week 5\\Credit-Risk-Probability-Model\\src\\model\\mlruns\\0\\models\\m-4eb2843bda5d4e60b07f3a7e2dbc78cf\\artifacts\\artifacts\\model.pkl\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\ABC\\Desktop\\10Acadamy\\Week 5\\Credit-Risk-Probability-Model\\env\\Lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator DecisionTreeClassifier from version 1.6.1 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+      "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+      "  warnings.warn(\n",
+      "c:\\Users\\ABC\\Desktop\\10Acadamy\\Week 5\\Credit-Risk-Probability-Model\\env\\Lib\\site-packages\\sklearn\\base.py:440: InconsistentVersionWarning: Trying to unpickle estimator RandomForestClassifier from version 1.6.1 when using version 1.7.0. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+      "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model loaded successfully.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check if the model exists\n",
+    "try:\n",
+    "    versions = client.get_latest_versions(\"credit-risk-model\")\n",
+    "    print(\"Model versions found:\", versions)\n",
+    "except Exception as e:\n",
+    "    print(\"Error fetching model versions:\", e)\n",
+    "\n",
+    "# Load model from MLflow Model Registry (Staging version)\n",
+    "try:\n",
+    "    model = mlflow.pyfunc.load_model(\"models:/credit-risk-model/Staging\")\n",
+    "    print(\"Model loaded successfully.\")\n",
+    "except Exception as e:\n",
+    "    print(\"Error loading model:\", e)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "06a72fc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Version: 1, Stage: Staging\n"
+     ]
+    }
+   ],
+   "source": [
+    "from mlflow.tracking import MlflowClient\n",
+    "import mlflow\n",
+    "from mlflow.tracking import MlflowClient\n",
+    "# Set tracking URI to point to your local mlruns folder\n",
+    "mlflow.set_tracking_uri(\"file:///C:/Users/ABC/Desktop/10Acadamy/Week 5/Credit-Risk-Probability-Model/src/model/mlruns\")\n",
+    "\n",
+    "client = MlflowClient()\n",
+    "model_name = \"credit-risk-model\"\n",
+    "model_versions = client.get_registered_model(model_name)\n",
+    "\n",
+    "# Print details about the model versions\n",
+    "for version in model_versions.latest_versions:\n",
+    "    print(f\"Version: {version.version}, Stage: {version.current_stage}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/register.py b/register.py
@@ -0,0 +1,37 @@
+import mlflow
+from mlflow.tracking import MlflowClient
+from pathlib import Path
+
+# Set tracking URI
+mlruns_path = Path(r"C:\Users\ABC\Desktop\10Acadamy\Week 5\Credit-Risk-Probability-Model\mlruns").absolute()
+mlflow.set_tracking_uri(f"file:///{mlruns_path.as_posix()}")
+# Define model variables
+model_name = "best_model"
+run_id = "1bed56713a694528a9571bb00576059c"
+artifact_path = "models"
+model_uri = f"runs:/{run_id}/{artifact_path}"
+
+client = MlflowClient()
+
+# Register the model (will raise exception if already exists)
+try:
+    client.create_registered_model(model_name)
+except:
+    pass  # model already exists
+
+# Create new version
+mv = client.create_model_version(
+    name=model_name,
+    source=model_uri,
+    run_id=run_id
+)
+
+# Transition to Staging
+client.transition_model_version_stage(
+    name=model_name,
+    version=mv.version,
+    stage="Staging"
+)
+
+print(f"✅ Re-registered as models:/{model_name}/Staging")
+
diff --git a/src/api/main.py b/src/api/main.py
@@ -1,21 +1,33 @@
+import mlflow
 from fastapi import FastAPI
-import mlflow.pyfunc
 import pandas as pd
-from src.api.pydantic_models import CustomerData, PredictionResponse
+from .pydantic_models import CustomerData, PredictionResponse
 
 app = FastAPI()
 
-# Load model from MLflow Model Registry
-model_name = "best_model"
-model_version = 1  # or 'latest'
-model_uri = f"models:/{model_name}/{model_version}"
-model = mlflow.pyfunc.load_model(model_uri)
+# Use the /app/mlruns file store (overrides MLFLOW_TRACKING_URI if set)
+mlflow.set_tracking_uri("file:///app/mlruns")
+
+# Load the model from MLflow artifacts inside Docker
+model_uri = "file:///app/mlruns/1/models/m-b56f931bfa444e04b71e0ac2ecbe00fb/artifacts"
+model = mlflow.sklearn.load_model(model_uri)
 
 @app.post("/predict", response_model=PredictionResponse)
 def predict(data: CustomerData):
-    input_df = pd.DataFrame([data.dict()])
-    
-    # Add any required preprocessing here if needed
-    
-    risk_prob = model.predict_proba(input_df)[:, 1][0]
-    return PredictionResponse(risk_probability=risk_prob)
+    try:
+        input_df = pd.DataFrame([data.dict()])
+        prob_array = model.predict_proba(input_df)
+        print("🔥 Prediction probabilities:", prob_array)
+        prob = prob_array[0][1]
+        return {"risk_probability": prob}
+    except Exception as e:
+        return {"detail": f"Prediction failed: {e}"}
+
+#@app.post("/predict", response_model=PredictionResponse)
+#def predict(data: CustomerData):
+    #try:
+        #input_df = pd.DataFrame([data.dict()])
+        #prob = model.predict_proba(input_df)[0][1]
+        #return {"risk_probability": prob}
+    #except Exception as e:
+        #return {"detail": f"Prediction failed: {e}"}
diff --git a/src/api/pydantic_models.py b/src/api/pydantic_models.py
diff --git a/test/test_api.py b/test/test_api.py