feat: integrate MLflow and DVC for experiment tracking and model versioning

ciro-maciel · ciro-maciel · commit acd84e88c38b · 2025-08-19T16:14:06.000-03:00
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
@@ -1,4 +1,4 @@
-name: CI/CD
+name: CI/CD with DVC and MLflow
 
 on:
   push:
@@ -7,39 +7,57 @@ on:
     branches: [mlops-concepts, mlops-market-tools]
 
 jobs:
-  build-train:
+  build-train-log:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
       - name: Setup Bun
         uses: oven-sh/setup-bun@v1
         with:
           bun-version: latest
 
-      - name: Install dependencies (workspaces)
-        run: bun install
+      - name: Install Python Dependencies
+        run: pip install "dvc[s3]" mlflow
 
-      - name: Generate DB migrations
-        run: bun --cwd inference run db:generate
-
-      - name: Run DB migrations
-        run: bun --cwd inference run db:migrate
+      - name: Install JS Dependencies
+        run: bun install
 
-      - name: Train model
+      - name: Configure DVC and S3 Credentials
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+        run: |
+          # Step-level env is not visible to the later dvc pull/push steps, so persist the keys in the untracked .dvc/config.local (set endpointurl the same way for non-AWS S3 providers).
+          dvc remote modify --local my-remote access_key_id "$AWS_ACCESS_KEY_ID"
+          dvc remote modify --local my-remote secret_access_key "$AWS_SECRET_ACCESS_KEY"
+
+      - name: Pull Data from DVC
+        run: dvc pull -r my-remote
+
+      - name: Train model and Log to MLflow
+        env:
+          # Secrets that let MLflow connect to your tracking server on Fly.io
+          MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_TRACKING_URI }}
+          # If your MLflow server requires authentication, configure it here
+          # MLFLOW_TRACKING_USERNAME: ${{ secrets.MLFLOW_USERNAME }}
+          # MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLFLOW_PASSWORD }}
         run: bun --cwd training run train
 
-      - name: Build web
+      - name: Push Artifacts to DVC
+        run: dvc push -r my-remote
+
+      - name: Build Dashboard
         run: bun --cwd dashboard run build
 
-      - name: Upload web artifact
+      - name: Upload Dashboard Artifact
         uses: actions/upload-artifact@v4
         with:
           name: dashboard-dist
           path: dashboard/dist
-
-      - name: Deploy (placeholder)
-        if: ${{ secrets.FLY_API_TOKEN != '' }}
-        run: |
-          echo "FLY_API_TOKEN detected. Add deployment steps here (e.g., Fly.io)."
diff --git a/dashboard/src/components/Dashboard.jsx b/dashboard/src/components/Dashboard.jsx
@@ -1,92 +1,27 @@
-import React, { useEffect, useMemo, useState } from 'react';
-import { Card, Group, Loader, Table, Text, Title, Badge } from '@mantine/core';
-import { LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer } from 'recharts';
+import React from 'react';
+import { Card, Title, Text, Button, Center } from '@mantine/core';
 
 export default function Dashboard() {
-  const [runs, setRuns] = useState([]);
-  const [loading, setLoading] = useState(true);
-  const [error, setError] = useState('');
-
-  useEffect(() => {
-    async function load() {
-      try {
-        const res = await fetch('http://localhost:3001/dashboard');
-        const data = await res.json();
-        setRuns(Array.isArray(data) ? data : []);
-      } catch (e) {
-        setError(e.message || 'Erro ao carregar');
-      } finally {
-        setLoading(false);
-      }
-    }
-    load();
-  }, []);
-
-  const chartData = useMemo(() => {
-    const ordered = [...runs].reverse();
-    return ordered.map((r, idx) => ({
-      idx,
-      accuracy: (r.metrics && r.metrics.accuracy) || 0,
-      createdAt: r.createdAt ? new Date(r.createdAt).toLocaleString() : ''
-    }));
-  }, [runs]);
+  // IMPORTANT: Replace with the URL of your MLflow app on Fly.io
+  const mlflowUiUrl = "https://spamguard-mlflow.fly.dev";
 
   return (
-    <>
-      <Title order={4} mb="sm">Evolução de Métricas</Title>
-      <Card withBorder mb="lg" style={{ width: '100%', height: 280 }}>
-        {loading ? (
-          <Group justify="center" align="center" style={{ height: '100%' }}>
-            <Loader />
-          </Group>
-        ) : error ? (
-          <Text c="red">{error}</Text>
-        ) : (
-          <ResponsiveContainer>
-            <LineChart data={chartData}>
-              <CartesianGrid strokeDasharray="3 3" />
-              <XAxis dataKey="createdAt" interval={0} angle={-15} textAnchor="end" height={60} />
-              <YAxis domain={[0, 1]} />
-              <Tooltip formatter={(v) => (v*100).toFixed(1) + '%'} />
-              <Line type="monotone" dataKey="accuracy" stroke="#10b981" strokeWidth={2} dot={{ r: 3 }} />
-            </LineChart>
-          </ResponsiveContainer>
-        )}
-      </Card>
-
-      <Title order={4} mb="sm">Execuções</Title>
-      <Card withBorder>
-        {loading ? (
-          <Group justify="center" align="center"><Loader /></Group>
-        ) : (
-          <Table highlightOnHover withTableBorder withColumnBorders>
-            <Table.Thead>
-              <Table.Tr>
-                <Table.Th>ID</Table.Th>
-                <Table.Th>Commit</Table.Th>
-                <Table.Th>Acurácia</Table.Th>
-                <Table.Th>F1</Table.Th>
-                <Table.Th>Criado em</Table.Th>
-                <Table.Th>Produção</Table.Th>
-              </Table.Tr>
-            </Table.Thead>
-            <Table.Tbody>
-              {runs.map((r) => (
-                <Table.Tr key={r.id}>
-                  <Table.Td>{r.id}</Table.Td>
-                  <Table.Td><code>{r.gitCommit || '-'}</code></Table.Td>
-                  <Table.Td>{r.metrics ? (r.metrics.accuracy*100).toFixed(1)+'%' : '-'}</Table.Td>
-                  <Table.Td>{r.metrics ? (r.metrics.f1Score*100).toFixed(1)+'%' : '-'}</Table.Td>
-                  <Table.Td>{r.createdAt ? new Date(r.createdAt).toLocaleString() : '-'}</Table.Td>
-                  <Table.Td>
-                    {r.isProduction ? <Badge color="green">Sim</Badge> : <Badge variant="light">Não</Badge>}
-                  </Table.Td>
-                </Table.Tr>
-              ))}
-            </Table.Tbody>
-          </Table>
-        )}
-      </Card>
-    </>
+    <Card withBorder>
+      <Center style={{ flexDirection: 'column', textAlign: 'center', padding: '2rem' }}>
+        <Title order={3}>MLOps Dashboard</Title>
+        <Text c="dimmed" mt="md" maw={600}>
+          O rastreamento de experimentos foi atualizado para MLflow, a ferramenta padrão da indústria. A UI abaixo foi substituída por um dashboard profissional e interativo hospedado no nosso próprio servidor MLflow.
+        </Text>
+        <Button
+          component="a"
+          href={mlflowUiUrl}
+          target="_blank"
+          mt="xl"
+          size="md"
+        >
+          Abrir Dashboard MLflow
+        </Button>
+      </Center>
+    </Card>
   );
 }
diff --git a/inference/src/index.js b/inference/src/index.js
@@ -1,57 +1,28 @@
 import { Elysia, t } from 'elysia';
 import { cors } from '@elysiajs/cors';
-import { Database } from 'bun:sqlite';
-import { drizzle } from 'drizzle-orm/bun-sqlite';
-import * as schema from './db/schema.js';
-import { eq } from 'drizzle-orm';
 import natural from 'natural';
 
 const { BayesClassifier } = natural;
-
-// Resolve DB path relative to this file (independent of CWD)
-const dbPath = new URL('../main.db', import.meta.url).pathname;
-const sqlite = new Database(dbPath);
-const db = drizzle(sqlite, { schema });
-
 let classifier = null;
 
-async function loadProductionModel() {
-    const prodRun = await db.query.runs.findFirst({ where: eq(schema.runs.isProduction, true) });
-    if (prodRun && prodRun.modelArtifactPath) {
-        // Resolve artifact path. If it's relative (e.g., 'artifacts/...'), resolve from the repo root.
-        const artifactPath = prodRun.modelArtifactPath.startsWith('/')
-          ? prodRun.modelArtifactPath
-          : new URL(`../../${prodRun.modelArtifactPath}`, import.meta.url).pathname;
-        console.log(`Loading model: ${artifactPath}`);
-        const modelJson = await Bun.file(artifactPath).text();
-        classifier = BayesClassifier.restore(JSON.parse(modelJson));
-    } else {
-        console.log("No production model found.");
-    }
-}
-
-function parseMetrics(m) {
+// NOTE: In production you'd pull the model from MLflow Model Registry; this template tries to load a local demo artifact if present.
+// The URL object is handed to Bun.file directly: URL.pathname is percent-encoded and would break on checkout paths with spaces.
+async function loadDemoModel() {
   try {
-    if (!m) return null;
-    return typeof m === 'string' ? JSON.parse(m) : m;
-  } catch {
-    return null;
+    const artifactUrl = new URL(`../../artifacts/model_latest.json`, import.meta.url);
+    console.log(`Loading demo model: ${artifactUrl.pathname}`);
+    const modelJson = await Bun.file(artifactUrl).text();
+    classifier = BayesClassifier.restore(JSON.parse(modelJson));
+  } catch (e) {
+    console.error('Could not load a demo model. Please run training to produce an artifact.', e.message);
   }
 }
 
+
 const app = new Elysia()
   .use(cors())
-  .get('/dashboard', async () => {
-    const rows = await db.query.runs.findMany({
-      orderBy: (runs, { desc }) => [desc(runs.createdAt)],
-    });
-    return rows.map((r) => ({ ...r, metrics: parseMetrics(r.metrics) }));
-  })
   .post('/predict', async ({ body }) => {
-    if (!classifier) {
-        await loadProductionModel();
-        if(!classifier) return { error: 'Model is not loaded' };
-    }
+    if (!classifier) return { error: 'Model is not loaded on the server. The MLOps pipeline is the focus.' };
     const prediction = classifier.getClassifications(body.message);
     return { prediction };
   }, {
@@ -61,4 +32,4 @@ const app = new Elysia()
   .listen(3001);
 
 console.log(`API running at http://${app.server?.hostname}:${app.server?.port}`);
-loadProductionModel();
+loadDemoModel();
diff --git a/training/src/log_mlflow.py b/training/src/log_mlflow.py
@@ -0,0 +1,17 @@
+import mlflow
+import sys
+import json
+
+# Command-line arguments: 1 = metrics as a JSON string, 2 = path to the artifacts directory
+metrics_json = sys.argv[1]
+artifacts_dir = sys.argv[2]
+
+metrics = json.loads(metrics_json)
+
+# Start a new MLflow run; the metrics and artifacts below are logged inside it
+with mlflow.start_run():
+    print("MLflow: Logging metrics...")
+    mlflow.log_metrics(metrics)
+    print("MLflow: Logging model artifacts...")
+    mlflow.log_artifacts(artifacts_dir, artifact_path="model")
+    print("MLflow: Logged successfully.")
diff --git a/training/src/train.js b/training/src/train.js