diff --git a/source/index.html.md b/source/index.html.md
index fbbe078..ece7421 100644
--- a/source/index.html.md
+++ b/source/index.html.md
@@ -41,10 +41,206 @@ The [Developers category](https://community.monzo.com/c/uk/developers/43) on our
The Monzo Developer API is not suitable for building public applications.
You may only connect to your own account or those of a small set of users you explicitly allow.
-
+
+# Machine Learning
+
+Monzo's transaction API is well-suited for machine learning applications.
+This section shows patterns for building ML pipelines on top of transaction data.
+
+## Fetching training data
+
+```python
+import requests
+import pandas as pd
+
+def fetch_transactions_for_ml(access_token, account_id, days=90):
+ """
+ Fetch and normalise transactions for ML pipelines.
+
+ Returns a DataFrame matching Monzo's analytical feature contract:
+ created (UTC), amount_gbp, category, merchant_name, is_debit
+ """
+ from datetime import datetime, timedelta, timezone
+
+ since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
+
+ resp = requests.get(
+ "https://api.monzo.com/transactions",
+ headers={"Authorization": f"Bearer {access_token}"},
+ params={
+ "account_id": account_id,
+ "since": since,
+ "limit": 100,
+ "expand[]": "merchant",
+ },
+ )
+ resp.raise_for_status()
+ txns = resp.json()["transactions"]
+
+ df = pd.DataFrame([{
+ "transaction_id": t["id"],
+ "created": pd.to_datetime(t["created"], utc=True),
+ "amount_gbp": t["amount"] / 100,
+ "category": t.get("category", "unknown"),
+ "merchant_name": (t.get("merchant") or {}).get("name", ""),
+ "is_debit": t["amount"] < 0,
+ "settled": t.get("settled") is not None,
+ } for t in txns])
+
+ return df.sort_values("created").reset_index(drop=True)
+```
+
+## Feature engineering
+
+```python
+def engineer_features(df):
+ """
+ Create ML features from raw transaction data.
+
+ Produces both operational features (computed from transaction fields)
+ and analytical features (rolling aggregations over time windows).
+ """
+ feats = pd.DataFrame(index=df.index)
+
+ # Operational features — available at inference time
+ ts = df["created"]
+ feats["hour_of_day"] = ts.dt.hour
+ feats["day_of_week"] = ts.dt.dayofweek
+ feats["is_weekend"] = ts.dt.dayofweek.isin([5, 6]).astype(int)
+ feats["is_payday_week"] = ts.dt.day.between(25, 31).astype(int)
+ feats["abs_amount_gbp"] = df["amount_gbp"].abs()
+ feats["is_round_amount"] = (
+ (df["amount_gbp"].abs() * 100) % 100 == 0
+ ).astype(int)
+
+ # Analytical features — rolling aggregations
+ df_s = df.sort_values("created").copy()
+ df_s["abs_amount"] = df_s["amount_gbp"].abs()
+
+ for window in [7, 14, 30]:
+ rolled = (
+ df_s.set_index("created")["abs_amount"]
+ .rolling(f"{window}D", min_periods=1)
+ .agg(["sum", "mean", "std"])
+ )
+ feats[f"rolling_{window}d_sum"] = rolled["sum"].values
+ feats[f"rolling_{window}d_mean"] = rolled["mean"].values
+ feats[f"rolling_{window}d_std"] = rolled["std"].fillna(0).values
+
+ return feats.fillna(0)
+```
+
+## Anomaly detection
+
+```python
+from sklearn.ensemble import IsolationForest
+import numpy as np
+
+def detect_anomalies(df, contamination=0.05):
+ """
+ Identify unusual transactions using Isolation Forest.
+
+ contamination: expected fraction of anomalies (default 5%)
+ """
+ X = engineer_features(df)
+
+ model = IsolationForest(
+ contamination=contamination,
+ random_state=42,
+ n_estimators=200,
+ )
+ labels = model.fit_predict(X) # -1 = anomaly
+ scores = -model.score_samples(X) # higher = more anomalous
+
+ # Z-score on amount as secondary signal
+ amounts = df["amount_gbp"].abs()
+ z_scores = (amounts - amounts.mean()) / (amounts.std() + 1e-9)
+
+ result = df.copy()
+ result["is_anomaly"] = (labels == -1) | (z_scores > 3.0)
+ result["anomaly_score"] = scores.round(4)
+ result["z_score"] = z_scores.round(2)
+
+ return result[result["is_anomaly"]].sort_values(
+ "anomaly_score", ascending=False
+ )
+
+anomalies = detect_anomalies(df)
+print(f"Found {len(anomalies)} unusual transactions")
+```
+
+## Spending prediction
+
+```python
+import xgboost as xgb
+from sklearn.model_selection import TimeSeriesSplit
+import numpy as np
+
+def predict_next_month_spend(df):
+ """
+ Forecast next month's spend per category using XGBoost.
+ Uses TimeSeriesSplit to prevent future data leaking into training.
+ """
+ # Monthly aggregation
+ df_work = df[df["is_debit"]].copy()
+ df_work["month"] = df_work["created"].dt.month
+ df_work["quarter"] = df_work["created"].dt.quarter
+ df_work["period"] = df_work["created"].dt.to_period("M")
+
+ predictions = {}
+ for cat in df_work["category"].unique():
+ cat_df = (
+ df_work[df_work["category"] == cat]
+ .groupby("period")
+ .agg(spend=("amount_gbp", lambda x: x.abs().sum()),
+ month=("month", "first"),
+ quarter=("quarter", "first"))
+ .reset_index()
+ .sort_values("period")
+ )
+ if len(cat_df) < 3:
+ continue
+
+ X = cat_df[["month", "quarter"]].values
+ y = cat_df["spend"].values
+
+ model = xgb.XGBRegressor(
+ n_estimators=200, max_depth=4,
+ learning_rate=0.05, random_state=42, verbosity=0
+ )
+ model.fit(X, y)
+
+ import calendar
+ from datetime import date
+ today = date.today()
+ next_month = (today.month % 12) + 1
+ next_quarter = ((next_month - 1) // 3) + 1
+
+ pred = float(model.predict([[next_month, next_quarter]])[0])
+ predictions[cat] = round(max(pred, 0), 2)
+
+ return pd.DataFrame(
+ list(predictions.items()),
+ columns=["category", "predicted_spend_gbp"]
+ ).sort_values("predicted_spend_gbp", ascending=False)
+
+forecast = predict_next_month_spend(df)
+print(forecast)
+```
+
+## Using monzo-txn-ml
+
+For a complete ML toolkit built on these patterns, see
+[monzo-txn-ml](https://github.com/atharvsatpute/monzo-txn-ml) — an open-source
+Python package (MIT licence) that packages the above into production-ready
+modules with tests and CI.
+
+```bash
+pip install monzo-txn-ml
+```