Update binary_logistic_regression_manual.ipynb

fs446 · fs446 · commit 779ce04b7285 · 2025-10-20T13:01:02.000+02:00
ROC curve and other curves based on shifted threshold
diff --git a/binary_logistic_regression_manual.ipynb b/binary_logistic_regression_manual.ipynb
@@ -54,6 +54,7 @@
     "\n",
     "from sklearn.metrics import confusion_matrix, precision_recall_fscore_support\n",
     "from sklearn.metrics import balanced_accuracy_score, accuracy_score\n",
+    "# from sklearn.metrics import RocCurveDisplay\n",
     "\n",
     "from util_binary_logistic_regression import toy_data, init_weights\n",
     "from util_binary_logistic_regression import my_sigmoid, predict_class\n",
@@ -637,6 +638,112 @@
     "print('accuray', accuracy_test)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5c265037",
+   "metadata": {},
+   "source": [
+    "### Curves Based on True/False Positive/Negative Rates\n",
+    "\n",
+    "- model robustness is evaluated by checking different prediction thresholds\n",
+    "- for each prediction threshold, the four entries of the confusion matrix can be calculated on an unseen test data set\n",
+    "- we can set up (at least) four characteristic curves\n",
+    "- a model that is fair w.r.t. binary classes should have type-I $\\approx$ type-II error, which is the same as requiring TPR $\\approx$ TNR\n",
+    "- this information is conveniently and directly deduced by inspecting not only one but at least two of those curves (for example the first column or the second column of the plot below)\n",
+    "- very often the **receiver operating characteristic** (ROC) curve is discussed (left, top subplot)\n",
+    "- the ROC tells us about type-I error vs. test power"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "caedb2f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional cross-check of the ROC subplot with scikit-learn (needs the\n",
+    "# commented-out RocCurveDisplay import in the import cell):\n",
+    "# RocCurveDisplay.from_predictions(\n",
+    "#     Y_test_man[0, :],\n",
+    "#     my_sigmoid(np.dot(w.T, X_test_man) + b)[0, :])\n",
+    "\n",
+    "# Sweep the decision threshold over [0, 1) and, for each threshold, derive\n",
+    "# the four rates TPR, FPR, TNR, FNR from the confusion matrix on test data.\n",
+    "plt.figure(figsize=(8, 8))\n",
+    "N = 1000\n",
+    "predict_threshold = np.linspace(0, 1, N, endpoint=False)\n",
+    "TPR = np.zeros_like(predict_threshold)\n",
+    "FPR = np.zeros_like(predict_threshold)\n",
+    "TNR = np.zeros_like(predict_threshold)\n",
+    "FNR = np.zeros_like(predict_threshold)\n",
+    "# the model output does not depend on the threshold -> compute it only once\n",
+    "Y_prob = my_sigmoid(np.dot(w.T, X_test_man) + b)\n",
+    "for idx, val in enumerate(predict_threshold):\n",
+    "    Y_pred = (Y_prob >= val) * 1\n",
+    "    # labels=[0, 1] keeps the matrix 2x2 even if only one class occurs\n",
+    "    cm = confusion_matrix(Y_test_man[0, :], Y_pred[0, :], labels=[0, 1])\n",
+    "    TN, FP = cm[0, 0], cm[0, 1]\n",
+    "    FN, TP = cm[1, 0], cm[1, 1]\n",
+    "    FPR[idx] = FP / (TN+FP)  # type I error\n",
+    "    TPR[idx] = TP / (FN+TP)  # recall, sensitivity, test power\n",
+    "    FNR[idx] = FN / (FN+TP)  # type II error\n",
+    "    TNR[idx] = TN / (TN+FP)  # specificity, selectivity\n",
+    "    if idx == N//2:  # indicate 0.5 probability decision point\n",
+    "        # label this operating point in each of the four subplots\n",
+    "        plt.subplot(2, 2, 1)\n",
+    "        plt.text(FPR[idx], TPR[idx], '. %0.2f' % val)\n",
+    "        plt.subplot(2, 2, 2)\n",
+    "        plt.text(FPR[idx], FNR[idx], '. %0.2f' % val)\n",
+    "        plt.subplot(2, 2, 3)\n",
+    "        plt.text(FNR[idx], TNR[idx], '. %0.2f' % val)\n",
+    "        plt.subplot(2, 2, 4)\n",
+    "        plt.text(TPR[idx], TNR[idx], '. %0.2f' % val)\n",
+    "\n",
+    "# receiver operating characteristic (ROC) curve:\n",
+    "plt.subplot(2, 2, 1)\n",
+    "plt.plot(FPR, TPR, lw=2)  # TN ok, FP ok, FN ok, TP ok\n",
+    "plt.plot(0.05, 0.95, 'C3x')  # marker at FPR = 0.05, TPR = 0.95\n",
+    "plt.plot([0, 1], [0, 1])  # diagonal TPR = FPR\n",
+    "plt.text(0.05, 0.7, 'ROC curve')\n",
+    "plt.xlabel('FPR = type I error')\n",
+    "plt.ylabel('TPR = recall = sensitivity = power')\n",
+    "plt.grid(True)\n",
+    "plt.axis([0, 1, 0, 1])\n",
+    "\n",
+    "# type I error vs. type II error:\n",
+    "plt.subplot(2, 2, 2)\n",
+    "plt.plot(FPR, FNR, lw=2)  # TN ok, FP ok, FN ok, TP ok\n",
+    "plt.plot(0.05, 0.05, 'C3x')  # marker at FPR = 0.05, FNR = 0.05\n",
+    "plt.plot([0, 1], [1, 0])  # anti-diagonal FNR = 1 - FPR\n",
+    "plt.xlabel('FPR = type I error')\n",
+    "plt.ylabel('FNR = type II error')\n",
+    "plt.grid(True)\n",
+    "plt.axis([0, 1, 0, 1])\n",
+    "\n",
+    "# type II error vs. specificity:\n",
+    "plt.subplot(2, 2, 3)\n",
+    "plt.plot(FNR, TNR, lw=2)  # TN ok, FP ok, FN ok, TP ok\n",
+    "plt.plot(0.05, 0.95, 'C3x')  # marker at FNR = 0.05, TNR = 0.95\n",
+    "plt.plot([0, 1], [0, 1])  # diagonal TNR = FNR\n",
+    "plt.xlabel('FNR = type II error')\n",
+    "plt.ylabel('TNR = specificity = selectivity')\n",
+    "plt.grid(True)\n",
+    "plt.axis([0, 1, 0, 1])\n",
+    "\n",
+    "# sensitivity vs. specificity:\n",
+    "plt.subplot(2, 2, 4)\n",
+    "plt.plot(TPR, TNR, lw=2)  # TN ok, FP ok, FN ok, TP ok\n",
+    "plt.plot(0.95, 0.95, 'C3x')  # marker at TPR = 0.95, TNR = 0.95\n",
+    "plt.plot([0, 1], [1, 0])  # anti-diagonal TNR = 1 - TPR\n",
+    "plt.xlabel('TPR = recall = sensitivity = power')\n",
+    "plt.ylabel('TNR = specificity = selectivity')\n",
+    "plt.grid(True)\n",
+    "plt.axis([0, 1, 0, 1])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c386ec3c",
+   "metadata": {},
+   "source": [
+    "We see that the model is highly balanced w.r.t. the {0,1}-class predictions.\n",
+    "\n",
+    "The model performs with a type-I error less than 5% and with a type-II error less than 5%.\n",
+    "\n",
+    "The model performs with a true positive rate (TPR) larger than 95% and with a true negative rate (TNR) larger than 95%.\n",
+    "\n",
+    "In medical applications (cf. COVID-19 testing), type-I and type-II error rates are typically even smaller, and thus TPR and TNR are typically even larger."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "651b1eff",