Update ensemble_predictions_fixed.ipynb

adamyhe · adamyhe · commit 14dd2ff80b8b · 2024-03-19T14:05:13.000-04:00
diff --git a/clipnet_evaluation/ensemble_predictions_fixed.ipynb b/clipnet_evaluation/ensemble_predictions_fixed.ipynb
@@ -25,7 +25,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
     "import pandas as pd\n",
     "import os\n",
     "import seaborn as sns\n",
-    "from scipy.stats import pearsonr\n",
+    "from scipy.stats import pearsonr, spearmanr\n",
     "\n",
     "from matplotlib.colors import LinearSegmentedColormap\n",
     "import mpl_scatter_density # needed for density scatter plots\n",
@@ -94,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -239,6 +239,76 @@
     "plt.savefig(\"img/ensemble_predictions_pearson_cdf.pdf\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "log_sums.index = log_sums.index % (len(log_sums) // 67)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PearsonRResult(statistic=0.6686770636128865, pvalue=0.0)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pearsonr(log_sums.groupby(level=0)[\"pred\"].mean(), log_sums.groupby(level=0)[\"expt\"].mean())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rows are assumed to be 67 equal-sized, contiguous per-individual blocks (TODO confirm),\n",
+    "# so derive the block length from the data instead of hard-coding 4901.\n",
+    "n_individuals = 67\n",
+    "rows_per_individual = log_sums.shape[0] // n_individuals\n",
+    "per_individual = [\n",
+    "    log_sums.iloc[rows_per_individual * i : rows_per_individual * (i + 1)]\n",
+    "    for i in range(n_individuals)\n",
+    "]\n",
+    "\n",
+    "# Element [0] of each result is the correlation statistic; the p-value is dropped.\n",
+    "individual_pearsons = pd.Series([pearsonr(d[\"pred\"], d[\"expt\"])[0] for d in per_individual])\n",
+    "\n",
+    "individual_spearmans = pd.Series([spearmanr(d[\"pred\"], d[\"expt\"])[0] for d in per_individual])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6509504629274192"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "individual_pearsons.median(skipna=True)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 76,
@@ -295,6 +365,13 @@
     "pearsonr(log_sums[\"pred\"], log_sums[\"expt\"])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 78,