NGO-Algorithm-Audit
diff --git a/notebooks/unsupervised bias detection tool/COMPAS_FP.ipynb b/notebooks/unsupervised bias detection tool/COMPAS_FP.ipynb
Lines changed: 28 additions & 28 deletions
@@ -597,7 +597,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -694,7 +694,7 @@
        "4      0.0  1.0   5.0              0.0       0.0         0.0             0.0"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -710,7 +710,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -726,7 +726,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -737,7 +737,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -750,7 +750,8 @@
    ],
    "source": [
     "# Create a dataframe from filtered_df except the bias metric column\n",
-    "df_no_bias_metric = filtered_df.drop(columns=[\"false_positive\"])\n",
+    "bias_metric = \"false_positive\"\n",
+    "df_no_bias_metric = filtered_df.drop(columns=[bias_metric])\n",
     "if df_no_bias_metric.dtypes.nunique() == 1:\n",
     "    print('consistent data')\n",
     "else:\n",
@@ -766,7 +767,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -786,7 +787,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -844,7 +845,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -858,7 +859,6 @@
    ],
    "source": [
     "# split the data into training and testing sets\n",
-    "bias_metric = \"false_positive\"\n",
     "train_df, test_df = train_test_split(filtered_df, test_size=0.2, random_state=42)\n",
     "X_train = train_df.drop(columns=[bias_metric])\n",
     "\n",
@@ -896,7 +896,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -1331,7 +1331,7 @@
        "BiasAwareHierarchicalKModes(bahc_max_iter=20, bahc_min_cluster_size=57.71)"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1344,7 +1344,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -1353,7 +1353,7 @@
        "5"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1365,7 +1365,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -1374,7 +1374,7 @@
        "array([ 0.05290941, -0.02457072, -0.04827102, -0.04888889, -0.05093596])"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1386,7 +1386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -1417,7 +1417,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -1426,7 +1426,7 @@
        "array([0, 0, 4, ..., 0, 0, 1], dtype=uint32)"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1439,7 +1439,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -1618,7 +1618,7 @@
        "[1443 rows x 7 columns]"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1642,7 +1642,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -1752,7 +1752,7 @@
        "4          0              0              0  "
       ]
      },
-     "execution_count": 17,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1776,7 +1776,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1787,7 +1787,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -1842,7 +1842,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -1906,7 +1906,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 22,
    "metadata": {
     "vscode": {
      "languageId": "ruby"
@@ -1970,7 +1970,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 23,
    "metadata": {
     "vscode": {
      "languageId": "ruby"
Original file line number	Diff line number	Diff line change
`@@ -597,7 +597,7 @@`
`597`	`597`	`},`
`598`	`598`	`{`
`599`	`599`	`"cell_type": "code",`
`600`		`- "execution_count": 5,`
	`600`	`+ "execution_count": 6,`
`601`	`601`	`"metadata": {},`
`602`	`602`	`"outputs": [`
`603`	`603`	`{`
`@@ -694,7 +694,7 @@`
`694`	`694`	`"4 0.0 1.0 5.0 0.0 0.0 0.0 0.0"`
`695`	`695`	`]`
`696`	`696`	`},`
`697`		`- "execution_count": 5,`
	`697`	`+ "execution_count": 6,`
`698`	`698`	`"metadata": {},`
`699`	`699`	`"output_type": "execute_result"`
`700`	`700`	`}`
`@@ -710,7 +710,7 @@`
`710`	`710`	`},`
`711`	`711`	`{`
`712`	`712`	`"cell_type": "code",`
`713`		`- "execution_count": 6,`
	`713`	`+ "execution_count": 7,`
`714`	`714`	`"metadata": {},`
`715`	`715`	`"outputs": [`
`716`	`716`	`{`
`@@ -726,7 +726,7 @@`
`726`	`726`	`"dtype: object"`
`727`	`727`	`]`
`728`	`728`	`},`
`729`		`- "execution_count": 6,`
	`729`	`+ "execution_count": 7,`
`730`	`730`	`"metadata": {},`
`731`	`731`	`"output_type": "execute_result"`
`732`	`732`	`}`
`@@ -737,7 +737,7 @@`
`737`	`737`	`},`
`738`	`738`	`{`
`739`	`739`	`"cell_type": "code",`
`740`		`- "execution_count": 7,`
	`740`	`+ "execution_count": null,`
`741`	`741`	`"metadata": {},`
`742`	`742`	`"outputs": [`
`743`	`743`	`{`
`@@ -750,7 +750,8 @@`
`750`	`750`	`],`
`751`	`751`	`"source": [`
`752`	`752`	`"# Create a dataframe from filtered_df except the bias metric column\n",`
`753`		`- "df_no_bias_metric = filtered_df.drop(columns=[\"false_positive\"])\n",`
	`753`	`+ "bias_metric = \"false_positive\"\n",`
	`754`	`+ "df_no_bias_metric = filtered_df.drop(columns=[bias_metric])\n",`
`754`	`755`	`"if df_no_bias_metric.dtypes.nunique() == 1:\n",`
`755`	`756`	`" print('consistent data')\n",`
`756`	`757`	`"else:\n",`
`@@ -766,7 +767,7 @@`
`766`	`767`	`},`
`767`	`768`	`{`
`768`	`769`	`"cell_type": "code",`
`769`		`- "execution_count": 8,`
	`770`	`+ "execution_count": 9,`
`770`	`771`	`"metadata": {},`
`771`	`772`	`"outputs": [`
`772`	`773`	`{`
`@@ -786,7 +787,7 @@`
`786`	`787`	`},`
`787`	`788`	`{`
`788`	`789`	`"cell_type": "code",`
`789`		`- "execution_count": 9,`
	`790`	`+ "execution_count": 10,`
`790`	`791`	`"metadata": {},`
`791`	`792`	`"outputs": [`
`792`	`793`	`{`
`@@ -844,7 +845,7 @@`
`844`	`845`	`},`
`845`	`846`	`{`
`846`	`847`	`"cell_type": "code",`
`847`		`- "execution_count": 10,`
	`848`	`+ "execution_count": null,`
`848`	`849`	`"metadata": {},`
`849`	`850`	`"outputs": [`
`850`	`851`	`{`
`@@ -858,7 +859,6 @@`
`858`	`859`	`],`
`859`	`860`	`"source": [`
`860`	`861`	`"# split the data into training and testing sets\n",`
`861`		`- "bias_metric = \"false_positive\"\n",`
`862`	`862`	`"train_df, test_df = train_test_split(filtered_df, test_size=0.2, random_state=42)\n",`
`863`	`863`	`"X_train = train_df.drop(columns=[bias_metric])\n",`
`864`	`864`	`"\n",`
`@@ -896,7 +896,7 @@`
`896`	`896`	`},`
`897`	`897`	`{`
`898`	`898`	`"cell_type": "code",`
`899`		`- "execution_count": 11,`
	`899`	`+ "execution_count": 12,`
`900`	`900`	`"metadata": {},`
`901`	`901`	`"outputs": [`
`902`	`902`	`{`
`@@ -1331,7 +1331,7 @@`
`1331`	`1331`	`"BiasAwareHierarchicalKModes(bahc_max_iter=20, bahc_min_cluster_size=57.71)"`
`1332`	`1332`	`]`
`1333`	`1333`	`},`
`1334`		`- "execution_count": 11,`
	`1334`	`+ "execution_count": 12,`
`1335`	`1335`	`"metadata": {},`
`1336`	`1336`	`"output_type": "execute_result"`
`1337`	`1337`	`}`
`@@ -1344,7 +1344,7 @@`
`1344`	`1344`	`},`
`1345`	`1345`	`{`
`1346`	`1346`	`"cell_type": "code",`
`1347`		`- "execution_count": 12,`
	`1347`	`+ "execution_count": 13,`
`1348`	`1348`	`"metadata": {},`
`1349`	`1349`	`"outputs": [`
`1350`	`1350`	`{`
`@@ -1353,7 +1353,7 @@`
`1353`	`1353`	`"5"`
`1354`	`1354`	`]`
`1355`	`1355`	`},`
`1356`		`- "execution_count": 12,`
	`1356`	`+ "execution_count": 13,`
`1357`	`1357`	`"metadata": {},`
`1358`	`1358`	`"output_type": "execute_result"`
`1359`	`1359`	`}`
`@@ -1365,7 +1365,7 @@`
`1365`	`1365`	`},`
`1366`	`1366`	`{`
`1367`	`1367`	`"cell_type": "code",`
`1368`		`- "execution_count": 13,`
	`1368`	`+ "execution_count": 14,`
`1369`	`1369`	`"metadata": {},`
`1370`	`1370`	`"outputs": [`
`1371`	`1371`	`{`
`@@ -1374,7 +1374,7 @@`
`1374`	`1374`	`"array([ 0.05290941, -0.02457072, -0.04827102, -0.04888889, -0.05093596])"`
`1375`	`1375`	`]`
`1376`	`1376`	`},`
`1377`		`- "execution_count": 13,`
	`1377`	`+ "execution_count": 14,`
`1378`	`1378`	`"metadata": {},`
`1379`	`1379`	`"output_type": "execute_result"`
`1380`	`1380`	`}`
`@@ -1386,7 +1386,7 @@`
`1386`	`1386`	`},`
`1387`	`1387`	`{`
`1388`	`1388`	`"cell_type": "code",`
`1389`		`- "execution_count": 14,`
	`1389`	`+ "execution_count": 15,`
`1390`	`1390`	`"metadata": {},`
`1391`	`1391`	`"outputs": [`
`1392`	`1392`	`{`
`@@ -1417,7 +1417,7 @@`
`1417`	`1417`	`},`
`1418`	`1418`	`{`
`1419`	`1419`	`"cell_type": "code",`
`1420`		`- "execution_count": 15,`
	`1420`	`+ "execution_count": 16,`
`1421`	`1421`	`"metadata": {},`
`1422`	`1422`	`"outputs": [`
`1423`	`1423`	`{`
`@@ -1426,7 +1426,7 @@`
`1426`	`1426`	`"array([0, 0, 4, ..., 0, 0, 1], dtype=uint32)"`
`1427`	`1427`	`]`
`1428`	`1428`	`},`
`1429`		`- "execution_count": 15,`
	`1429`	`+ "execution_count": 16,`
`1430`	`1430`	`"metadata": {},`
`1431`	`1431`	`"output_type": "execute_result"`
`1432`	`1432`	`}`
`@@ -1439,7 +1439,7 @@`
`1439`	`1439`	`},`
`1440`	`1440`	`{`
`1441`	`1441`	`"cell_type": "code",`
`1442`		`- "execution_count": 16,`
	`1442`	`+ "execution_count": 17,`
`1443`	`1443`	`"metadata": {},`
`1444`	`1444`	`"outputs": [`
`1445`	`1445`	`{`
`@@ -1618,7 +1618,7 @@`
`1618`	`1618`	`"[1443 rows x 7 columns]"`
`1619`	`1619`	`]`
`1620`	`1620`	`},`
`1621`		`- "execution_count": 16,`
	`1621`	`+ "execution_count": 17,`
`1622`	`1622`	`"metadata": {},`
`1623`	`1623`	`"output_type": "execute_result"`
`1624`	`1624`	`}`
`@@ -1642,7 +1642,7 @@`
`1642`	`1642`	`},`
`1643`	`1643`	`{`
`1644`	`1644`	`"cell_type": "code",`
`1645`		`- "execution_count": 17,`
	`1645`	`+ "execution_count": 18,`
`1646`	`1646`	`"metadata": {},`
`1647`	`1647`	`"outputs": [`
`1648`	`1648`	`{`
`@@ -1752,7 +1752,7 @@`
`1752`	`1752`	`"4 0 0 0 "`
`1753`	`1753`	`]`
`1754`	`1754`	`},`
`1755`		`- "execution_count": 17,`
	`1755`	`+ "execution_count": 18,`
`1756`	`1756`	`"metadata": {},`
`1757`	`1757`	`"output_type": "execute_result"`
`1758`	`1758`	`}`
`@@ -1776,7 +1776,7 @@`
`1776`	`1776`	`},`
`1777`	`1777`	`{`
`1778`	`1778`	`"cell_type": "code",`
`1779`		`- "execution_count": 18,`
	`1779`	`+ "execution_count": 19,`
`1780`	`1780`	`"metadata": {},`
`1781`	`1781`	`"outputs": [],`
`1782`	`1782`	`"source": [`
`@@ -1787,7 +1787,7 @@`
`1787`	`1787`	`},`
`1788`	`1788`	`{`
`1789`	`1789`	`"cell_type": "code",`
`1790`		`- "execution_count": 19,`
	`1790`	`+ "execution_count": 20,`
`1791`	`1791`	`"metadata": {},`
`1792`	`1792`	`"outputs": [`
`1793`	`1793`	`{`
`@@ -1842,7 +1842,7 @@`
`1842`	`1842`	`},`
`1843`	`1843`	`{`
`1844`	`1844`	`"cell_type": "code",`
`1845`		`- "execution_count": 20,`
	`1845`	`+ "execution_count": 21,`
`1846`	`1846`	`"metadata": {},`
`1847`	`1847`	`"outputs": [`
`1848`	`1848`	`{`
`@@ -1906,7 +1906,7 @@`
`1906`	`1906`	`},`
`1907`	`1907`	`{`
`1908`	`1908`	`"cell_type": "code",`
`1909`		`- "execution_count": 33,`
	`1909`	`+ "execution_count": 22,`
`1910`	`1910`	`"metadata": {`
`1911`	`1911`	`"vscode": {`
`1912`	`1912`	`"languageId": "ruby"`
`@@ -1970,7 +1970,7 @@`
`1970`	`1970`	`},`
`1971`	`1971`	`{`
`1972`	`1972`	`"cell_type": "code",`
`1973`		`- "execution_count": 32,`
	`1973`	`+ "execution_count": 23,`
`1974`	`1974`	`"metadata": {`
`1975`	`1975`	`"vscode": {`
`1976`	`1976`	`"languageId": "ruby"`