Skip to content

Commit 19cea34

Browse files
authored
Merge pull request #108 from NGO-Algorithm-Audit/feature/sdg-chart-dropdowns-16-jun
Feature/sdg chart dropdowns 16 jun
2 parents 17c134f + 8df2dd4 commit 19cea34

17 files changed

+1263
-753
lines changed

notebooks/synthetic data generation tool/GC_drop_LawSchoolAdmissionBar.ipynb

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -553,24 +553,9 @@
553553
},
554554
{
555555
"cell_type": "code",
556-
"execution_count": 14,
556+
"execution_count": 11,
557557
"metadata": {},
558-
"outputs": [
559-
{
560-
"ename": "ValueError",
561-
"evalue": "All objects passed were None",
562-
"output_type": "error",
563-
"traceback": [
564-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
565-
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
566-
"Cell \u001b[0;32mIn[14], line 22\u001b[0m\n\u001b[1;32m 20\u001b[0m encoders[col] \u001b[38;5;241m=\u001b[39m encoder\n\u001b[1;32m 21\u001b[0m data\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m[col], inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 22\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransformed_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreset_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdrop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m dtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnumerical\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 25\u001b[0m scaler \u001b[38;5;241m=\u001b[39m StandardScaler(with_mean\u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, with_std\u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m)\n",
567-
"File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m \u001b[43m_Concatenator\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverify_integrity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n",
568-
"File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[0;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[0;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_clean_keys_and_objs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[1;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n",
569-
"File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/pandas/core/reshape/concat.py:541\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[0;34m(self, objs, keys)\u001b[0m\n\u001b[1;32m 538\u001b[0m keys \u001b[38;5;241m=\u001b[39m Index(clean_keys, name\u001b[38;5;241m=\u001b[39mname, dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(keys, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 541\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll objects passed were None\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m objs_list, keys\n",
570-
"\u001b[0;31mValueError\u001b[0m: All objects passed were None"
571-
]
572-
}
573-
],
558+
"outputs": [],
574559
"source": [
575560
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler\n",
576561
"\n",
@@ -593,32 +578,32 @@
593578
" transformed_data = _encode_categorical(data[col], encoder)\n",
594579
" encoders[col] = encoder\n",
595580
" data.drop(columns=[col], inplace=True)\n",
596-
" data = pd.concat([data.reset_index(drop=True, inplace=True), transformed_data.reset_index(drop=True, inplace=True)], axis=1)\n",
581+
" data = pd.concat([data.reset_index(drop=True), transformed_data.reset_index(drop=True)], axis=1)\n",
597582
"\n",
598583
" elif dtype == \"numerical\":\n",
599584
" scaler = StandardScaler(with_mean= False, with_std= False)\n",
600585
" data[col] = scaler.fit_transform(data[[col]])\n",
601-
" scalers[col] = scaler\n"
586+
" scalers[col] = scaler"
602587
]
603588
},
604589
{
605590
"cell_type": "code",
606-
"execution_count": 13,
591+
"execution_count": 12,
607592
"metadata": {},
608593
"outputs": [
609594
{
610595
"name": "stdout",
611596
"output_type": "stream",
612597
"text": [
613598
"Missing values in data:\n",
614-
"ugpa 20\n",
615-
"sex 20\n",
616-
"race1 0\n",
617-
"bar 0\n",
599+
"ugpa 0\n",
600+
"sex 0\n",
601+
"race1 0\n",
602+
"bar 0\n",
618603
"dtype: int64\n",
619604
"\n",
620-
"Total missing values: 40\n",
621-
"Data shape: (22407, 4)\n"
605+
"Total missing values: 0\n",
606+
"Data shape: (22387, 4)\n"
622607
]
623608
}
624609
],
@@ -631,7 +616,7 @@
631616
},
632617
{
633618
"cell_type": "code",
634-
"execution_count": null,
619+
"execution_count": 13,
635620
"metadata": {},
636621
"outputs": [
637622
{
@@ -652,7 +637,7 @@
652637
},
653638
{
654639
"cell_type": "code",
655-
"execution_count": null,
640+
"execution_count": 14,
656641
"metadata": {},
657642
"outputs": [
658643
{
@@ -678,7 +663,7 @@
678663
},
679664
{
680665
"cell_type": "code",
681-
"execution_count": null,
666+
"execution_count": 15,
682667
"metadata": {},
683668
"outputs": [
684669
{

0 commit comments

Comments
 (0)