Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -527,12 +527,7 @@
"with sset.PATH_GWDB_RAW.open(\"wb\") as f:\n",
" f.write(\n",
" requests.get(\n",
" \"https://www.ubs.com/global/en/family-office-uhnw/reports/\"\n",
" \"global-wealth-report-2023/_jcr_content/mainpar/toplevelgrid_5684475/col2/\"\n",
" \"linklistnewlook/link_copy.0357374027.file/\"\n",
" \"PS9jb250ZW50L2RhbS9hc3NldHMvd20vZ2xvYmFsL2ltZy9nbG9iYWwtZmFtaWx5LW9mZmljZS\"\n",
" \"9kb2NzL2RhdGFib29rLWdsb2JhbC13ZWFsdGgtcmVwb3J0LTIwMjMtZW4ucGRm/\"\n",
" \"databook-global-wealth-report-2023-en.pdf\"\n",
" \"https://urldefense.us/v3/__https:/www.ubs.com/global/en/family-office-uhnw/reports/global-wealth-report-2023/_jcr_content/mainpar/toplevelgrid_5684475_1708633751/col1/innergrid/xcol1/actionbutton_copy_co.1784379955.file/PS9jb250ZW50L2RhbS9hc3NldHMvd20vZ2xvYmFsL2ltZy9nbG9iYWwtZmFtaWx5LW9mZmljZS9kb2NzL2d3ci0yMDIzLWVuLTIucGRm/gwr-2023-en-2.pdf__;!!PvBDto6Hs4WbVuu7!LkcjRnpeJpav7n7pccnu9GARkLWeYSdBrUR7_2wbXi9tKQiH0E3ypwI9h8cm0uHmnY0eFJ3da07dQQScVsUHry39Hg$\"\n",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked this link and it looks like it's not correct - sorry for sending you the wrong one. If you open the link used here, you'll see that it goes to the "report" rather than the "databook". Here's a link to the databook:

https://rev01ution.red/wp-content/uploads/2024/03/global-wealth-databook-2023-ubs.pdf

Also, the "urldefense" part of the URL is just something that email clients add, so it can be removed. But let's use the link above; otherwise the notebook will download the wrong file (which is why you didn't see Table 2-1 or Table 2-2 when you looked at it).

" ).content\n",
" )"
]
Expand Down Expand Up @@ -1740,7 +1735,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.12.2"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
146 changes: 64 additions & 82 deletions notebooks/data-cleaning/01-clean-country-socioecon-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,78 @@
},
{
"cell_type": "markdown",
"id": "dbff1f30-3dd4-4666-9df2-5358f456f5eb",
"id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the same changes made in #16 (moving the GADM cell) are repeated in this PR, and I don't see the removal of the mangroves section.

"metadata": {},
"source": [
"## Get country codes from GADM"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "994246ad-0616-43c7-a4b8-675c2a2e1d65",
"metadata": {},
"outputs": [],
"source": [
"## Mangroves"
"ccode_mapping = (\n",
" read_shapefile(sset.PATH_GADM, layer=0)\n",
" .set_index(\"COUNTRY\")\n",
" .GID_0.rename(\"ccode\")\n",
" .rename_axis(\"name\")\n",
")\n",
"# drop numerical china/india/pakistan GID_0's\n",
"ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n",
"\n",
"# add on manaully added segments (which account for uninhabited areas not in GADM)\n",
"ccode_mapping = pd.concat(\n",
" (ccode_mapping, pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n",
")\n",
"\n",
"# add some manual mappers\n",
"# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n",
"manual = sset.CCODE_MANUAL.copy()\n",
"manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n",
"ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n",
"\n",
"# Handle no-accent names\n",
"alt_index = (\n",
" ccode_mapping.index.str.normalize(\"NFKD\")\n",
" .str.encode(\"ascii\", errors=\"ignore\")\n",
" .astype(str)\n",
")\n",
"alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n",
"ccode_mapping = (\n",
" pd.concat((ccode_mapping, alt))\n",
" .reset_index()\n",
" .drop_duplicates()\n",
" .set_index(\"name\")\n",
" .ccode.sort_index()\n",
")\n",
"\n",
"# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n",
"# France + overseas depts)\n",
"valid_ccodes = np.setdiff1d(\n",
" np.unique(\n",
" np.concatenate(\n",
" (\n",
" ccode_mapping.unique(),\n",
" [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n",
" )\n",
" )\n",
" ),\n",
" sset.EXCLUDED_ISOS,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1bc40d4b-ab6f-40de-b37f-cef0b2cffe6c",
"execution_count": 12,
"id": "f0525373-d81a-4710-b729-de857effa1ef",
"metadata": {},
"outputs": [],
"source": [
"this = gpd.GeoDataFrame(geometry=[read_shapefile(sset.PATH_GLOBAL_MANGROVES_RAW).make_valid().unary_union], crs=4326).to_parquet(sset.PATH_GLOBAL_MANGROVES_INT)"
"save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n",
"save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)"
]
},
{
Expand Down Expand Up @@ -242,82 +300,6 @@
"save_geoparquet(adm1, sset.PATH_GADM_ADM1_INT)"
]
},
{
"cell_type": "markdown",
"id": "3207b1e9-62fd-43b1-8265-a923d4ab8eb7",
"metadata": {},
"source": [
"## Get country codes from GADM"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "994246ad-0616-43c7-a4b8-675c2a2e1d65",
"metadata": {},
"outputs": [],
"source": [
"ccode_mapping = (\n",
" read_shapefile(sset.PATH_GADM, layer=0)\n",
" .set_index(\"COUNTRY\")\n",
" .GID_0.rename(\"ccode\")\n",
" .rename_axis(\"name\")\n",
")\n",
"# drop numerical china/india/pakistan GID_0's\n",
"ccode_mapping = ccode_mapping[ccode_mapping.str[1] != \"0\"]\n",
"\n",
"# add on manaully added segments (which account for uninhabited areas not in GADM)\n",
"ccode_mapping = pd.concat(\n",
" (ccode_mapping, pd.read_parquet(sset.PATH_SEG_PTS_MANUAL).ccode)\n",
")\n",
"\n",
"# add some manual mappers\n",
"# Netherlands Antilles in CIA WFB corresponds to these three (not ABW)\n",
"manual = sset.CCODE_MANUAL.copy()\n",
"manual[\"Netherlands Antilles\"] = \"BES+CUW+SXM\"\n",
"ccode_mapping = pd.concat([ccode_mapping, manual]).sort_index()\n",
"\n",
"# Handle no-accent names\n",
"alt_index = (\n",
" ccode_mapping.index.str.normalize(\"NFKD\")\n",
" .str.encode(\"ascii\", errors=\"ignore\")\n",
" .astype(str)\n",
")\n",
"alt = pd.Series(ccode_mapping.values, index=alt_index, name=\"ccode\")\n",
"ccode_mapping = (\n",
" pd.concat((ccode_mapping, alt))\n",
" .reset_index()\n",
" .drop_duplicates()\n",
" .set_index(\"name\")\n",
" .ccode.sort_index()\n",
")\n",
"\n",
"# getting list of valid ccodes including some previously uncaptured mixtures (i.e.\n",
"# France + overseas depts)\n",
"valid_ccodes = np.setdiff1d(\n",
" np.unique(\n",
" np.concatenate(\n",
" (\n",
" ccode_mapping.unique(),\n",
" [k for v in sset.PPP_CCODE_IF_MSNG.values() for k in v],\n",
" )\n",
" )\n",
" ),\n",
" sset.EXCLUDED_ISOS,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f0525373-d81a-4710-b729-de857effa1ef",
"metadata": {},
"outputs": [],
"source": [
"save(pd.DataFrame({\"ccode\": valid_ccodes}), sset.PATH_ALL_VALID_HIST_CCODES)\n",
"save(ccode_mapping.to_frame(), sset.PATH_HIST_CCODE_MAPPING)"
]
},
{
"cell_type": "markdown",
"id": "647b195b",
Expand Down Expand Up @@ -1471,7 +1453,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.12.2"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down