|
19 | 19 | }, |
20 | 20 | { |
21 | 21 | "cell_type": "code", |
22 | | - "execution_count": 2, |
| 22 | + "execution_count": 1, |
23 | 23 | "metadata": {}, |
24 | 24 | "outputs": [], |
25 | 25 | "source": [ |
|
46 | 46 | }, |
47 | 47 | { |
48 | 48 | "cell_type": "code", |
49 | | - "execution_count": 3, |
| 49 | + "execution_count": 2, |
50 | 50 | "metadata": {}, |
51 | 51 | "outputs": [ |
52 | 52 | { |
|
867 | 867 | "[27 rows x 22 columns]" |
868 | 868 | ] |
869 | 869 | }, |
870 | | - "execution_count": 3, |
| 870 | + "execution_count": 2, |
871 | 871 | "metadata": {}, |
872 | 872 | "output_type": "execute_result" |
873 | 873 | } |
|
899 | 899 | }, |
900 | 900 | { |
901 | 901 | "cell_type": "code", |
902 | | - "execution_count": 4, |
| 902 | + "execution_count": 3, |
903 | 903 | "metadata": {}, |
904 | 904 | "outputs": [ |
905 | 905 | { |
|
1332 | 1332 | "2017260N12310 0.000000e+00 1.534596e+09 " |
1333 | 1333 | ] |
1334 | 1334 | }, |
1335 | | - "execution_count": 4, |
| 1335 | + "execution_count": 3, |
1336 | 1336 | "metadata": {}, |
1337 | 1337 | "output_type": "execute_result" |
1338 | 1338 | } |
|
1385 | 1385 | }, |
1386 | 1386 | { |
1387 | 1387 | "cell_type": "code", |
1388 | | - "execution_count": 5, |
| 1388 | + "execution_count": 4, |
1389 | 1389 | "metadata": {}, |
1390 | 1390 | "outputs": [], |
1391 | 1391 | "source": [ |
|
1412 | 1412 | }, |
1413 | 1413 | { |
1414 | 1414 | "cell_type": "code", |
1415 | | - "execution_count": 6, |
| 1415 | + "execution_count": 5, |
1416 | 1416 | "metadata": {}, |
1417 | 1417 | "outputs": [ |
1418 | 1418 | { |
1419 | 1419 | "name": "stdout", |
1420 | 1420 | "output_type": "stream", |
1421 | 1421 | "text": [ |
1422 | | - "2023-06-13 16:36:05,304 - climada.hazard.tc_tracks - WARNING - The cached IBTrACS data set dates from 2022-03-08 23:23:51 (older than 180 days). Very likely, a more recent version is available. Consider manually removing the file /Users/ldr.riedel/climada/data/IBTrACS.ALL.v04r00.nc and re-running this function, which will download the most recent version of the IBTrACS data set from the official URL.\n" |
| 1422 | + "2023-06-13 17:55:56,240 - climada.hazard.tc_tracks - WARNING - The cached IBTrACS data set dates from 2022-03-08 23:23:51 (older than 180 days). Very likely, a more recent version is available. Consider manually removing the file /Users/ldr.riedel/climada/data/IBTrACS.ALL.v04r00.nc and re-running this function, which will download the most recent version of the IBTrACS data set from the official URL.\n" |
1423 | 1423 | ] |
1424 | 1424 | }, |
1425 | 1425 | { |
|
1484 | 1484 | }, |
1485 | 1485 | { |
1486 | 1486 | "cell_type": "code", |
1487 | | - "execution_count": 7, |
| 1487 | + "execution_count": 6, |
1488 | 1488 | "metadata": {}, |
1489 | 1489 | "outputs": [], |
1490 | 1490 | "source": [ |
|
1521 | 1521 | "\n", |
1522 | 1522 | "Computations on data frames align columns and indexes.\n", |
1523 | 1523 | "The indexes of the calibration data are the IBTrACS IDs, but the indexes of the result of `Impact.impact_at_reg` are the hazard event IDs, which at this point are only integer numbers.\n", |
1524 | | - "To resolve that, we simply set the hazard event IDs to the IBTrACS IDs, which are stored in `Hazard.event_name`.\n", |
| 1524 | + "To resolve that, we set the index of the resulting impact data to `Hazard.event_name`.\n", |
1525 | 1525 | "Once both the impact data and the calibration data are in the same data format, we can compute the RMSE:" |
1526 | 1526 | ] |
1527 | 1527 | }, |
1528 | 1528 | { |
1529 | 1529 | "cell_type": "code", |
1530 | | - "execution_count": 8, |
| 1530 | + "execution_count": 7, |
1531 | 1531 | "metadata": {}, |
1532 | 1532 | "outputs": [], |
1533 | 1533 | "source": [ |
1534 | 1534 | "import numpy as np\n", |
1535 | 1535 | "from climada.engine import Impact\n", |
1536 | 1536 | "\n", |
1537 | | - "# TODO: Dont\n", |
1538 | | - "# Make sure that Hazard.event_id matches indexes of 'data'\n", |
1539 | | - "# hazard.event_id = np.asarray(hazard.event_name)\n", |
1540 | | - "\n", |
1541 | 1537 | "def cost_rmse(impact: Impact, data: pd.DataFrame):\n", |
1542 | 1538 | " \"\"\"A cost function computing the RMSE\"\"\"\n", |
1543 | 1539 | " impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n", |
| 1540 | + " impact.set_index(np.asarray(hazard.event_name), inplace=True)\n", |
1544 | 1541 | " return np.sqrt(np.mean(((data - impact) ** 2).to_numpy()))" |
1545 | 1542 | ] |
1546 | 1543 | }, |
|
1558 | 1555 | }, |
1559 | 1556 | { |
1560 | 1557 | "cell_type": "code", |
1561 | | - "execution_count": 9, |
| 1558 | + "execution_count": 8, |
1562 | 1559 | "metadata": {}, |
1563 | 1560 | "outputs": [], |
1564 | 1561 | "source": [ |
|
1582 | 1579 | }, |
1583 | 1580 | { |
1584 | 1581 | "cell_type": "code", |
1585 | | - "execution_count": 10, |
| 1582 | + "execution_count": 11, |
1586 | 1583 | "metadata": {}, |
1587 | 1584 | "outputs": [ |
1588 | 1585 | { |
|
1609 | 1606 | "{'scale': 0.9903198881207879, 'v_half': 61.51163348395183}" |
1610 | 1607 | ] |
1611 | 1608 | }, |
1612 | | - "execution_count": 10, |
| 1609 | + "execution_count": 11, |
1613 | 1610 | "metadata": {}, |
1614 | 1611 | "output_type": "execute_result" |
1615 | 1612 | } |
|
1625 | 1622 | " cost_func=cost_rmse,\n", |
1626 | 1623 | " impact_func_gen=impact_func_tc,\n", |
1627 | 1624 | " bounds=bounds,\n", |
1628 | | - " align=False,\n", |
1629 | 1625 | ")\n", |
1630 | | - "exposure.assign_centroids(hazard)\n", |
1631 | 1626 | "\n", |
1632 | 1627 | "# Create and run the optimizer\n", |
1633 | 1628 | "opt = BayesianOptimizer(input)\n", |
|
1648 | 1643 | }, |
1649 | 1644 | { |
1650 | 1645 | "cell_type": "code", |
1651 | | - "execution_count": 11, |
| 1646 | + "execution_count": 12, |
1652 | 1647 | "metadata": {}, |
1653 | 1648 | "outputs": [ |
1654 | 1649 | { |
|
1657 | 1652 | "<AxesSubplot:title={'center':'TC 1: Emanuel 2011'}, xlabel='Intensity (m/s)', ylabel='Impact (%)'>" |
1658 | 1653 | ] |
1659 | 1654 | }, |
1660 | | - "execution_count": 11, |
| 1655 | + "execution_count": 12, |
1661 | 1656 | "metadata": {}, |
1662 | 1657 | "output_type": "execute_result" |
1663 | 1658 | }, |
|
1690 | 1685 | }, |
1691 | 1686 | { |
1692 | 1687 | "cell_type": "code", |
1693 | | - "execution_count": 12, |
| 1688 | + "execution_count": 13, |
1694 | 1689 | "metadata": {}, |
1695 | 1690 | "outputs": [ |
1696 | 1691 | { |
|
1815 | 1810 | "[200 rows x 3 columns]" |
1816 | 1811 | ] |
1817 | 1812 | }, |
1818 | | - "execution_count": 12, |
| 1813 | + "execution_count": 13, |
1819 | 1814 | "metadata": {}, |
1820 | 1815 | "output_type": "execute_result" |
1821 | 1816 | } |
|
1835 | 1830 | }, |
1836 | 1831 | { |
1837 | 1832 | "cell_type": "code", |
1838 | | - "execution_count": 13, |
| 1833 | + "execution_count": 14, |
1839 | 1834 | "metadata": {}, |
1840 | 1835 | "outputs": [ |
1841 | 1836 | { |
|
1960 | 1955 | "[200 rows x 3 columns]" |
1961 | 1956 | ] |
1962 | 1957 | }, |
1963 | | - "execution_count": 13, |
| 1958 | + "execution_count": 14, |
1964 | 1959 | "metadata": {}, |
1965 | 1960 | "output_type": "execute_result" |
1966 | 1961 | } |
|
1972 | 1967 | }, |
1973 | 1968 | { |
1974 | 1969 | "cell_type": "code", |
1975 | | - "execution_count": 17, |
| 1970 | + "execution_count": 15, |
1976 | 1971 | "metadata": {}, |
1977 | 1972 | "outputs": [ |
1978 | 1973 | { |
1979 | 1974 | "data": { |
1980 | 1975 | "text/plain": [ |
1981 | | - "[<matplotlib.lines.Line2D at 0x329bee910>]" |
| 1976 | + "[<matplotlib.lines.Line2D at 0x30dc62cd0>]" |
1982 | 1977 | ] |
1983 | 1978 | }, |
1984 | | - "execution_count": 17, |
| 1979 | + "execution_count": 15, |
1985 | 1980 | "metadata": {}, |
1986 | 1981 | "output_type": "execute_result" |
1987 | 1982 | }, |
|
2019 | 2014 | }, |
2020 | 2015 | { |
2021 | 2016 | "cell_type": "code", |
2022 | | - "execution_count": 19, |
| 2017 | + "execution_count": 24, |
2023 | 2018 | "metadata": {}, |
2024 | 2019 | "outputs": [ |
2025 | 2020 | { |
|
2354 | 2349 | "</div>" |
2355 | 2350 | ], |
2356 | 2351 | "text/plain": [ |
2357 | | - " 28 44 92 132 \n", |
2358 | | - "2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \\\n", |
| 2352 | + " 28 44 92 132 \\\n", |
| 2353 | + "2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n", |
2359 | 2354 | "2010236N12341 9.610111e+06 0.000000e+00 2.390220e+07 0.000000 \n", |
2360 | 2355 | "2010257N16282 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n", |
2361 | 2356 | "2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n", |
|
2373 | 2368 | "2017242N16333 1.816087e+08 8.362996e+05 3.338384e+08 0.000000 \n", |
2374 | 2369 | "2017260N12310 0.000000e+00 0.000000e+00 1.910071e+07 0.000000 \n", |
2375 | 2370 | "\n", |
2376 | | - " 192 212 214 388 \n", |
2377 | | - "2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \\\n", |
| 2371 | + " 192 212 214 388 \\\n", |
| 2372 | + "2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
2378 | 2373 | "2010236N12341 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
2379 | 2374 | "2010257N16282 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
2380 | 2375 | "2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
|
2392 | 2387 | "2017242N16333 1.573645e+09 0.000000e+00 1.005487e+07 0.000000e+00 \n", |
2393 | 2388 | "2017260N12310 0.000000e+00 4.919065e+08 4.214500e+07 0.000000e+00 \n", |
2394 | 2389 | "\n", |
2395 | | - " 484 630 659 662 \n", |
2396 | | - "2010176N16278 7.221970e+08 0.000000e+00 0.000000e+00 0.000000e+00 \\\n", |
| 2390 | + " 484 630 659 662 \\\n", |
| 2391 | + "2010176N16278 7.221970e+08 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
2397 | 2392 | "2010236N12341 0.000000e+00 2.872097e+07 4.721180e+06 0.000000e+00 \n", |
2398 | 2393 | "2010257N16282 5.025816e+07 0.000000e+00 0.000000e+00 0.000000e+00 \n", |
2399 | 2394 | "2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 3.233805e+06 \n", |
|
2431 | 2426 | "2017260N12310 0.000000e+00 2.477373e+07 " |
2432 | 2427 | ] |
2433 | 2428 | }, |
2434 | | - "execution_count": 19, |
| 2429 | + "execution_count": 24, |
2435 | 2430 | "metadata": {}, |
2436 | 2431 | "output_type": "execute_result" |
2437 | 2432 | } |
|
2442 | 2437 | "impf = impact_func_tc(**bayesian_output.params)\n", |
2443 | 2438 | "impact = ImpactCalc(exposure, impf, hazard).impact(assign_centroids=False)\n", |
2444 | 2439 | "impact_data = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n", |
| 2440 | + "impact_data.set_index(np.asarray(hazard.event_name), inplace=True)\n", |
2445 | 2441 | "impact_data" |
2446 | 2442 | ] |
2447 | 2443 | }, |
|
2459 | 2455 | }, |
2460 | 2456 | { |
2461 | 2457 | "cell_type": "code", |
2462 | | - "execution_count": 40, |
| 2458 | + "execution_count": 25, |
2463 | 2459 | "metadata": {}, |
2464 | 2460 | "outputs": [ |
2465 | 2461 | { |
|
2468 | 2464 | "<AxesSubplot:ylabel='Damages (USD)'>" |
2469 | 2465 | ] |
2470 | 2466 | }, |
2471 | | - "execution_count": 40, |
| 2467 | + "execution_count": 25, |
2472 | 2468 | "metadata": {}, |
2473 | 2469 | "output_type": "execute_result" |
2474 | 2470 | }, |
|
2538 | 2534 | }, |
2539 | 2535 | { |
2540 | 2536 | "cell_type": "code", |
2541 | | - "execution_count": 42, |
| 2537 | + "execution_count": 26, |
2542 | 2538 | "metadata": {}, |
2543 | 2539 | "outputs": [ |
2544 | 2540 | { |
|
2873 | 2869 | "</div>" |
2874 | 2870 | ], |
2875 | 2871 | "text/plain": [ |
2876 | | - " Antigua and Barbuda Bahamas Virgin Islands, British \n", |
2877 | | - "2010176N16278 NaN NaN NaN \\\n", |
| 2872 | + " Antigua and Barbuda Bahamas Virgin Islands, British \\\n", |
| 2873 | + "2010176N16278 NaN NaN NaN \n", |
2878 | 2874 | "2010236N12341 -0.161719 NaN 7.378438 \n", |
2879 | 2875 | "2010257N16282 NaN NaN NaN \n", |
2880 | 2876 | "2010302N09306 NaN NaN NaN \n", |
|
2892 | 2888 | "2017242N16333 -0.065509 -0.333363 -0.953585 \n", |
2893 | 2889 | "2017260N12310 NaN NaN 7.281049 \n", |
2894 | 2890 | "\n", |
2895 | | - " Cabo Verde Cuba Dominica Dominican Republic Jamaica \n", |
2896 | | - "2010176N16278 NaN NaN NaN NaN NaN \\\n", |
| 2891 | + " Cabo Verde Cuba Dominica Dominican Republic Jamaica \\\n", |
| 2892 | + "2010176N16278 NaN NaN NaN NaN NaN \n", |
2897 | 2893 | "2010236N12341 NaN NaN NaN NaN NaN \n", |
2898 | 2894 | "2010257N16282 NaN NaN NaN NaN NaN \n", |
2899 | 2895 | "2010302N09306 NaN NaN NaN NaN NaN \n", |
|
2911 | 2907 | "2017242N16333 NaN -0.844200 NaN 7.002377 NaN \n", |
2912 | 2908 | "2017260N12310 NaN NaN -0.494112 -0.114143 NaN \n", |
2913 | 2909 | "\n", |
2914 | | - " Mexico Puerto Rico Saint Kitts and Nevis Saint Lucia \n", |
2915 | | - "2010176N16278 -0.536752 NaN NaN NaN \\\n", |
| 2910 | + " Mexico Puerto Rico Saint Kitts and Nevis Saint Lucia \\\n", |
| 2911 | + "2010176N16278 -0.536752 NaN NaN NaN \n", |
2916 | 2912 | "2010236N12341 NaN 7.458199 6.674051 NaN \n", |
2917 | 2913 | "2010257N16282 -1.984236 NaN NaN NaN \n", |
2918 | 2914 | "2010302N09306 NaN NaN NaN 0.770404 \n", |
|
2950 | 2946 | "2017260N12310 NaN 7.393991 " |
2951 | 2947 | ] |
2952 | 2948 | }, |
2953 | | - "execution_count": 42, |
| 2949 | + "execution_count": 26, |
2954 | 2950 | "metadata": {}, |
2955 | 2951 | "output_type": "execute_result" |
2956 | 2952 | }, |
|
3001 | 2997 | "Using a cost function based on the ratio between modelled and observed impact might increase the overall error but decrease the log-error for many events.\n", |
3002 | 2998 | "\n", |
3003 | 2999 | "So we present some ideas on how to continue and/or improve the calibration:\n", |
3004 | | - "1. Use a different cost function\n", |
3005 | | - "2. Also calibrate the `v_thresh` parameter. This requires adding constraints, because `v_thresh` < `v_half`.\n", |
3006 | | - "3. Calibrate different impact functions for houses in Mexico and Puerto Rico within the same optimization task.\n", |
3007 | | - "4. Employ the `ScipyMinimizeOptimizer` instead of the `BayesianOptimizer`" |
3008 | | - ] |
3009 | | - }, |
3010 | | - { |
3011 | | - "cell_type": "code", |
3012 | | - "execution_count": 70, |
3013 | | - "metadata": {}, |
3014 | | - "outputs": [], |
3015 | | - "source": [ |
3016 | | - "import pandas as pd\n", |
3017 | | - "from climada.engine import Impact\n", |
3018 | | - "\n", |
3019 | | - "# Define a cost function\n", |
3020 | | - "def cost_rmsle(impact: Impact, data: pd.DataFrame):\n", |
3021 | | - " impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n", |
3022 | | - " data, impact = data.align(impact, \"outer\", fill_value=0)\n", |
3023 | | - " data, impact = data.to_numpy(), impact.to_numpy()\n", |
3024 | | - " return np.exp(np.sqrt(np.mean((np.log(data + 1) - np.log(impact + 1)) ** 2)) - 1)\n", |
3025 | 3000 | "\n", |
3026 | | - "def cost_rmse(impact: Impact, data: pd.DataFrame):\n", |
3027 | | - " impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n", |
3028 | | - " return np.sqrt(np.mean(((data - impact) ** 2).to_numpy()))\n" |
| 3001 | + "1. Run the calibration again, but change the number of initial steps and/or iteration steps.\n", |
| 3002 | + "2. Use a different cost function, e.g., an error measure based on a ratio rather than a difference.\n", |
| 3003 | + "3. Also calibrate the `v_thresh` parameter. This requires adding constraints, because `v_thresh` < `v_half`.\n", |
| 3004 | + "4. Calibrate different impact functions for houses in Mexico and Puerto Rico within the same optimization task.\n", |
| 3005 | + "5. Employ the `ScipyMinimizeOptimizer` instead of the `BayesianOptimizer`." |
3029 | 3006 | ] |
3030 | 3007 | } |
3031 | 3008 | ], |
|
0 commit comments