Skip to content

Commit e1fe68a

Browse files
committed
Update calibration tutorial
1 parent 0e5a557 commit e1fe68a

File tree

1 file changed

+48
-71
lines changed

1 file changed

+48
-71
lines changed

doc/tutorial/climada_util_calibrate.ipynb

Lines changed: 48 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
},
2020
{
2121
"cell_type": "code",
22-
"execution_count": 2,
22+
"execution_count": 1,
2323
"metadata": {},
2424
"outputs": [],
2525
"source": [
@@ -46,7 +46,7 @@
4646
},
4747
{
4848
"cell_type": "code",
49-
"execution_count": 3,
49+
"execution_count": 2,
5050
"metadata": {},
5151
"outputs": [
5252
{
@@ -867,7 +867,7 @@
867867
"[27 rows x 22 columns]"
868868
]
869869
},
870-
"execution_count": 3,
870+
"execution_count": 2,
871871
"metadata": {},
872872
"output_type": "execute_result"
873873
}
@@ -899,7 +899,7 @@
899899
},
900900
{
901901
"cell_type": "code",
902-
"execution_count": 4,
902+
"execution_count": 3,
903903
"metadata": {},
904904
"outputs": [
905905
{
@@ -1332,7 +1332,7 @@
13321332
"2017260N12310 0.000000e+00 1.534596e+09 "
13331333
]
13341334
},
1335-
"execution_count": 4,
1335+
"execution_count": 3,
13361336
"metadata": {},
13371337
"output_type": "execute_result"
13381338
}
@@ -1385,7 +1385,7 @@
13851385
},
13861386
{
13871387
"cell_type": "code",
1388-
"execution_count": 5,
1388+
"execution_count": 4,
13891389
"metadata": {},
13901390
"outputs": [],
13911391
"source": [
@@ -1412,14 +1412,14 @@
14121412
},
14131413
{
14141414
"cell_type": "code",
1415-
"execution_count": 6,
1415+
"execution_count": 5,
14161416
"metadata": {},
14171417
"outputs": [
14181418
{
14191419
"name": "stdout",
14201420
"output_type": "stream",
14211421
"text": [
1422-
"2023-06-13 16:36:05,304 - climada.hazard.tc_tracks - WARNING - The cached IBTrACS data set dates from 2022-03-08 23:23:51 (older than 180 days). Very likely, a more recent version is available. Consider manually removing the file /Users/ldr.riedel/climada/data/IBTrACS.ALL.v04r00.nc and re-running this function, which will download the most recent version of the IBTrACS data set from the official URL.\n"
1422+
"2023-06-13 17:55:56,240 - climada.hazard.tc_tracks - WARNING - The cached IBTrACS data set dates from 2022-03-08 23:23:51 (older than 180 days). Very likely, a more recent version is available. Consider manually removing the file /Users/ldr.riedel/climada/data/IBTrACS.ALL.v04r00.nc and re-running this function, which will download the most recent version of the IBTrACS data set from the official URL.\n"
14231423
]
14241424
},
14251425
{
@@ -1484,7 +1484,7 @@
14841484
},
14851485
{
14861486
"cell_type": "code",
1487-
"execution_count": 7,
1487+
"execution_count": 6,
14881488
"metadata": {},
14891489
"outputs": [],
14901490
"source": [
@@ -1521,26 +1521,23 @@
15211521
"\n",
15221522
"Computations on data frames align columns and indexes.\n",
15231523
"The indexes of the calibration data are the IBTrACS IDs, but the indexes of the result of `Impact.impact_at_reg` are the hazard event IDs, which at this point are only integer numbers.\n",
1524-
"To resolve that, we simply set the hazard event IDs to the IBTrACS IDs, which are stored in `Hazard.event_name`.\n",
1524+
"To resolve that, we set the index of the resulting impact data to `Hazard.event_name`, which stores the IBTrACS IDs.\n",
15251525
"Once both the impact data and the calibration data are in the same data format, we can compute the RMSE:"
15261526
]
15271527
},
15281528
{
15291529
"cell_type": "code",
1530-
"execution_count": 8,
1530+
"execution_count": 7,
15311531
"metadata": {},
15321532
"outputs": [],
15331533
"source": [
15341534
"import numpy as np\n",
15351535
"from climada.engine import Impact\n",
15361536
"\n",
1537-
"# TODO: Dont\n",
1538-
"# Make sure that Hazard.event_id matches indexes of 'data'\n",
1539-
"# hazard.event_id = np.asarray(hazard.event_name)\n",
1540-
"\n",
15411537
"def cost_rmse(impact: Impact, data: pd.DataFrame):\n",
15421538
" \"\"\"A cost function computing the RMSE\"\"\"\n",
15431539
" impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n",
1540+
" impact.set_index(np.asarray(hazard.event_name), inplace=True)\n",
15441541
" return np.sqrt(np.mean(((data - impact) ** 2).to_numpy()))"
15451542
]
15461543
},
@@ -1558,7 +1555,7 @@
15581555
},
15591556
{
15601557
"cell_type": "code",
1561-
"execution_count": 9,
1558+
"execution_count": 8,
15621559
"metadata": {},
15631560
"outputs": [],
15641561
"source": [
@@ -1582,7 +1579,7 @@
15821579
},
15831580
{
15841581
"cell_type": "code",
1585-
"execution_count": 10,
1582+
"execution_count": 11,
15861583
"metadata": {},
15871584
"outputs": [
15881585
{
@@ -1609,7 +1606,7 @@
16091606
"{'scale': 0.9903198881207879, 'v_half': 61.51163348395183}"
16101607
]
16111608
},
1612-
"execution_count": 10,
1609+
"execution_count": 11,
16131610
"metadata": {},
16141611
"output_type": "execute_result"
16151612
}
@@ -1625,9 +1622,7 @@
16251622
" cost_func=cost_rmse,\n",
16261623
" impact_func_gen=impact_func_tc,\n",
16271624
" bounds=bounds,\n",
1628-
" align=False,\n",
16291625
")\n",
1630-
"exposure.assign_centroids(hazard)\n",
16311626
"\n",
16321627
"# Create and run the optimizer\n",
16331628
"opt = BayesianOptimizer(input)\n",
@@ -1648,7 +1643,7 @@
16481643
},
16491644
{
16501645
"cell_type": "code",
1651-
"execution_count": 11,
1646+
"execution_count": 12,
16521647
"metadata": {},
16531648
"outputs": [
16541649
{
@@ -1657,7 +1652,7 @@
16571652
"<AxesSubplot:title={'center':'TC 1: Emanuel 2011'}, xlabel='Intensity (m/s)', ylabel='Impact (%)'>"
16581653
]
16591654
},
1660-
"execution_count": 11,
1655+
"execution_count": 12,
16611656
"metadata": {},
16621657
"output_type": "execute_result"
16631658
},
@@ -1690,7 +1685,7 @@
16901685
},
16911686
{
16921687
"cell_type": "code",
1693-
"execution_count": 12,
1688+
"execution_count": 13,
16941689
"metadata": {},
16951690
"outputs": [
16961691
{
@@ -1815,7 +1810,7 @@
18151810
"[200 rows x 3 columns]"
18161811
]
18171812
},
1818-
"execution_count": 12,
1813+
"execution_count": 13,
18191814
"metadata": {},
18201815
"output_type": "execute_result"
18211816
}
@@ -1835,7 +1830,7 @@
18351830
},
18361831
{
18371832
"cell_type": "code",
1838-
"execution_count": 13,
1833+
"execution_count": 14,
18391834
"metadata": {},
18401835
"outputs": [
18411836
{
@@ -1960,7 +1955,7 @@
19601955
"[200 rows x 3 columns]"
19611956
]
19621957
},
1963-
"execution_count": 13,
1958+
"execution_count": 14,
19641959
"metadata": {},
19651960
"output_type": "execute_result"
19661961
}
@@ -1972,16 +1967,16 @@
19721967
},
19731968
{
19741969
"cell_type": "code",
1975-
"execution_count": 17,
1970+
"execution_count": 15,
19761971
"metadata": {},
19771972
"outputs": [
19781973
{
19791974
"data": {
19801975
"text/plain": [
1981-
"[<matplotlib.lines.Line2D at 0x329bee910>]"
1976+
"[<matplotlib.lines.Line2D at 0x30dc62cd0>]"
19821977
]
19831978
},
1984-
"execution_count": 17,
1979+
"execution_count": 15,
19851980
"metadata": {},
19861981
"output_type": "execute_result"
19871982
},
@@ -2019,7 +2014,7 @@
20192014
},
20202015
{
20212016
"cell_type": "code",
2022-
"execution_count": 19,
2017+
"execution_count": 24,
20232018
"metadata": {},
20242019
"outputs": [
20252020
{
@@ -2354,8 +2349,8 @@
23542349
"</div>"
23552350
],
23562351
"text/plain": [
2357-
" 28 44 92 132 \n",
2358-
"2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \\\n",
2352+
" 28 44 92 132 \\\n",
2353+
"2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n",
23592354
"2010236N12341 9.610111e+06 0.000000e+00 2.390220e+07 0.000000 \n",
23602355
"2010257N16282 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n",
23612356
"2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 \n",
@@ -2373,8 +2368,8 @@
23732368
"2017242N16333 1.816087e+08 8.362996e+05 3.338384e+08 0.000000 \n",
23742369
"2017260N12310 0.000000e+00 0.000000e+00 1.910071e+07 0.000000 \n",
23752370
"\n",
2376-
" 192 212 214 388 \n",
2377-
"2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \\\n",
2371+
" 192 212 214 388 \\\n",
2372+
"2010176N16278 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
23782373
"2010236N12341 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
23792374
"2010257N16282 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
23802375
"2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
@@ -2392,8 +2387,8 @@
23922387
"2017242N16333 1.573645e+09 0.000000e+00 1.005487e+07 0.000000e+00 \n",
23932388
"2017260N12310 0.000000e+00 4.919065e+08 4.214500e+07 0.000000e+00 \n",
23942389
"\n",
2395-
" 484 630 659 662 \n",
2396-
"2010176N16278 7.221970e+08 0.000000e+00 0.000000e+00 0.000000e+00 \\\n",
2390+
" 484 630 659 662 \\\n",
2391+
"2010176N16278 7.221970e+08 0.000000e+00 0.000000e+00 0.000000e+00 \n",
23972392
"2010236N12341 0.000000e+00 2.872097e+07 4.721180e+06 0.000000e+00 \n",
23982393
"2010257N16282 5.025816e+07 0.000000e+00 0.000000e+00 0.000000e+00 \n",
23992394
"2010302N09306 0.000000e+00 0.000000e+00 0.000000e+00 3.233805e+06 \n",
@@ -2431,7 +2426,7 @@
24312426
"2017260N12310 0.000000e+00 2.477373e+07 "
24322427
]
24332428
},
2434-
"execution_count": 19,
2429+
"execution_count": 24,
24352430
"metadata": {},
24362431
"output_type": "execute_result"
24372432
}
@@ -2442,6 +2437,7 @@
24422437
"impf = impact_func_tc(**bayesian_output.params)\n",
24432438
"impact = ImpactCalc(exposure, impf, hazard).impact(assign_centroids=False)\n",
24442439
"impact_data = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n",
2440+
"impact_data.set_index(np.asarray(hazard.event_name), inplace=True)\n",
24452441
"impact_data"
24462442
]
24472443
},
@@ -2459,7 +2455,7 @@
24592455
},
24602456
{
24612457
"cell_type": "code",
2462-
"execution_count": 40,
2458+
"execution_count": 25,
24632459
"metadata": {},
24642460
"outputs": [
24652461
{
@@ -2468,7 +2464,7 @@
24682464
"<AxesSubplot:ylabel='Damages (USD)'>"
24692465
]
24702466
},
2471-
"execution_count": 40,
2467+
"execution_count": 25,
24722468
"metadata": {},
24732469
"output_type": "execute_result"
24742470
},
@@ -2538,7 +2534,7 @@
25382534
},
25392535
{
25402536
"cell_type": "code",
2541-
"execution_count": 42,
2537+
"execution_count": 26,
25422538
"metadata": {},
25432539
"outputs": [
25442540
{
@@ -2873,8 +2869,8 @@
28732869
"</div>"
28742870
],
28752871
"text/plain": [
2876-
" Antigua and Barbuda Bahamas Virgin Islands, British \n",
2877-
"2010176N16278 NaN NaN NaN \\\n",
2872+
" Antigua and Barbuda Bahamas Virgin Islands, British \\\n",
2873+
"2010176N16278 NaN NaN NaN \n",
28782874
"2010236N12341 -0.161719 NaN 7.378438 \n",
28792875
"2010257N16282 NaN NaN NaN \n",
28802876
"2010302N09306 NaN NaN NaN \n",
@@ -2892,8 +2888,8 @@
28922888
"2017242N16333 -0.065509 -0.333363 -0.953585 \n",
28932889
"2017260N12310 NaN NaN 7.281049 \n",
28942890
"\n",
2895-
" Cabo Verde Cuba Dominica Dominican Republic Jamaica \n",
2896-
"2010176N16278 NaN NaN NaN NaN NaN \\\n",
2891+
" Cabo Verde Cuba Dominica Dominican Republic Jamaica \\\n",
2892+
"2010176N16278 NaN NaN NaN NaN NaN \n",
28972893
"2010236N12341 NaN NaN NaN NaN NaN \n",
28982894
"2010257N16282 NaN NaN NaN NaN NaN \n",
28992895
"2010302N09306 NaN NaN NaN NaN NaN \n",
@@ -2911,8 +2907,8 @@
29112907
"2017242N16333 NaN -0.844200 NaN 7.002377 NaN \n",
29122908
"2017260N12310 NaN NaN -0.494112 -0.114143 NaN \n",
29132909
"\n",
2914-
" Mexico Puerto Rico Saint Kitts and Nevis Saint Lucia \n",
2915-
"2010176N16278 -0.536752 NaN NaN NaN \\\n",
2910+
" Mexico Puerto Rico Saint Kitts and Nevis Saint Lucia \\\n",
2911+
"2010176N16278 -0.536752 NaN NaN NaN \n",
29162912
"2010236N12341 NaN 7.458199 6.674051 NaN \n",
29172913
"2010257N16282 -1.984236 NaN NaN NaN \n",
29182914
"2010302N09306 NaN NaN NaN 0.770404 \n",
@@ -2950,7 +2946,7 @@
29502946
"2017260N12310 NaN 7.393991 "
29512947
]
29522948
},
2953-
"execution_count": 42,
2949+
"execution_count": 26,
29542950
"metadata": {},
29552951
"output_type": "execute_result"
29562952
},
@@ -3001,31 +2997,12 @@
30012997
"Using a cost function based on the ratio between modelled and observed impact might increase the overall error but decrease the log-error for many events.\n",
30022998
"\n",
30032999
"So we present some ideas on how to continue and/or improve the calibration:\n",
3004-
"1. Use a different cost function\n",
3005-
"2. Also calibrate the `v_thresh` parameter. This requires adding constraints, because `v_thresh` < `v_half`.\n",
3006-
"3. Calibrate different impact functions for houses in Mexico and Puerto Rico within the same optimization task.\n",
3007-
"4. Employ the `ScipyMinimizeOptimizer` instead of the `BayesianOptimizer`"
3008-
]
3009-
},
3010-
{
3011-
"cell_type": "code",
3012-
"execution_count": 70,
3013-
"metadata": {},
3014-
"outputs": [],
3015-
"source": [
3016-
"import pandas as pd\n",
3017-
"from climada.engine import Impact\n",
3018-
"\n",
3019-
"# Define a cost function\n",
3020-
"def cost_rmsle(impact: Impact, data: pd.DataFrame):\n",
3021-
" impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n",
3022-
" data, impact = data.align(impact, \"outer\", fill_value=0)\n",
3023-
" data, impact = data.to_numpy(), impact.to_numpy()\n",
3024-
" return np.exp(np.sqrt(np.mean((np.log(data + 1) - np.log(impact + 1)) ** 2)) - 1)\n",
30253000
"\n",
3026-
"def cost_rmse(impact: Impact, data: pd.DataFrame):\n",
3027-
" impact = impact.impact_at_reg(exposure.gdf[\"region_id\"])\n",
3028-
" return np.sqrt(np.mean(((data - impact) ** 2).to_numpy()))\n"
3001+
"1. Run the calibration again, but change the number of initial steps and/or iteration steps.\n",
3002+
"2. Use a different cost function, e.g., an error measure based on a ratio rather than a difference.\n",
3003+
"3. Also calibrate the `v_thresh` parameter. This requires adding constraints, because `v_thresh` must remain smaller than `v_half`.\n",
3004+
"4. Calibrate different impact functions for houses in Mexico and Puerto Rico within the same optimization task.\n",
3005+
"5. Employ the `ScipyMinimizeOptimizer` instead of the `BayesianOptimizer`."
30293006
]
30303007
}
30313008
],

0 commit comments

Comments
 (0)