diff --git a/baus/slr.py b/baus/slr.py
index 043c6d4d..19b86a01 100644
--- a/baus/slr.py
+++ b/baus/slr.py
@@ -10,7 +10,7 @@
 
 
 @orca.step()
-def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels):
+def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels, initial_year):
 
     # inundated parcels are all parcels at or below the SLR progression level in that year
     slr_progression = slr_progression.to_frame()
@@ -33,6 +33,16 @@ def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels):
     orca.add_column('parcels', 'slr_nodev', slr_nodev)
     parcels = orca.get_table("parcels")
 
+    # also track how many parcels were mitigated for summary purposes
+    # because of the way the inputs are set up, all mitigated parcels are mitigated from the start of the simulation
+    if year != initial_year:
+        return
+    mitigation_parcels = slr_parcel_inundation.query('inundation==100').astype('bool')
+    slr_mitigation = pd.Series(False, parcels.index)
+    mitigation = pd.Series(mitigation_parcels['inundation'])
+    slr_mitigation.update(mitigation)
+    orca.add_column('parcels', 'slr_mitigation', slr_mitigation)
+
 
 @orca.step()
 def slr_remove_dev(buildings, households, jobs):
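
A minimal standalone sketch (toy inputs, plain pandas; the parcel ids and table contents below are made up for illustration) of the slr_mitigation bookkeeping added above — inundation == 100 is the sentinel value for mitigated parcels, and because mitigation is fixed from the start of the simulation, the column only needs to be built in the initial year:

    import pandas as pd

    # toy stand-ins for the tables slr_inundate() receives
    slr_parcel_inundation = pd.DataFrame(
        {'inundation': [12, 100, 24, 100]},
        index=pd.Index([1, 2, 3, 4], name='parcel_id'))
    parcels_index = pd.Index([1, 2, 3, 4, 5], name='parcel_id')

    # rows coded 100 are mitigated; astype('bool') turns the 100s into True
    mitigation_parcels = slr_parcel_inundation.query('inundation==100').astype('bool')
    slr_mitigation = pd.Series(False, parcels_index)
    slr_mitigation.update(mitigation_parcels['inundation'])
    print(slr_mitigation.tolist())  # [False, True, False, True, False]
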
diff --git a/baus/summaries/hazards_summaries.py b/baus/summaries/hazards_summaries.py
index d5608e63..b915ca65 100644
--- a/baus/summaries/hazards_summaries.py
+++ b/baus/summaries/hazards_summaries.py
@@ -7,7 +7,13 @@
 
 
 @orca.step()
-def hazards_slr_summary(run_setup, run_name, year):
+def hazards_slr_summary(run_setup, run_name, parcels, year):
+
+    # first export parcel-level information on inundated and mitigated parcels
+    slr_parcel_summary = parcels.to_frame(['parcel_id', 'slr_nodev', 'slr_mitigation'])
+    hazsumm_output_dir = pathlib.Path(orca.get_injectable("outputs_dir")) / "hazards_summaries"
+    hazsumm_output_dir.mkdir(parents=True, exist_ok=True)
+    slr_parcel_summary.to_csv(hazsumm_output_dir / f"{run_name}_slr_parcel_summary_{year}.csv")
 
     if not run_setup['run_slr']:
         return
@@ -54,8 +60,6 @@ def hazards_slr_summary(run_setup, run_name, year):
     for empsix in ['AGREMPN', 'MWTEMPN', 'RETEMPN', 'FPSEMPN', 'HEREMPN', 'OTHEMPN']:
         slr_summary["impacted_jobs_"+str(empsix)] = (unplaced_jobs_tot["empsix"] == empsix).sum()
 
-    hazsumm_output_dir = pathlib.Path(orca.get_injectable("outputs_dir")) / "hazards_summaries"
-    hazsumm_output_dir.mkdir(parents=True, exist_ok=True)
     slr_summary.to_csv(hazsumm_output_dir / f"{run_name}_slr_summary_{year}.csv")
diff --git a/scripts/metrics/metrics_healthy.py b/scripts/metrics/metrics_healthy.py
index 4df65088..a7dbbbbb 100644
--- a/scripts/metrics/metrics_healthy.py
+++ b/scripts/metrics/metrics_healthy.py
@@ -129,92 +129,62 @@ def non_greenfield_development_share(
     modelrun_alias: str,
     modelrun_id: str,
     modelrun_data: dict,
-    run_directory_path: pathlib.Path,
     output_path: pathlib.Path,
     append_output: bool
 ):
     '''
-    Calculate and export the share of development that falls within the 2020 urban area footprint
-    (or is outside the urban area footprint but suitably low-density as to be rural in character).
+    Calculate and export the share of non-greenfield development in the final model year,
+    where greenfield development is development outside the 2020 urban area footprint
+    built at more than one dwelling-unit equivalent per acre.
 
     Parameters:
     - rtp (str): RTP2021 or RTP2025.
     - modelrun_alias (str): Alias for the model run, used for labeling output.
     - modelrun_id (str): Identifier for the model run.
     - modelrun_data (dict): year -> {"parcel" -> parcel DataFrame, "county" -> county DataFrame }
-    - run_directory_path (Path): The directory path for this model run.
     - output_path (Path): The directory path to save the output CSV file.
     - append_output (bool): True if appending output; False if writing.
     '''
     logging.info("Calculating non_greenfield_development_share")
 
-    # Guard clause: this metric is implemented for RTP2025 / PBA50+ only
-    if rtp != 'RTP2025':
-        logging.info("  RTP2021 is not supported - skipping")
-        return
-
     # Define a potentially very impactful constant used to convert residential units to non-residential sqft and vice versa
     SQFT_PER_UNIT = 1750 # close to the weighted average size of developer-model units in a recent BAUS run
 
-    # Read in and select new buildings post 2020
-    modelrun_name = modelrun_id
-    # Sometimes the modelrun_id is a whole file path
-    # Handle both forms of slashes in this field
-    if '\\' in modelrun_id:
-        modelrun_name = modelrun_id.split('\\')[-1]
-    if '/' in modelrun_id:
-        modelrun_name = modelrun_id.split('/')[-1]
-    NEW_BUILDINGS_PATH = pathlib.Path(run_directory_path) / f'core_summaries/{modelrun_name}_new_buildings_summary.csv'
-    logging.info(f'  Reading new_buildings_summary from {NEW_BUILDINGS_PATH}...')
-    new_buildings = pd.read_csv(
-        NEW_BUILDINGS_PATH,
-        usecols=['parcel_id', 'year_built', 'building_sqft', 'residential_units'],
-        dtype={'parcel_id': int}
-    )
-    new_buildings = new_buildings[new_buildings['year_built'] > 2020]
-    logging.debug(f'  {len(new_buildings)} buildings built after 2020')
+    # get parcel and buildings data for horizon year
+    year_horizon = sorted(modelrun_data.keys())[-1]
+    buildings_horizon_year = modelrun_data[year_horizon]["buildings"]
+    # only look at buildings built after the Plan's initial year
+    year_initial = sorted(modelrun_data.keys())[0]
+    buildings_df = buildings_horizon_year.loc[buildings_horizon_year.year_built > year_initial]
 
-    # Some residential buildings (from the development pipeline) have no building_sqft);
-    # convert residential units to sqft equivalent so we can summarize "all development"
-    new_buildings.loc[new_buildings['building_sqft'] == 0, 'building_sqft'] = \
-        new_buildings.loc[new_buildings['building_sqft'] == 0, 'residential_units'] * SQFT_PER_UNIT
-
-    # We are interested in development on any parcel:
+    # we are interested in development on any parcel:
     # 1. outside the 2020 urban area footprint AND
     # 2. greater than 1 DU-equivalent per acre in 2050
-    parcel_df = modelrun_data[2050]['parcel'].copy()
-    parcel_df['du_equiv_per_acre'] = (parcel_df['residential_units'] + (parcel_df['non_residential_sqft'] / SQFT_PER_UNIT)) \
-        / parcel_df['ACRES']
-    dense_greenfield_parcels = parcel_df.loc[
-        (parcel_df['du_equiv_per_acre'] > 1.0) & (parcel_df['in_urban_area'] == 0),
-        'parcel_id'
-    ]
+    buildings_df['du_equiv_per_acre'] = (buildings_df['residential_units_total'] + (buildings_df['non_residential_sqft_total'] / SQFT_PER_UNIT)) /\
+        buildings_df['parcel_acres']
+    dense_greenfield = buildings_df.loc[(buildings_df['du_equiv_per_acre'] > 1.0) & (buildings_df['in_urban_area'] == 0)]
 
-    # Calculate share of "all development" (in terms of building_sqft) that occurred on "dense greenfield parcels"
-    total_development = new_buildings['building_sqft'].sum()
-    dense_greenfield_development = new_buildings.loc[
-        new_buildings['parcel_id'].isin(dense_greenfield_parcels),
-        'building_sqft'
-    ].sum()
-    greenfield_development_pct = dense_greenfield_development / total_development
+    # then calculate dense greenfield development parcel acres as a share of all development parcel acres
+    dense_greenfield_development_share = dense_greenfield.drop_duplicates(['parcel_id'])['parcel_acres'].sum() /\
+        buildings_df.drop_duplicates(['parcel_id'])['parcel_acres'].sum()
 
     # Add metadata, format, and export to CSV
-    greenfield_development_df = pd.DataFrame({
+    non_greenfield_development_df = pd.DataFrame({
         'modelrun_id': modelrun_id,
         'modelrun_alias': modelrun_alias,
         'area_alias': 'Regionwide',
         'area': 'all',
-        'development_in_urban_footprint_pct': 1 - greenfield_development_pct
+        'non_greenfield_development_share': 1 - dense_greenfield_development_share
     }, index=[0])
 
     out_file = pathlib.Path(output_path) / 'metrics_healthy2_development_in_urban_footprint.csv'
-    greenfield_development_df.to_csv(
+    non_greenfield_development_df.to_csv(
         out_file,
         mode='a' if append_output else 'w',
         header=False if append_output else True,
         index=False,
     )
-    logging.info(f"{'Appended' if append_output else 'Wrote'} {len(greenfield_development_df)} " \
-                 + f"line{'s' if len(greenfield_development_df) > 1 else ''} to {out_file}")
+    logging.info(f"{'Appended' if append_output else 'Wrote'} {len(non_greenfield_development_df)} " \
+                 + f"line{'s' if len(non_greenfield_development_df) > 1 else ''} to {out_file}")
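
To make the new acreage-based metric concrete, a small worked example with toy numbers (column names match the rewritten function; the values are invented): a parcel counts as dense greenfield when it lies outside the 2020 urban footprint and its parcel-level totals exceed one dwelling-unit equivalent per acre, and the share is computed on de-duplicated parcel acres so that multi-building parcels count once.

    import pandas as pd

    SQFT_PER_UNIT = 1750
    buildings_df = pd.DataFrame({
        'parcel_id':                  [1, 1, 2, 3],
        'residential_units_total':    [10, 10, 0, 2],
        'non_residential_sqft_total': [0, 0, 3500, 0],
        'parcel_acres':               [2.0, 2.0, 1.0, 5.0],
        'in_urban_area':              [0, 0, 0, 1],
    })
    buildings_df['du_equiv_per_acre'] = (buildings_df['residential_units_total']
        + buildings_df['non_residential_sqft_total'] / SQFT_PER_UNIT) / buildings_df['parcel_acres']
    dense_greenfield = buildings_df.loc[(buildings_df['du_equiv_per_acre'] > 1.0) & (buildings_df['in_urban_area'] == 0)]

    # de-duplicate to parcels before summing acres
    share = (dense_greenfield.drop_duplicates(['parcel_id'])['parcel_acres'].sum()
             / buildings_df.drop_duplicates(['parcel_id'])['parcel_acres'].sum())
    print(1 - share)  # parcels 1 and 2 are dense greenfield: 1 - 3/8 = 0.625
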
 
 
 def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path, append_output):
@@ -223,6 +193,11 @@
     as a percentage of all households in sea level rise areas and a percentage of
     all households in sea level rise areas that are EPCs.
 
+    To run for PBA50, move the files from "Box\Plan Bay Area 2050+\Performance and Equity\
+    Plan Performance\Equity_Performance_Metrics\PBA50_reproduce_for_QA\slr_metrics_inputs"
+    to the relevant Plan run outputs folder, since these model output files were generated
+    post-Plan run to use in these standalone metrics.
+
     Parameters:
     - rtp (str): RTP2021 or RTP2025.
     - modelrun_alias (str): Alias for the model run, used for labeling output.
@@ -231,7 +206,8 @@ def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path,
     - output_path (str): File path for saving the output results
     - append_output (bool): True if appending output; False if writing
 
-    Writes metrics_slrProtection.csv to output_path, appending if append_output is True. Columns are:
+    Writes metrics_healthy1_hazard_resilience_SLR.csv to output_path, appending if append_output is True. Columns are:
+    - modelrun_id
     - modelrun_alias
     - hazard
     - area_alias
@@ -250,8 +226,9 @@
     geog_name = 'eir_coc_id' if rtp=="RTP2021" else 'epc_id'
 
     # SLR parcels - all parcels in the SLR input files that are inundated or mitigated
-    slr_area = [df.inundation.isin([12,24,10,20,100]), (df.inundation.isin([12,24,10,20,100]) & (df[geog_name].notnull()))]
-    slr_protected_area = [df.inundation == 100, (df.inundation == 100) & (df[geog_name].notnull())]
+    slr_area = [((df.slr_nodev == True) | (df.slr_mitigation == True)),
+                (((df.slr_nodev == True) | (df.slr_mitigation == True)) & (df[geog_name].notnull()))]
+    slr_protected_area = [df.slr_mitigation == True, ((df.slr_mitigation == True) & (df[geog_name].notnull()))]
 
     protected_households_pct = []
     for slr, slr_protected in zip(slr_area, slr_protected_area):
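
A toy illustration of the rewritten masks (invented values; not the real parcel table): slr_area covers parcels flagged as inundated (slr_nodev) or mitigated (slr_mitigation), slr_protected_area is the mitigated subset, and the == True comparisons double as NaN guards for parcels missing from the left-merged SLR parcel summary.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'slr_nodev':      [True, False, np.nan, False],
        'slr_mitigation': [False, True, np.nan, False],
        'tothh':          [10, 20, 30, 40],
    })
    slr_area = (df.slr_nodev == True) | (df.slr_mitigation == True)  # NaN -> False
    slr_protected_area = (df.slr_mitigation == True)
    print(df.loc[slr_protected_area, 'tothh'].sum() / df.loc[slr_area, 'tothh'].sum())  # 20/30
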
diff --git a/scripts/metrics/metrics_lu_standalone.py b/scripts/metrics/metrics_lu_standalone.py
index 3d515ed9..2999c1e4 100644
--- a/scripts/metrics/metrics_lu_standalone.py
+++ b/scripts/metrics/metrics_lu_standalone.py
@@ -230,8 +230,7 @@ def main():
             metrics_healthy.urban_park_acres(
                 BOX_DIR, args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output)
             metrics_healthy.non_greenfield_development_share(
-                args.rtp, modelrun_alias, modelrun_id, modelrun_data, run_directory_path,
-                OUTPUT_PATH, append_output)
+                args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output)
             metrics_healthy.slr_protection(
                 args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output)
diff --git a/scripts/metrics/metrics_utils.py b/scripts/metrics/metrics_utils.py
index 3aaaee68..597c8ecf 100644
--- a/scripts/metrics/metrics_utils.py
+++ b/scripts/metrics/metrics_utils.py
@@ -18,16 +18,10 @@
 pba50_geography_crosswalk_df = pd.DataFrame()      # parcel -> PBA50 growth geographies for use in rtp2025 metrics
 
-rtp2025_np_parcel_inundation_df = pd.DataFrame()   # parcel -> parcel sea level rise inundation
-rtp2025_dbp_parcel_inundation_df = pd.DataFrame()  # parcel -> parcel sea level rise inundation
-
 rtp2021_tract_crosswalk_df = pd.DataFrame()        # parcel -> tracts, including coc/epc, displacement, growth geography, HRA, TRA, PPA
 rtp2021_pda_crosswalk_df = pd.DataFrame()          # parcel -> PDA (pda_id_pba50_fb)
 rtp2021_geography_crosswalk_df = pd.DataFrame()    # parcel -> parcel category (fbpchcat -> growth geog, hra, tra), jurisdiction
-rtp2021_np_parcel_inundation_df = pd.DataFrame()   # parcel -> parcel sea level rise inundation
-rtp2021_fbp_parcel_inundation_df = pd.DataFrame()  # parcel -> parcel sea level rise inundation
-
 PARCEL_AREA_FILTERS = {
     'RTP2021': {
         'HRA'  : lambda df: df['hra_id'] == 'HRA',
@@ -96,9 +90,10 @@ def load_data_for_runs(
     Returns:
     - dict with year -> {
-        "parcel" -> parcel DataFrame,
-        "county" -> county DataFrame,
-        "TAZ1454"-> taz DataFrame (necessary for totpop, which is only tabulated for TAZs)
+        "parcel"    -> parcel DataFrame,
+        "buildings" -> buildings DataFrame,
+        "county"    -> county DataFrame,
+        "TAZ1454"   -> taz DataFrame (necessary for totpop, which is only tabulated for TAZs)
       }
     """
@@ -111,15 +106,11 @@
     global rtp2025_parcel_taz_crosswalk_df
     global parcel_taz_sd_crosswalk_df
-    global rtp2025_np_parcel_inundation_df
-    global rtp2025_dbp_parcel_inundation_df
     global pba50_geography_crosswalk_df
     global rtp2021_geography_crosswalk_df
     global rtp2021_tract_crosswalk_df
     global rtp2021_pda_crosswalk_df
-    global rtp2021_np_parcel_inundation_df
-    global rtp2021_fbp_parcel_inundation_df
 
     CROSSWALKS_DIR = M_DRIVE / "urban_modeling" / "baus" / "BAUS Inputs" / "basis_inputs" / "crosswalks"
 
@@ -362,19 +353,6 @@
     logging.debug("rtp2025_parcel_taz_crosswalk_df.head():\n{}".format(rtp2025_parcel_taz_crosswalk_df))
     logging.debug("rtp2025_parcel_taz_crosswalk_df.dtypes():\n{}".format(rtp2025_parcel_taz_crosswalk_df.dtypes))
-
-
-    if len(rtp2025_np_parcel_inundation_df) == 0:
-        PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50Plus_NP.csv"
-        rtp2025_np_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE)
-        logging.info("  Read {:,} rows from crosswalk {}".format(len(rtp2025_np_parcel_inundation_df), PARCEL_INUNDATION_FILE))
-        logging.debug("  rtp2025_np_parcel_inundation_df.head():\n{}".format(rtp2025_np_parcel_inundation_df.head()))
-
-    if len(rtp2025_dbp_parcel_inundation_df) == 0:
-        PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50Plus_DBP.csv"
-        rtp2025_dbp_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE)
-        logging.info("  Read {:,} rows from crosswalk {}".format(len(rtp2025_dbp_parcel_inundation_df), PARCEL_INUNDATION_FILE))
-        logging.debug("  rtp2025_dbp_parcel_inundation_df.head():\n{}".format(rtp2025_dbp_parcel_inundation_df.head()))
 
     # define analysis years
     if skip_base_year:
@@ -385,6 +363,8 @@
         modelrun_data[2025] = {} # for later interpolation to 2023
         modelrun_data[2050] = {}
         parcel_pattern = "core_summaries/*_parcel_summary_{}.csv"
+        buildings_pattern = "core_summaries/*_building_summary_{}.csv"
+        slr_parcel_pattern = "hazards_summaries/*_slr_parcel_summary_{}.csv"
         geo_summary_pattern = "geographic_summaries/*_county_summary_{}.csv"
         taz1_summary_pattern = "travel_model_summaries/*_taz1_summary_{}.csv"
         taz1_interim_summary_pattern = "core_summaries/*_interim_zone_output_{}.csv"
@@ -504,7 +484,9 @@
         if len(rtp2021_geography_crosswalk_df) == 0:
             # pba50_metrics.py called this "parcel_geography_file" - use it to get fbpchcat
             GEOGRAPHY_CROSSWALK_FILE = METRICS_DIR / "metrics_input_files" / "2021_02_25_parcels_geography.csv"
-            rtp2021_geography_crosswalk_df = pd.read_csv(GEOGRAPHY_CROSSWALK_FILE, usecols=['PARCEL_ID','fbpchcat','ppa_id','eir_coc_id', 'juris_name_full'])
+            rtp2021_geography_crosswalk_df = pd.read_csv(GEOGRAPHY_CROSSWALK_FILE, usecols=['PARCEL_ID', 'ACRES', 'fbpchcat','ppa_id','eir_coc_id', 'juris_name_full', 'urbanized'])
+            # match RTP2025 column name
+            rtp2021_geography_crosswalk_df.rename(columns={"urbanized": "in_urban_area"}, inplace=True)
             logging.info("  Read {:,} rows from crosswalk {}".format(len(rtp2021_geography_crosswalk_df), GEOGRAPHY_CROSSWALK_FILE))
             logging.debug("  rtp2021_geography_crosswalk_df.head():\n{}".format(rtp2021_geography_crosswalk_df.head()))
@@ -531,29 +513,19 @@
         rtp2021_geography_crosswalk_df['jurisdiction'] = rtp2021_geography_crosswalk_df.jurisdiction.str.replace("St ","St. ") # St. Helena
         logging.debug(f"rtp2021_geography_crosswalk_df.jurisdiction.value_counts(dropna=False):\n{rtp2021_geography_crosswalk_df.jurisdiction.value_counts(dropna=False)}")
 
-        if len(rtp2021_np_parcel_inundation_df) == 0:
-            PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50_NP.csv"
-            rtp2021_np_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE)
-            logging.info("  Read {:,} rows from file {}".format(len(rtp2021_np_parcel_inundation_df), PARCEL_INUNDATION_FILE))
-            logging.debug("  rtp2021_np_parcel_inundation_df.head():\n{}".format(rtp2021_np_parcel_inundation_df.head()))
-
-        if len(rtp2021_fbp_parcel_inundation_df) == 0:
-            PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50_FBP.csv"
-            rtp2021_fbp_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE)
-            logging.info("  Read {:,} rows from crosswalk {}".format(len(rtp2021_fbp_parcel_inundation_df), PARCEL_INUNDATION_FILE))
-            logging.debug("  rtp2021_fbp_parcel_inundation_df.head():\n{}".format(rtp2021_fbp_parcel_inundation_df.head()))
-
         # define analysis years
         modelrun_data[2015] = {}
         modelrun_data[2050] = {}
         parcel_pattern = "*_parcel_data_{}.csv"
+        slr_parcel_pattern = "*_slr_parcel_summary_{}.csv"
+        buildings_pattern = "*_building_data_{}.csv"
         geo_summary_pattern = "*_county_summaries_{}.csv"
         taz1_summary_pattern = "*_taz_summaries_{}.csv"
     else:
         raise ValueError(f"Unrecognized plan: {rtp}")
 
-    # Load parcels summaries
+    # Load parcel summaries
     for year in sorted(modelrun_data.keys()):
         # handle RTP2021 hacks
         if (rtp=="RTP2021") and (year == 2050) and (modelrun_alias=="No Project"):
@@ -577,6 +549,24 @@
         logging.debug("Head:\n{}".format(parcel_df.head()))
         logging.debug("preserved_units.value_counts():\n{}".format(parcel_df['preserved_units'].value_counts(dropna=False)))
 
+        # also add parcel-level sea level rise summaries and merge them onto the parcels table
+        logging.debug("Looking for sea level rise parcel data matching {}".format(slr_parcel_pattern.format(year)))
+        file = next(run_directory_path.glob(slr_parcel_pattern.format(year)))
+        logging.debug(f"Found {file}")
+        slr_parcel_df = pd.read_csv(file)
+        logging.info("  Read {:,} rows from slr parcel file {}".format(len(slr_parcel_df), file))
+        logging.debug("Head:\n{}".format(slr_parcel_df.head()))
+        parcel_df = pd.merge(
+            left     = parcel_df,
+            right    = slr_parcel_df,
+            how      = "left",
+            left_on  = "parcel_id",
+            right_on = "parcel_id",
+            validate = "one_to_one"
+        )
+        logging.debug("Head after merge with slr_parcel_df:\n{}".format(parcel_df.head()))
+        logging.debug("parcel_df.dtypes after merge with slr_parcel_df:\n{}".format(parcel_df.dtypes))
+
         if rtp == "RTP2025":
             # add geography crosswalk for zoning categories
             parcel_df = pd.merge(
@@ -649,29 +639,6 @@
             logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
             logging.debug("Head after merge with rtp2025_urban_area_crosswalk_df:\n{}".format(parcel_df.head()))
 
-            # add parcel sea level rise inundation based on the Plan scenario
-            this_modelrun_alias = classify_runid_alias(modelrun_alias)
-            if this_modelrun_alias == "NP":
-                parcel_df = pd.merge(
-                    left     = parcel_df,
-                    right    = rtp2025_np_parcel_inundation_df,
-                    how      = "left",
-                    on       = "parcel_id",
-                    validate = "one_to_one"
-                )
-                logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
-                logging.debug("Head after merge with rtp2025_np_parcel_inundation_df:\n{}".format(parcel_df.head()))
-            elif this_modelrun_alias == "DBP":
-                parcel_df = pd.merge(
-                    left     = parcel_df,
-                    right    = rtp2025_dbp_parcel_inundation_df,
-                    how      = "left",
-                    on       = "parcel_id",
-                    validate = "one_to_one"
-                )
-                logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
-                logging.debug("Head after merge with rtp2025_dbp_parcel_inundation_df:\n{}".format(parcel_df.head()))
-
         # rtp2025_tract_crosswalk_df.columns should all be ints -- convert
         cols_int64 = ['tract10','tract20']
         cols_int   = ['tract20_epc','tract20_growth_geo','tract20_tra','tract20_hra','tract10_DispRisk','in_urban_area']
@@ -722,29 +689,6 @@
             logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
             logging.debug("Head after merge with rtp2025_tract_crosswalk_df:\n{}".format(parcel_df.head()))
 
-            # add parcel sea level rise inundation *input* based on the scenario
-            this_modelrun_alias = classify_runid_alias(modelrun_alias)
-            if this_modelrun_alias == "NP":
-                parcel_df = pd.merge(
-                    left     = parcel_df,
-                    right    = rtp2021_np_parcel_inundation_df,
-                    how      = "left",
-                    on       = "parcel_id",
-                    validate = "one_to_one"
-                )
-                logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
-                logging.debug("Head after merge with rtp2021_np_parcel_inundation_df:\n{}".format(parcel_df.head()))
-            else:
-                parcel_df = pd.merge(
-                    left     = parcel_df,
-                    right    = rtp2021_fbp_parcel_inundation_df,
-                    how      = "left",
-                    on       = "parcel_id",
-                    validate = "one_to_one"
-                )
-                logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes))
-                logging.debug("Head after merge with rtp2021_fbp_parcel_inundation_df:\n{}".format(parcel_df.head()))
-
         # Merge the tract and coc crosswalks
         parcel_df = parcel_df.merge(rtp2021_tract_crosswalk_df, on="parcel_id", how="left")
         logging.debug("parcel_df after first merge with tract crosswalk:\n{}".format(parcel_df.head(30)))
@@ -755,6 +699,8 @@
 
         # Retain only a subset of columns after merging
         columns_to_keep = ['parcel_id', 'tract10', 'fbpchcat',
                            'gg_id', 'tra_id', 'hra_id', 'dis_id', 'ppa_id', 'eir_coc_id','jurisdiction',
+                           # greenfield columns
+                           'in_urban_area', 'ACRES',
                            'zone_id', 'county', 'superdistrict',
                            'hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh', 'totemp',
@@ -771,14 +717,44 @@
                            # use after may 3 2024
                            'np','cur','dbp',
-                           # sea level rise column
-                           "inundation"]
+                           # sea level rise columns
+                           "slr_nodev", "slr_mitigation"]
         parcel_df = parcel_df[columns_to_keep]
 
         logging.debug("parcel_df:\n{}".format(parcel_df.head(30)))
         modelrun_data[year]['parcel'] = parcel_df
 
+    # Load building data for horizon year
+    horizon_year = sorted(modelrun_data.keys())[-1]
+    logging.debug("Looking for buildings summary matching {}".format(buildings_pattern.format(horizon_year)))
+    file = next(run_directory_path.glob(buildings_pattern.format(horizon_year)))
+    logging.debug(f"Found {file}")
+    buildings_df = pd.read_csv(file)
+    logging.info("  Read {:,} rows from buildings summary {}".format(len(buildings_df), file))
+    logging.debug("Head:\n{}".format(buildings_df.head()))
+
+    # merge parcel information for horizon year onto buildings
+    parcel_df = modelrun_data[horizon_year]['parcel']
+    # if RTP2021 get non_residential_sqft from the buildings table
+    if rtp=="RTP2021":
+        parcel_df = parcel_df.merge(buildings_df[['parcel_id', 'non_residential_sqft']].groupby(['parcel_id']).sum(), on='parcel_id', how='left')
+    # distinguish the column names from the buildings table names (these are parcel totals)
+    parcels = parcel_df[['parcel_id', 'residential_units', 'non_residential_sqft', 'ACRES', 'in_urban_area']].\
+        rename(columns={"residential_units": "residential_units_total", "non_residential_sqft": "non_residential_sqft_total",
"non_residential_sqft_total", + "ACRES": "parcel_acres"}) + buildings_df = pd.merge( + left = buildings_df, + right = parcels, + how = "left", + on = "parcel_id", + validate = "many_to_one" + ) + logging.debug("Head after merge with parcel_df:\n{}".format(buildings_df.head())) + logging.debug("Length after merge with parcel_df:\n{}".format(len(buildings_df))) + + modelrun_data[horizon_year]['buildings'] = buildings_df + # Load county summaries for year in sorted(modelrun_data.keys()): logging.debug("Looking for geographic summaries matching {}".format(geo_summary_pattern.format(year))) @@ -877,7 +853,7 @@ def load_data_for_runs( df = df1.copy() for col in df.columns: - if pd.api.types.is_numeric_dtype(df[col]): + if (type(df[col]) == int) or (type(df[col]) == float): # Long way to write 3/5 but maybe it'll pay off in future... :) df[col] = df1[col] + ((2023 - t1) / (t2 - t1))*(df2[col] - df1[col])