|
| 1 | +# SPDX-FileCopyrightText: gb-dispatch-model contributors |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: MIT |
| 4 | + |
| 5 | + |
| 6 | +""" |
| 7 | +GSP-level data table generator. |
| 8 | +
|
| 9 | +This is a script to combine the BB1 sheet with the BB2 (metadata) sheet of the FES workbook. |
| 10 | +""" |
| 11 | + |
| 12 | +import logging |
| 13 | +import re |
| 14 | +import zipfile |
| 15 | +from pathlib import Path |
| 16 | + |
| 17 | +import geopandas as gpd |
| 18 | +import pandas as pd |
| 19 | + |
| 20 | +from scripts._helpers import configure_logging, set_scenario_config |
| 21 | +from scripts.gb_model._helpers import ( |
| 22 | + get_scenario_name, |
| 23 | +) |
| 24 | +from scripts.gb_model.preprocess.process_fes_gsp_data import ( |
| 25 | + process_bb1_data, |
| 26 | + process_gsp_coordinates, |
| 27 | +) |
| 28 | + |
| 29 | +logger = logging.getLogger(__name__) |
| 30 | + |
| 31 | + |
| 32 | +def _merge_gsps(df: pd.DataFrame, gsps: str, key: str) -> pd.DataFrame: |
| 33 | + """ |
| 34 | + Merge multiple GSPs into a single GSP, by dissolving the geometries |
| 35 | +
|
| 36 | + Parameters |
| 37 | + ---------- |
| 38 | + df: pd.DataFrame |
| 39 | + The dataframe containing the GSPs to merge |
| 40 | + gsps: str |
| 41 | + The GSPs to merge, as a single string with "|" separating the different GSPs (e.g. "GSP1|GSP2|GSP3")s |
| 42 | + key: str |
| 43 | + column to merge the GSPs by |
| 44 | + """ |
| 45 | + |
| 46 | + # All occurrences of the GSPs to merge, i.e. all rows where any of the GSPs to merge are mentioned in the "GSPs" column (there may be multiple rows for each GSP if there are multiple GSPs to merge) |
| 47 | + all_occurrences = df.loc[ |
| 48 | + (df[key].str.contains(rf"({gsps})\b", regex=True)) & (df[key].notna()) |
| 49 | + ] |
| 50 | + |
| 51 | + if len(all_occurrences) > 1: |
| 52 | + # Dissolve the rows of geometries matching the GSPs to merge into a single row |
| 53 | + if key == "GSP": |
| 54 | + geo_key = "geometryshape" |
| 55 | + else: |
| 56 | + geo_key = "geometrycoord" |
| 57 | + df.loc[df[key] == gsps, geo_key] = ( |
| 58 | + df.loc[all_occurrences.index] |
| 59 | + .set_geometry(geo_key) |
| 60 | + .dissolve() |
| 61 | + .iloc[0][geo_key] |
| 62 | + ) |
| 63 | + |
| 64 | + if key == "GSPs": |
| 65 | + # Assign the GSP name as the concatenation of the GSP names of the merged GSPs, separated by "|" |
| 66 | + df.loc[df[key] == gsps, "GSP"] = all_occurrences[ |
| 67 | + all_occurrences[key] != gsps |
| 68 | + ].GSP.str.cat(sep="|") |
| 69 | + |
| 70 | + # Drop the other rows of the merged GSPs, keeping only the dissolved row |
| 71 | + indices = all_occurrences.index.tolist() |
| 72 | + indices_filter = [ |
| 73 | + x |
| 74 | + for x in all_occurrences.index |
| 75 | + if x not in df.loc[df[key] == gsps].index.tolist() |
| 76 | + ] |
| 77 | + if len(indices_filter) == len(indices): |
| 78 | + # For merging the GSPs to combine busbars, the earlier filter will not work |
| 79 | + retain_row = gsps.split("|")[-1] |
| 80 | + indices_filter = [ |
| 81 | + x |
| 82 | + for x in all_occurrences.loc[ |
| 83 | + all_occurrences["GSPs"] != retain_row |
| 84 | + ].index.tolist() |
| 85 | + ] |
| 86 | + df.drop(index=indices_filter, inplace=True) |
| 87 | + |
| 88 | + return df |
| 89 | + |
| 90 | + |
def create_gsp_shapefile(
    df_gsp_coordinates: pd.DataFrame,
    df_gsp_shapes: gpd.GeoDataFrame,
    df_bb1: pd.DataFrame,
    gsp_mapping: dict,
    combine_gsps: dict,
):
    """
    Create a GSP shapefile by combining FES BB1 sheet data, GSP coordinate
    data and GSP shape data.

    Parameters
    ----------
    df_gsp_coordinates : pd.DataFrame
        The GSP coordinate data dataframe; must contain "Longitude",
        "Latitude" and "GSP ID" columns.
    df_gsp_shapes : gpd.GeoDataFrame
        GSP polygon shape data; must contain a "GSPs" column.
    df_bb1 : pd.DataFrame
        FES BB1 sheet dataframe.
    gsp_mapping : dict
        Manual mapping of GSP names between the FES workbook and the GSP
        coordinate/shape data.
    combine_gsps : dict
        Groups of GSPs to combine, mapping a retained GSP name to the list of
        GSP names to merge into it. The dict is not modified.

    Returns
    -------
    gpd.GeoDataFrame
        Merged GSP table with "geometryshape" as the active geometry column
        and "geometrycoord" serialised to WKT strings.
    """

    # Convert the GSP coordinate data to a GeoDataFrame of point geometries
    gdf_gsps = gpd.GeoDataFrame(
        df_gsp_coordinates,
        geometry=gpd.points_from_xy(
            df_gsp_coordinates.Longitude, df_gsp_coordinates.Latitude
        ),
        crs="EPSG:4326",
    )

    # Unique GSP names from the BB1 sheet, renamed to match the shape data
    df_bb1_gsp = pd.DataFrame(data=df_bb1.GSP.unique(), columns=["GSP"])
    df_bb1_gsp["GSP"] = df_bb1_gsp["GSP"].replace(gsp_mapping)

    # Join GSP shape data with GSP coordinate data
    gsp_joined = df_gsp_shapes.set_index("GSPs").join(
        gdf_gsps.set_index("GSP ID"), lsuffix="shape", rsuffix="coord", how="outer"
    )

    fes_merged = pd.merge(
        df_bb1_gsp,
        gsp_joined.reset_index(),
        left_on="GSP",
        right_on="Name",
        how="outer",
    )

    # Drop unrequired columns
    fes_merged.drop(
        columns=["GSP Group", "Minor FLOP", "Latitude", "Longitude"], inplace=True
    )

    # Some GSPs have not been matched with shape data as they are part of a
    # geometry shape containing multiple GSPs
    unmatched_gsps = fes_merged.loc[
        (fes_merged.geometrycoord.isna()) & (fes_merged.GSPs.notna())
    ]
    for gsps in unmatched_gsps["GSPs"].tolist():
        fes_merged = _merge_gsps(fes_merged, gsps, "GSPs")
    logger.info(
        "Merged GSP shape and coordinate data for GSPs that are part of a combined geometry shape in the FES workbook"
    )

    # Some busbars are split into multiple GSPs in the FES workbook but
    # represented as a single GSP in shape data
    for retained, group in combine_gsps.items():
        # Build the alternation string locally rather than appending the key
        # to the caller's list: the in-place append would mutate
        # `combine_gsps` and corrupt the grouping on a repeated call.
        gsps = "|".join([*group, retained])
        fes_merged = _merge_gsps(fes_merged, gsps, "GSPs")
    logger.info(
        "Merged GSP shape and coordinate data for busbars that are split into multiple GSPs in the FES workbook but represented as a single GSP in shape data"
    )

    # Some GSPs where coordinate data is available but shape data was not matched
    unmatched_gsp_name = fes_merged.loc[
        (fes_merged.geometryshape.isna()) & (fes_merged.GSPs.notna())
    ]
    for gsps in unmatched_gsp_name["GSP"].tolist():
        fes_merged = _merge_gsps(fes_merged, gsps, "GSP")
    logger.info(
        "Merged GSP shape and coordinate data for GSPs where coordinate data is available but shape data was not matched"
    )

    fes_merged = gpd.GeoDataFrame(fes_merged, geometry="geometryshape", crs="EPSG:4326")

    # Merging GSPs with same GSP ID but different GSP groups.
    # Dissolving the shapes into a single GSP as the rows still have distinct
    # geometries though adjoining each other. Rows with null geometry are set
    # aside first because dissolve cannot handle them.
    fes_merged_notnan = fes_merged[fes_merged["geometryshape"].notna()]
    fes_merged_nan = fes_merged[fes_merged["geometryshape"].isna()]
    fes_merged_notnan = fes_merged_notnan.dissolve(by="GSPs", as_index=False)
    fes_merged = pd.concat([fes_merged_notnan, fes_merged_nan], ignore_index=True)
    logger.info("Merged GSP duplicates due to different GSP groups")

    # Serialise the secondary geometry column so the frame can be written to
    # GeoJSON (only one live geometry column is allowed)
    fes_merged["geometrycoord"] = fes_merged["geometrycoord"].to_wkt()

    if (missing_shapes := fes_merged.geometryshape.isna()).any():
        logger.warning(
            f"There are {missing_shapes.sum()} GSPs with missing shape information after merging the GSP shape and coordinate data. These GSPs will be kept in the output but with null geometry.\n"
            f"{fes_merged[missing_shapes][['GSP', 'GSPs']]}"
        )

    return fes_merged
| 193 | + |
| 194 | + |
if __name__ == "__main__":
    if "snakemake" not in globals():
        from scripts._helpers import mock_snakemake

        snakemake = mock_snakemake(Path(__file__).stem)
    configure_logging(snakemake)
    set_scenario_config(snakemake)

    fes_scenario = get_scenario_name(snakemake)

    # GSP coordinates, with manual fill-ins for GSPs missing lat/lon data
    df_gsp_coordinates = process_gsp_coordinates(
        gsp_coordinates_path=snakemake.input.gsp_coordinates,
        extra_gsp_coordinates=snakemake.params.fill_gsp_lat_lons,
    )

    # FES BB1 sheet filtered to the configured scenario and year range
    df_bb1 = process_bb1_data(
        bb1_path=snakemake.input.bb1_sheet,
        fes_scenario=fes_scenario,
        year_range=snakemake.params.year_range,
    )

    # Locate the GeoJSON for the target CRS inside the zip archive. The
    # context manager closes the archive handle once the name is found;
    # geopandas re-opens the member itself via the "zip!member" path syntax.
    CRS = 4326
    zip_path = Path(snakemake.input.gsp_shapes)
    with zipfile.ZipFile(zip_path) as archive:
        shp_filename = [
            x
            for x in archive.namelist()
            if re.search(rf"Proj_{CRS}/.*_{CRS}_.*\.geojson$", x)
        ][0]
    df_gsp_shapes = gpd.read_file(f"{zip_path}!{shp_filename}")

    shape = create_gsp_shapefile(
        df_gsp_coordinates,
        df_gsp_shapes,
        df_bb1,
        gsp_mapping=snakemake.params.manual_gsp_mapping,
        combine_gsps=snakemake.params.combine_gsps,
    )

    # Write before logging so the message only reports a successful export
    shape.to_file(snakemake.output.shapefile, driver="GeoJSON")
    logger.info(f"Exported the GSP shapefile to {snakemake.output.shapefile}")
0 commit comments