@@ -226,7 +226,14 @@ def get_op(self) -> pd.DataFrame:
226226 .withColumn ("dataset" , F .lit ("NATIONAL" ))
227227 .withColumn ("sitetret" , F .lit ("NATIONAL" ))
228228 .groupBy (
229- op .drop ("index" , "fyear" , "attendances" , "tele_attendances" , "sushrg_trimmed" , "imd_quintile" ).columns
229+ op .drop (
230+ "index" ,
231+ "fyear" ,
232+ "attendances" ,
233+ "tele_attendances" ,
234+ "sushrg_trimmed" ,
235+ "imd_quintile" ,
236+ ).columns
230237 )
231238 .agg (
232239 (F .sum ("attendances" ) * self ._sample_rate ).alias ("attendances" ),
@@ -269,9 +276,7 @@ def get_birth_factors(self) -> pd.DataFrame:
269276 """
270277
271278 return (
272- self ._spark .read .parquet (
273- "/Volumes/nhp/population_projections/files/birth_data/"
274- )
279+ self ._spark .read .table ("nhp.population_projections.births" )
275280 .filter (F .col ("area_code" ).rlike ("^E0[6-9]" ))
276281 .withColumn ("sex" , F .lit (2 ))
277282 .groupBy ("projection" , "age" , "sex" )
@@ -289,10 +294,7 @@ def get_demographic_factors(self) -> pd.DataFrame:
289294 """
290295
291296 return (
292- self ._spark .read .parquet (
293- "/Volumes/nhp/population_projections/files/demographic_data/projection=principal_proj"
294- )
295- .withColumn ("projection" , F .lit ("principal_proj" ))
297+ self ._spark .read .table ("nhp.population_projections.demographics" )
296298 .filter (F .col ("area_code" ).rlike ("^E0[6-9]" ))
297299 .groupBy ("projection" , "age" , "sex" )
298300 .pivot ("year" )
0 commit comments