@@ -226,7 +226,7 @@ def get_op(self) -> pd.DataFrame:
226226 .withColumn ("dataset" , F .lit ("NATIONAL" ))
227227 .withColumn ("sitetret" , F .lit ("NATIONAL" ))
228228 .groupBy (
229- op .drop ("index" , "fyear" , "attendances" , "tele_attendances" ).columns
229+ op .drop ("index" , "fyear" , "attendances" , "tele_attendances" , "sushrg_trimmed" , "imd_quintile" ).columns
230230 )
231231 .agg (
232232 (F .sum ("attendances" ) * self ._sample_rate ).alias ("attendances" ),
@@ -270,7 +270,7 @@ def get_birth_factors(self) -> pd.DataFrame:
270270
271271 return (
272272 self ._spark .read .parquet (
273- "/Volumes/su_data/ nhp/population-projections/ birth_data"
273+ "/Volumes/nhp/population_projections/files/ birth_data/ "
274274 )
275275 .filter (F .col ("area_code" ).rlike ("^E0[6-9]" ))
276276 .withColumn ("sex" , F .lit (2 ))
@@ -290,8 +290,9 @@ def get_demographic_factors(self) -> pd.DataFrame:
290290
291291 return (
292292 self ._spark .read .parquet (
293- "/Volumes/su_data/ nhp/population-projections/ demographic_data"
293+ "/Volumes/nhp/population_projections/files/ demographic_data/projection=principal_proj "
294294 )
295+ .withColumn ("projection" , F .lit ("principal_proj" ))
295296 .filter (F .col ("area_code" ).rlike ("^E0[6-9]" ))
296297 .groupBy ("projection" , "age" , "sex" )
297298 .pivot ("year" )
@@ -307,7 +308,7 @@ def get_hsa_activity_table(self) -> pd.DataFrame:
307308 :rtype: pd.DataFrame
308309 """
309310 return (
310- self ._spark .read .table ("hsa_activity_tables_NATIONAL " )
311+ self ._spark .read .table ("nhp.default.hsa_activity_tables_national " )
311312 .filter (F .col ("fyear" ) == self ._year * 100 + (self ._year + 1 ) % 100 )
312313 .groupBy ("hsagrp" , "sex" , "age" )
313314 .agg (F .mean ("activity" ).alias ("activity" ))
0 commit comments