Skip to content

Commit acc8d7c

Browse files
authored
Merge pull request #317 from The-Strategy-Unit/316_update_volumes_pbm
Updates PBM notebook and data.databricks.DatabricksNational
2 parents: 111fa56 + 7fccf22; commit: acc8d7c

File tree

2 files changed: 8 additions and 13 deletions.

model/data/databricks.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ def get_op(self) -> pd.DataFrame:
226226
.withColumn("dataset", F.lit("NATIONAL"))
227227
.withColumn("sitetret", F.lit("NATIONAL"))
228228
.groupBy(
229-
op.drop("index", "fyear", "attendances", "tele_attendances").columns
229+
op.drop("index", "fyear", "attendances", "tele_attendances", "sushrg_trimmed", "imd_quintile").columns
230230
)
231231
.agg(
232232
(F.sum("attendances") * self._sample_rate).alias("attendances"),
@@ -270,7 +270,7 @@ def get_birth_factors(self) -> pd.DataFrame:
270270

271271
return (
272272
self._spark.read.parquet(
273-
"/Volumes/su_data/nhp/population-projections/birth_data"
273+
"/Volumes/nhp/population_projections/files/birth_data/"
274274
)
275275
.filter(F.col("area_code").rlike("^E0[6-9]"))
276276
.withColumn("sex", F.lit(2))
@@ -290,8 +290,9 @@ def get_demographic_factors(self) -> pd.DataFrame:
290290

291291
return (
292292
self._spark.read.parquet(
293-
"/Volumes/su_data/nhp/population-projections/demographic_data"
293+
"/Volumes/nhp/population_projections/files/demographic_data/projection=principal_proj"
294294
)
295+
.withColumn("projection", F.lit("principal_proj"))
295296
.filter(F.col("area_code").rlike("^E0[6-9]"))
296297
.groupBy("projection", "age", "sex")
297298
.pivot("year")
@@ -307,7 +308,7 @@ def get_hsa_activity_table(self) -> pd.DataFrame:
307308
:rtype: pd.DataFrame
308309
"""
309310
return (
310-
self._spark.read.table("hsa_activity_tables_NATIONAL")
311+
self._spark.read.table("nhp.default.hsa_activity_tables_national")
311312
.filter(F.col("fyear") == self._year * 100 + (self._year + 1) % 100)
312313
.groupBy("hsagrp", "sex", "age")
313314
.agg(F.mean("activity").alias("activity"))

notebooks/national_run.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828

2929
# COMMAND ----------
3030

31-
dbutils.widgets.text("data_path", "/Volumes/su_data/nhp/old_nhp_data", "Data Path")
31+
dbutils.widgets.text("data_path", "/Volumes/nhp/model_data/files", "Data Path")
3232
dbutils.widgets.text("data_version", "dev", "Data Version")
3333
dbutils.widgets.text("params_file", "sample_params.json", "Params File")
3434
dbutils.widgets.text("sample_rate", "0.01", "Sample Rate")
@@ -66,14 +66,8 @@
6666

6767
# COMMAND ----------
6868

69-
# Check that the version is the same in the params and in the data_version variable above
70-
71-
assert dbutils.widgets.get('data_version').rsplit('.', 1)[0] == params["app_version"]
72-
73-
# COMMAND ----------
74-
75-
spark.catalog.setCurrentCatalog("su_data")
76-
spark.catalog.setCurrentDatabase("nhp")
69+
spark.catalog.setCurrentCatalog("nhp")
70+
spark.catalog.setCurrentDatabase("default")
7771

7872
# COMMAND ----------
7973

0 commit comments

Comments
 (0)