
Commit bd96919

adds capacity_conversion_group column
fixes #183
1 parent 7581b2d commit bd96919


6 files changed: 112 additions & 0 deletions


aggregated_data/ecds.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ def get_ecds_data(spark: SparkSession) -> DataFrame:
         F.col("type"),
         F.col("hsagrp"),
         F.col("ndggrp"),
+        F.col("capacity_conversion_group"),
         F.col("icb"),
         F.col("is_main_icb"),
         F.col("is_adult"),

aggregated_data/outpatients.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ def get_outpatients_data(spark: SparkSession) -> DataFrame:
         F.col("pod"),
         F.col("hsagrp"),
         F.col("ndggrp"),
+        F.col("capacity_conversion_group"),
         F.col("has_procedures"),
         F.col("sushrg").substr(1, 4).alias("sushrg_trimmed"),
         F.col("icb"),

raw_data/aae.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,11 @@
 from raw_data.helpers import add_age_group_column
 
 
+def create_capacity_conversion_group():
+    # can't create capacity groups on AAE data
+    return F.lit("aae-unknown")
+
+
 def get_aae_data(spark: SparkSession) -> None:
     """Get AAE data
@@ -192,6 +197,7 @@ def get_aae_data(spark: SparkSession) -> None:
         .withColumn("tretspef_grouped", F.lit("Other"))
         .withColumn("pod", F.concat(F.lit("aae_type-"), F.col("aedepttype")))
         .withColumn("ndggrp", F.col("group"))
+        .withColumn("capacity_conversion_group", create_capacity_conversion_group())
         .repartition("fyear", "provider")
     )

raw_data/ecds.py

Lines changed: 12 additions & 0 deletions
@@ -13,6 +13,17 @@
 from raw_data.helpers import add_age_group_column
 
 
+def create_capacity_conversion_group():
+    is_child = F.col("age") <= 17
+
+    return (
+        F.when(F.col("acuity") == "immediate-resuscitation", "aae-resus")
+        .when(is_child, "aae-childrens")
+        .when(F.col("acuity").isin(["urgent", "very-urgent"]), "aae-majors")
+        .otherwise("aae-minors")
+    )
+
+
 def get_ecds_data(spark: SparkSession) -> None:
     """Get ECDS data"""
     df = spark.read.table("hes.silver.ecds")
@@ -239,6 +250,7 @@ def get_ecds_data(spark: SparkSession) -> None:
         .withColumn("tretspef_grouped", F.lit("Other"))
         .withColumn("pod", F.concat(F.lit("aae_type-"), F.col("aedepttype")))
         .withColumn("ndggrp", F.col("group"))
+        .withColumn("capacity_conversion_group", create_capacity_conversion_group())
         .repartition("fyear", "provider")
     )
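
A quick way to sanity-check the new ECDS grouping is to run the helper over a few toy rows. The snippet below is a minimal sketch, not part of the commit: it assumes a local SparkSession, that create_capacity_conversion_group can be imported from raw_data.ecds, and uses made-up age/acuity values. It mainly illustrates that the first matching F.when wins, so the resus check takes precedence over the child check.

from pyspark.sql import SparkSession

from raw_data.ecds import create_capacity_conversion_group  # helper added in this commit

spark = SparkSession.builder.getOrCreate()

# hypothetical rows: (age, acuity)
rows = [
    (5, "immediate-resuscitation"),  # child, but the resus branch matches first -> aae-resus
    (5, "urgent"),                   # child branch matches before majors -> aae-childrens
    (40, "very-urgent"),             # adult with urgent/very-urgent acuity -> aae-majors
    (40, "standard"),                # nothing matched, falls through -> aae-minors
]
df = spark.createDataFrame(rows, ["age", "acuity"])

df.withColumn("capacity_conversion_group", create_capacity_conversion_group()).show()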

raw_data/inpatients.py

Lines changed: 75 additions & 0 deletions
@@ -1,5 +1,7 @@
 """Generate inpatients data"""
 
+from operator import is_
+
 from databricks.connect import DatabricksSession
 from delta.tables import DeltaTable
 from pyspark.sql import SparkSession
@@ -11,6 +13,74 @@
 from raw_data.helpers import add_age_group_column, add_tretspef_grouped_column
 
 
+def create_capacity_conversion_group():
+    is_child = F.col("age") <= 17
+    is_surgical_specialty = F.col("tretspef").rlike("^1(?!80|9[012])")
+    is_zero_los = F.col("speldur") == 0
+    is_elective = F.col("group") == "elective"
+    is_nonelective = F.col("group") == "non-elective"
+
+    # the logic for this will fall through, so we do not need to do things like apply an "is_adult"
+    # filter after filtering for is_child.
+    return (
+        # daycases
+        F.when(
+            F.col("classpat").isin(["2", "3"]),
+            F.when(is_child, "ip-daycase-childrens")
+            .when(F.col("tretspef").isin(["320", "321"]), "ip-daycase-cardiology")
+            .when(
+                F.col("tretspef").isin(["280", "811"]),
+                "ip-daycase-interventional_radiology",
+            )
+            # TODO: add endoscopy
+            .when(
+                F.col("tretspef").isin(["253", "260", "303", "370", "800"]),
+                "ip-daycase-oncology_haematology",
+            )
+            .when(is_surgical_specialty, "ip-daycase-surgical")
+            .otherwise("ip-daycase-non_surgical"),
+        )
+        # everything else will be non-daycase
+        # maternity admissions
+        .when(F.col("tretspef") == "501", "ip-maternity-obstetric")
+        .when(F.col("tretspef") == "560", "ip-maternity-midwife_led")
+        .when(F.col("group") == "maternity", "ip-maternity-unknown")
+        # paediatric admissions
+        .when(
+            is_child,
+            F.when(
+                is_zero_los & is_nonelective, "ip-childrens-assessment_unit"
+            ).otherwise("ip-childrens-inpatients"),
+        )
+        # adult admissions
+        # elective admissions
+        # TODO: add ip-stroke
+        .when(
+            is_elective,
+            F.when(
+                is_surgical_specialty,
+                F.when(
+                    F.col("speldur") <= 3, "ip-elective-surgical-short_stay"
+                ).otherwise("ip-elective-surgical-long_stay"),
+            ).otherwise(
+                F.when(
+                    F.col("speldur") <= 3, "ip-elective-non_surgical-short_stay"
+                ).otherwise("ip-elective-non_surgical-long_stay")
+            ),
+        )
+        # non-elective admissions
+        .when(is_zero_los, "ip-adult_acute_assessment")
+        .when(
+            is_surgical_specialty,
+            F.when(F.col("speldur") <= 3, "ip-acute-surgical-short_stay").otherwise(
+                "ip-acute-surgical-longer_stay"
+            ),
+        )
+        .when(F.col("speldur") <= 3, "ip-acute-non_surgical-short_stay")
+        .otherwise("ip-acute-non_surgical-longer_stay")
+    )
+
+
 def get_inpatients_data(spark: SparkSession) -> None:
     """Get Inpatients Data"""
     # Spell has maternity delivery episode
@@ -92,6 +162,8 @@ def get_inpatients_data(spark: SparkSession) -> None:
         # add in primary diagnosis and procedure columns
         .join(df_primary_diagnosis, ["epikey", "fyear", "procode3"], "left")
         .join(df_primary_procedure, ["epikey", "fyear", "procode3"], "left")
+        # capacity conversion
+        .withColumn("capacity_conversion_group", create_capacity_conversion_group())
         .select(
             F.col("epikey"),
             F.col("fyear"),
@@ -110,6 +182,7 @@ def get_inpatients_data(spark: SparkSession) -> None:
             F.col("tretspef_grouped"),
             F.col("hsagrp"),
             F.col("group"),
+            F.col("capacity_conversion_group"),
             F.col("admidate"),
             F.col("disdate"),
             F.col("speldur"),
@@ -186,3 +259,5 @@ def main() -> None:
 
 if __name__ == "__main__":
     main()
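
Because the chained F.when calls are evaluated in order, a record takes the first label whose condition matches: a child elective daycase, for instance, never reaches the elective branches. The sketch below is purely illustrative and not part of the commit; it assumes a local SparkSession, that create_capacity_conversion_group can be imported from raw_data.inpatients, and uses made-up rows. It also shows how the ^1(?!80|9[012]) pattern picks out surgical tretspef codes (codes starting with 1, except 180, 190, 191 and 192).

from pyspark.sql import SparkSession

from raw_data.inpatients import create_capacity_conversion_group  # helper added in this commit

spark = SparkSession.builder.getOrCreate()

# hypothetical rows: (age, tretspef, speldur, group, classpat)
rows = [
    (10, "110", 0, "elective", "2"),      # daycase branch wins -> ip-daycase-childrens
    (45, "110", 2, "elective", "1"),      # surgical elective, <= 3 days -> ip-elective-surgical-short_stay
    (45, "300", 0, "non-elective", "1"),  # zero LOS, adult, non-elective -> ip-adult_acute_assessment
    (70, "180", 5, "non-elective", "1"),  # "180" is excluded by the regex -> ip-acute-non_surgical-longer_stay
]
df = spark.createDataFrame(rows, ["age", "tretspef", "speldur", "group", "classpat"])

df.withColumn("capacity_conversion_group", create_capacity_conversion_group()).show(truncate=False)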

raw_data/outpatients.py

Lines changed: 17 additions & 0 deletions
@@ -11,6 +11,22 @@
 from raw_data.helpers import add_age_group_column, add_tretspef_grouped_column
 
 
+def create_capacity_conversion_group():
+    is_maternity = F.col("tretspef").isin(["424", "501", "505", "560"])
+    is_child = F.col("age") <= 17
+
+    return F.when(
+        F.col("has_procedures"),
+        F.when(is_maternity, "op-procedure-maternity")
+        .when(is_child, "op-procedure-childrens")
+        .otherwise("op-procedure-adult"),
+    ).otherwise(
+        F.when(is_maternity, "op-maternity")
+        .when(is_child, "op-childrens")
+        .otherwise("op-adult")
+    )
+
+
 def get_outpatients_data(spark: SparkSession) -> None:
     """Get Outpatients Data"""
     df = read_data_with_provider(spark, "hes.silver.opa")
@@ -144,6 +160,7 @@ def get_outpatients_data(spark: SparkSession) -> None:
             .when(F.col("is_first"), "op_first")
             .otherwise("op_follow-up"),
         )
+        .withColumn("capacity_conversion_group", create_capacity_conversion_group())
         .withColumn("ndggrp", F.col("group"))
     )
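
The outpatient helper follows the same fall-through idea: has_procedures splits the rows first, then maternity specialties, then children, with adults as the default. A minimal, purely illustrative check (again assuming a local SparkSession, that the helper can be imported from raw_data.outpatients, and made-up rows) could look like this:

from pyspark.sql import SparkSession

from raw_data.outpatients import create_capacity_conversion_group  # helper added in this commit

spark = SparkSession.builder.getOrCreate()

# hypothetical rows: (age, tretspef, has_procedures)
rows = [
    (30, "501", True),   # maternity specialty with a procedure -> op-procedure-maternity
    (12, "110", False),  # child attendance, no procedure -> op-childrens
    (52, "110", False),  # adult default -> op-adult
]
df = spark.createDataFrame(rows, ["age", "tretspef", "has_procedures"])

df.withColumn("capacity_conversion_group", create_capacity_conversion_group()).show()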
