52 changes: 45 additions & 7 deletions notebooks/national_run.py
@@ -49,7 +49,7 @@
 import model as mdl
 from model.data.databricks import DatabricksNational
 from model.health_status_adjustment import HealthStatusAdjustmentInterpolated
-from model.results import combine_results, generate_results_json
+from model.results import combine_results, generate_results_json, save_results_files
 from run_model import _run_model
 
 os.environ["BATCH_SIZE"] = "8"
@@ -100,14 +100,17 @@
 
 # COMMAND ----------
 
+# save_full_model_results set to True
+# This creates folders with the results for each of the 256 Monte Carlo simulations in notebooks/results/national/SCENARIONAME/CREATE_DATETIME
+
 results_dict["inpatients"] = _run_model(
     mdl.InpatientsModel,
     params,
     nhp_data,
     hsa,
     run_params,
     pcallback,
-    False,
+    True,
 )
 
 # COMMAND ----------
@@ -124,7 +127,7 @@
     hsa,
     run_params,
     pcallback,
-    False,
+    True,
 )
 
 # COMMAND ----------
@@ -141,7 +144,7 @@
     hsa,
     run_params,
     pcallback,
-    False,
+    True,
 )
 
 
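Note (editor's sketch, not part of the diff): all three _run_model calls above now pass True for the save_full_model_results argument, so each model writes its per-simulation outputs under notebooks/results/national/SCENARIONAME/CREATE_DATETIME, as described in the comment on the inpatients cell. Assuming those folders hold one parquet file per Monte Carlo run and that pandas is available in the notebook environment, a single run could be inspected like this (the file name is hypothetical):

import pandas as pd

# Hypothetical layout: one parquet file per simulation under the
# scenario/create_datetime folder written by the model run.
df = pd.read_parquet("results/national/SCENARIONAME/CREATE_DATETIME/0.parquet")
print(df.head())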
@@ -213,6 +216,8 @@ def get_principal(df):
 
 # COMMAND ----------
 
+# JSON file
+
 with open(f"results/{json_filename}.json", "rb") as f:
     zipped_results = gzip.compress(f.read())
 
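Note (editor's sketch, not part of the diff): gzip.compress is the one-shot stdlib API, and gzip.decompress reverses it exactly, which gives a cheap sanity check that the bytes being uploaded still parse as JSON:

import gzip
import json

with open(f"results/{json_filename}.json", "rb") as f:
    raw = f.read()

zipped_results = gzip.compress(raw)
# Round trip: decompress and parse to confirm nothing was corrupted.
assert gzip.decompress(zipped_results) == raw
assert json.loads(raw) is not None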
@@ -225,11 +230,44 @@
 # Metadata "dataset" needs to be SYNTHETIC otherwise it will not be viewable in outputs
 metadata["dataset"] = "synthetic"
 
 # COMMAND ----------
 
 url = dbutils.secrets.get("nhpsa-results", "url")
 sas = dbutils.secrets.get("nhpsa-results", "sas-token")
 cont = ContainerClient.from_container_url(f"{url}?{sas}")
 cont.upload_blob(
-    f"prod/dev/synthetic/{json_filename}.json.gz", zipped_results, metadata=metadata
+    f"prod/dev/synthetic/{json_filename}.json.gz", zipped_results, metadata=metadata, overwrite=True
 )
+
+# COMMAND ----------
+
+# Save aggregated parquets as well (new format of model results, for future proofing)
+
+saved_files = save_results_files(results, params)
+for file in saved_files:
+    filename = file[8:]
+    with open(file, "rb") as f:
+        cont.upload_blob(
+            f"aggregated-model-results/dev/{filename}",
+            f.read(),
+            overwrite=True,
+        )
+
+
+# COMMAND ----------
+
+from pathlib import Path
+
+# Save the IP full model results to storage
+# From docker_run._upload_full_model_results
+dataset = params["dataset"]
+scenario = params["scenario"]
+create_datetime = params["create_datetime"]
+
+path = Path(f"results/{dataset}/{scenario}/{create_datetime}")
+for file in path.glob("**/*.parquet"):
+    filename = file.as_posix()[8:]
+    with open(file, "rb") as f:
+        cont.upload_blob(
+            f"full-model-results/dev/{filename}",
+            f.read(),
+            overwrite=True,
+        )