Skip to content

Commit ba20277

Browse files
committed
add deletion endpoint
1 parent b6afe9f commit ba20277

File tree

2 files changed

+83
-1
lines changed

2 files changed

+83
-1
lines changed

src/webapp/databricks.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def setup_new_inst(self, inst_name: str) -> None:
6666
)
6767
db_inst_name = databricksify_inst_name(inst_name)
6868
cat_name = databricks_vars["CATALOG_NAME"]
69-
print(cat_name)
7069
for medallion in medallion_levels:
7170
w.schemas.create(name=f"{db_inst_name}_{medallion}", catalog_name=cat_name)
7271
# Create a managed volume in the bronze schema for internal pipeline data.
@@ -143,3 +142,40 @@ def run_pdp_inference(
143142
},
144143
)
145144
return DatabricksInferenceRunResponse(job_run_id=run_job.response.run_id)
145+
146+
def delete_inst(self, inst_name: str) -> None:
    """Delete the Databricks volumes, tables and schemas of an institution.

    Removes the bronze, silver and gold volumes, then, for every entry of
    ``medallion_levels``, deletes each table listed in the institution's
    schema followed by the schema itself. Registered MLflow models are not
    removed yet (see the TODO below).

    Args:
        inst_name: institution name; it is passed through
            ``databricksify_inst_name`` before being used in Databricks
            object names.
    """
    schema_prefix = databricksify_inst_name(inst_name)
    catalog = databricks_vars["CATALOG_NAME"]
    client = WorkspaceClient(
        host=databricks_vars["DATABRICKS_HOST_URL"],
        # This should still be cloud run, since it's cloud run triggering the databricks
        # this account needs to exist on Databricks as well and needs to have permissions.
        google_service_account=gcs_vars["GCP_SERVICE_ACCOUNT_EMAIL"],
    )

    # Delete the managed volumes, one per medallion schema.
    for level in ("bronze", "silver", "gold"):
        client.volumes.delete(
            name=f"{catalog}.{schema_prefix}_{level}.{level}_volume"
        )

    # Delete the MLflow model.
    # TODO how to handle deleting all models?
    # Sketch of the single-model case, kept for reference (not executed):
    #   model_name = "latest_enrollment_model"
    #   new_institution_model_uri = f"{cat_name}.{db_inst_name}_gold.{model_name}"
    #   mlflow_client.delete_registered_model(name=new_institution_model_uri)

    # Delete tables and schemas for each medallion level.
    for medallion in medallion_levels:
        schema_name = f"{schema_prefix}_{medallion}"
        schema_full_name = f"{catalog}.{schema_name}"
        # Collect the names up front so the listing is complete before any
        # table is deleted.
        table_names = [
            entry.name
            for entry in client.tables.list(
                catalog_name=catalog,
                schema_name=schema_name,
            )
        ]
        for table_name in table_names:
            client.tables.delete(full_name=f"{schema_full_name}.{table_name}")
        client.schemas.delete(full_name=schema_full_name)

src/webapp/routers/institutions.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,52 @@ def create_institution(
231231
}
232232

233233

234+
# TODO: add tests
235+
@router.delete("/institutions/{inst_id}", response_model=None)
def delete_inst(
    inst_id: str,
    current_user: Annotated[BaseUser, Depends(get_current_active_user)],
    sql_session: Annotated[Session, Depends(get_session)],
    storage_control: Annotated[StorageControl, Depends(StorageControl)],
    databricks_control: Annotated[DatabricksControl, Depends(DatabricksControl)],
) -> Any:
    """Delete an existing institution.

    Only available to Datakinders.

    The institution row is looked up first so that its id and name are
    available for the storage bucket and Databricks cleanup that follow the
    database delete.

    Args:
        inst_id: UUID string of the institution to delete.
        current_user: the user making the request.
        sql_session: database session used for the lookup and the delete.
        storage_control: used to delete the institution's storage bucket.
        databricks_control: used to delete the institution's Databricks
            resources.

    Raises:
        HTTPException: 401 if the user is not a Datakinder, 404 if no
            institution has the given id, 500 if the bucket or Databricks
            cleanup fails.
    """
    # Function-scope import so this block does not rely on the module's
    # import list; assumes the module's `delete` also comes from SQLAlchemy.
    from sqlalchemy import select

    if not current_user.is_datakinder():
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Not authorized to delete an institution.",
        )

    inst_uuid = str_to_uuid(inst_id)
    local_session.set(sql_session)
    query_result = (
        local_session.get()
        .execute(select(InstTable).where(InstTable.id == inst_uuid))
        .all()
    )
    if not query_result:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Institution not found.",
        )
    # Read everything needed for cleanup BEFORE the row is deleted and the
    # session committed; the loaded object cannot be relied on afterwards.
    inst_row = query_result[0][0]
    bucket_name = get_external_bucket_name_from_uuid(inst_row.id)
    inst_name = inst_row.name

    local_session.get().execute(delete(InstTable).where(InstTable.id == inst_uuid))
    local_session.get().commit()

    # Delete GCS bucket
    try:
        storage_control.delete_bucket(bucket_name)
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Storage bucket deletion failed:" + str(e),
        ) from e

    # Delete all databricks managed pieces.
    # Broad catch is deliberate: this is the API boundary and any failure is
    # reported to the caller as a 500 with the underlying message.
    try:
        databricks_control.delete_inst(inst_name)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Databricks deletion failed:" + str(e),
        ) from e
278+
279+
234280
# All other API transactions require the UUID as an identifier, this allows the UUID lookup by human readable name.
235281
@router.get("/institutions/name/{inst_name}", response_model=Institution)
236282
def read_inst_name(

0 commit comments

Comments
 (0)