Skip to content

Commit 6ff37d2

Browse files
committed
add models db
1 parent 28e7ed2 commit 6ff37d2

File tree

7 files changed

+648
-222
lines changed

7 files changed

+648
-222
lines changed

src/webapp/database.py

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,7 @@ class InstTable(Base):
114114
account_histories: Mapped[List["AccountHistoryTable"]] = relationship(
115115
back_populates="inst"
116116
)
117-
# models: Mapped[List["ModelTable"]] = relationship(
118-
# back_populates="inst"
119-
# )
117+
models: Mapped[Set["ModelTable"]] = relationship(back_populates="inst")
120118

121119
name = Column(String(VAR_CHAR_LONGER_LENGTH), nullable=False, unique=True)
122120
# If retention unset, the Datakind default is used. File-level retentions overrides
@@ -208,7 +206,7 @@ class AccountTable(Base):
208206
# Required for team integration with laravel
209207
current_team_id = Column(Uuid(as_uuid=True), nullable=True)
210208
access_type = Column(String(VAR_CHAR_LENGTH), nullable=True)
211-
profile_photo_path = Column(String(VAR_CHAR_LENGTH), nullable=True)
209+
# profile_photo_path = Column(String(VAR_CHAR_LENGTH), nullable=True)
212210
created_at = mapped_column(DateTime(timezone=True), server_default=func.now())
213211
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
214212

@@ -314,7 +312,7 @@ class BatchTable(Base):
314312
name = Column(String(VAR_CHAR_LONGER_LENGTH), nullable=False)
315313
# A short description or note on this batch.
316314
description = Column(String(VAR_CHAR_LONGER_LENGTH))
317-
creator = Column(Uuid(as_uuid=True))
315+
created_by = Column(Uuid(as_uuid=True))
318316
# If null, the following is non-deleted.
319317
deleted: Mapped[bool] = mapped_column(nullable=True)
320318
# If true, the batch is ready for use.
@@ -327,7 +325,6 @@ class BatchTable(Base):
327325
__table_args__ = (UniqueConstraint("name", "inst_id", name="batch_name_inst_uc"),)
328326

329327

330-
"""
331328
class ModelTable(Base):
332329
__tablename__ = "model"
333330
id = Column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)
@@ -341,43 +338,31 @@ class ModelTable(Base):
341338
inst: Mapped["InstTable"] = relationship(back_populates="models")
342339

343340
name = Column(String(VAR_CHAR_LONGER_LENGTH), nullable=False)
344-
# A short description or note on this inst.
345-
description = Column(String(VAR_CHAR_LONGER_LENGTH))
346-
# What configuration of schemas are allowed (e.g. 1 PDP Course + 1 PDP Cohort)
347-
schema_configs = Column(MutableList.as_mutable(JSON))
348-
creator = Column(Uuid(as_uuid=True))
341+
# A short description or note on this model.
342+
description = Column(String(VAR_CHAR_LONGER_LENGTH), nullable=True)
343+
# Which schema configurations are allowed: a list of maps, e.g. [{PDP Course: 1, PDP Cohort: 1}, {X_schema: 1, Y_schema: 2}].
344+
schema_configs = Column(MutableList.as_mutable(JSON), nullable=True)
345+
# A list of all the runs executed using this model. These ids will correspond to Databricks ids so that we can retrieve things like
346+
# status and correlate output using Databricks.
347+
run_ids = Column(MutableList.as_mutable(JSON), nullable=True)
348+
created_by = Column(Uuid(as_uuid=True), nullable=True)
349349
# If null, the following is non-deleted.
350350
deleted: Mapped[bool] = mapped_column(nullable=True)
351-
# If true, the model is ready for use.
352-
active: Mapped[bool] = mapped_column(nullable=True)
351+
# If true, the model has been approved and is ready for use.
352+
valid: Mapped[bool] = mapped_column(nullable=True)
353353
# The time the deletion request was set.
354354
deleted_at = Column(DateTime(timezone=True), nullable=True)
355355
created_at = Column(DateTime(timezone=True), server_default=func.now())
356356
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
357-
# Within a given institution, there should be no duplicated model names.
358-
__table_args__ = (UniqueConstraint("name", "inst_id", name="model_name_inst_uc"),)
357+
# Each new version of a model can have the same name, but note the uuid will be different.
358+
version = Column(Integer, default=0)
359359

360-
class ModelRunsTable(Base):
361-
__tablename__ = "model_runs"
362-
id = Column(Uuid(as_uuid=True), primary_key=True, default=uuid.uuid4)
363-
364-
# Set the parent foreign key to link to the institution table.
365-
inst_id = Column(
366-
Uuid(as_uuid=True),
367-
ForeignKey("inst.id", ondelete="CASCADE"),
368-
nullable=False,
360+
# Within a given institution, there should be no duplicated model names + versions.
361+
__table_args__ = (
362+
UniqueConstraint("name", "inst_id", "version", name="model_name_inst_uc"),
369363
)
370-
inst: Mapped["InstTable"] = relationship(back_populates="batches")
371364

372-
# Set the parent foreign key to link to the institution table.
373-
inst_id = Column(
374-
Uuid(as_uuid=True),
375-
ForeignKey("inst.id", ondelete="CASCADE"),
376-
nullable=False,
377-
)
378-
inst: Mapped["InstTable"] = relationship(back_populates="batches")
379365

380-
"""
381366
"""
382367
def get_one_record(sess_context_var: ContextVar, sess: Session, select_query: ) -> Any:
383368
local_session.set(sql_session)
@@ -394,7 +379,7 @@ def get_one_record(sess_context_var: ContextVar, sess: Session, select_query: )
394379
name=req.name,
395380
inst_id=inst_id,
396381
description=req.description,
397-
creator=current_user.user_id,
382+
created_by=current_user.user_id,
398383
)
399384
)
400385
local_session.get().commit()

src/webapp/routers/data.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class BatchInfo(BaseModel):
6262
name: str | None = None
6363
description: str | None = None
6464
# User id of uploader or person who triggered this data ingestion.
65-
creator: str | None = None
65+
created_by: str | None = None
6666
# Deleted data means this batch has a pending deletion request and can no longer be used.
6767
deleted: bool | None = None
6868
# Completed batches means this batch is ready for use. Completed batches will
@@ -215,7 +215,7 @@ def get_all_batches(
215215
"name": elem.name,
216216
"description": elem.description,
217217
"file_names_to_ids": {x.name: uuid_to_str(x.id) for x in elem.files},
218-
"creator": uuid_to_str(elem.creator),
218+
"created_by": uuid_to_str(elem.created_by),
219219
"deleted": False if elem.deleted is None else elem.deleted,
220220
"completed": False if elem.completed is None else elem.completed,
221221
"deletion_request_time": elem.deleted_at,
@@ -351,7 +351,7 @@ def read_batch_info(
351351
"name": res.name,
352352
"description": res.description,
353353
"file_names_to_ids": {x.name: uuid_to_str(x.id) for x in res.files},
354-
"creator": uuid_to_str(res.creator),
354+
"created_by": uuid_to_str(res.created_by),
355355
"deleted": False if res.deleted is None else res.deleted,
356356
"completed": False if res.completed is None else res.completed,
357357
"deletion_request_time": res.deleted_at,
@@ -415,13 +415,13 @@ def create_batch(
415415
name=req.name,
416416
inst_id=str_to_uuid(inst_id),
417417
description=req.description,
418-
creator=str_to_uuid(current_user.user_id),
418+
created_by=str_to_uuid(current_user.user_id),
419419
)
420420
f_names = [] if not req.file_names else req.file_names
421421
f_ids = [] if not req.file_ids else strs_to_uuids(req.file_ids)
422422
# Check that the files requested for this batch exist.
423423
# Only valid non-sst generated files can be added to a batch at creation time.
424-
query_result_file = (
424+
query_result_files = (
425425
local_session.get()
426426
.execute(
427427
select(FileTable).where(
@@ -438,17 +438,13 @@ def create_batch(
438438
)
439439
.all()
440440
)
441-
if not query_result_file or len(query_result_file) == 0:
441+
if not query_result_files or len(query_result_files) == 0:
442442
raise HTTPException(
443443
status_code=status.HTTP_404_NOT_FOUND,
444444
detail="file in request not found.",
445445
)
446-
elif len(query_result_file) > 1:
447-
raise HTTPException(
448-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
449-
detail="Multiple files in request with same unique id found.",
450-
)
451-
batch.files.add(query_result_file[0][0])
446+
for elem in query_result_files:
447+
batch.files.add(elem[0])
452448
local_session.get().add(batch)
453449
local_session.get().commit()
454450
query_result = (
@@ -486,7 +482,7 @@ def create_batch(
486482
"file_names_to_ids": {
487483
x.name: uuid_to_str(x.id) for x in query_result[0][0].files
488484
},
489-
"creator": uuid_to_str(query_result[0][0].creator),
485+
"created_by": uuid_to_str(query_result[0][0].created_by),
490486
"deleted": False,
491487
"completed": False,
492488
"deletion_request_time": None,
@@ -641,7 +637,7 @@ def update_batch(
641637
"name": res[0][0].name,
642638
"description": res[0][0].description,
643639
"file_names_to_ids": {x.name: uuid_to_str(x.id) for x in res[0][0].files},
644-
"creator": uuid_to_str(res[0][0].creator),
640+
"created_by": uuid_to_str(res[0][0].created_by),
645641
"deleted": res[0][0].deleted,
646642
"completed": res[0][0].completed,
647643
"deletion_request_time": res[0][0].deleted_at,

src/webapp/routers/data_test.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
USER_UUID,
1818
UUID_INVALID,
1919
DATETIME_TESTING,
20+
SAMPLE_UUID,
2021
)
2122
from ..main import app
2223
from ..database import (
@@ -80,7 +81,7 @@ def same_orderless(a: DataOverview, b: DataOverview):
8081
or a_elem["file_names_to_ids"] == b_elem["file_names_to_ids"]
8182
or a_elem["name"] != b_elem["name"]
8283
or a_elem["description"] != b_elem["description"]
83-
or a_elem["creator"] != b_elem["creator"]
84+
or a_elem["created_by"] != b_elem["created_by"]
8485
or a_elem["deleted"] != b_elem["deleted"]
8586
or a_elem["completed"] != b_elem["completed"]
8687
or a_elem["deletion_request_time"] != b_elem["deletion_request_time"]
@@ -116,7 +117,7 @@ def session_fixture():
116117
id=BATCH_UUID,
117118
inst_id=USER_VALID_INST_UUID,
118119
name="batch_foo",
119-
creator=CREATOR_UUID,
120+
created_by=CREATOR_UUID,
120121
created_at=DATETIME_TESTING,
121122
updated_at=DATETIME_TESTING,
122123
)
@@ -135,14 +136,24 @@ def session_fixture():
135136
file_3 = FileTable(
136137
id=FILE_UUID_3,
137138
inst_id=USER_VALID_INST_UUID,
138-
name="file_output_one",
139+
name="file_output_three",
139140
batches={batch_1},
140141
created_at=DATETIME_TESTING,
141142
updated_at=DATETIME_TESTING,
142143
sst_generated=True,
143144
valid=True,
144145
schemas=[SchemaType.PDP_COHORT],
145146
)
147+
file_4 = FileTable(
148+
id=SAMPLE_UUID,
149+
inst_id=USER_VALID_INST_UUID,
150+
name="file_output_four",
151+
created_at=DATETIME_TESTING,
152+
updated_at=DATETIME_TESTING,
153+
sst_generated=True,
154+
valid=True,
155+
schemas=[SchemaType.PDP_COHORT],
156+
)
146157
try:
147158
with sqlalchemy.orm.Session(engine) as session:
148159
session.add_all(
@@ -167,6 +178,7 @@ def session_fixture():
167178
schemas=[SchemaType.PDP_COURSE],
168179
),
169180
file_3,
181+
file_4,
170182
]
171183
)
172184
session.commit()
@@ -223,7 +235,7 @@ def test_read_inst_all_input_files(client: TestClient):
223235
],
224236
"name": "batch_foo",
225237
"description": None,
226-
"creator": "0ad8b77c49fb459a84b18d2c05722c4a",
238+
"created_by": "0ad8b77c49fb459a84b18d2c05722c4a",
227239
"deleted": False,
228240
"completed": False,
229241
"deletion_request_time": None,
@@ -289,11 +301,11 @@ def test_read_inst_all_output_files(client: TestClient):
289301
"inst_id": "1d7c75c33eda42949c6675ea8af97b55",
290302
"file_names_to_ids": [
291303
{"file_input_one": "f0bb3a206d924254afed6a72f43c562a"},
292-
{"file_output_one": "fbe67a2e50e040c7b7b807043cb813a5"},
304+
{"file_output_three": "fbe67a2e50e040c7b7b807043cb813a5"},
293305
],
294306
"name": "batch_foo",
295307
"description": None,
296-
"creator": "0ad8b77c49fb459a84b18d2c05722c4a",
308+
"created_by": "0ad8b77c49fb459a84b18d2c05722c4a",
297309
"deleted": False,
298310
"completed": False,
299311
"deletion_request_time": None,
@@ -302,7 +314,7 @@ def test_read_inst_all_output_files(client: TestClient):
302314
],
303315
"files": [
304316
{
305-
"name": "file_output_one",
317+
"name": "file_output_three",
306318
"data_id": "fbe67a2e50e040c7b7b807043cb813a5",
307319
"batch_ids": ["5b2420f3103546ab90eb74d5df97de43"],
308320
"inst_id": "1d7c75c33eda42949c6675ea8af97b55",
@@ -315,7 +327,22 @@ def test_read_inst_all_output_files(client: TestClient):
315327
"sst_generated": True,
316328
"valid": True,
317329
"uploaded_date": "2024-12-24T20:22:20.132022",
318-
}
330+
},
331+
{
332+
"name": "file_output_four",
333+
"data_id": "e4862c62829440d8ab4c9c298f02f619",
334+
"batch_ids": [],
335+
"inst_id": "1d7c75c33eda42949c6675ea8af97b55",
336+
"description": None,
337+
"uploader": "",
338+
"source": None,
339+
"deleted": False,
340+
"deletion_request_time": None,
341+
"retention_days": None,
342+
"sst_generated": True,
343+
"valid": True,
344+
"uploaded_date": "2024-12-24T20:22:20.132022",
345+
},
319346
],
320347
},
321348
)
@@ -352,11 +379,11 @@ def test_read_batch_info(client: TestClient):
352379
"inst_id": "1d7c75c33eda42949c6675ea8af97b55",
353380
"file_names_to_ids": [
354381
{"file_input_one": "f0bb3a206d924254afed6a72f43c562a"},
355-
{"file_output_one": "fbe67a2e50e040c7b7b807043cb813a5"},
382+
{"file_output_three": "fbe67a2e50e040c7b7b807043cb813a5"},
356383
],
357384
"name": "batch_foo",
358385
"description": None,
359-
"creator": "0ad8b77c49fb459a84b18d2c05722c4a",
386+
"created_by": "0ad8b77c49fb459a84b18d2c05722c4a",
360387
"deleted": False,
361388
"completed": False,
362389
"deletion_request_time": None,
@@ -365,7 +392,7 @@ def test_read_batch_info(client: TestClient):
365392
],
366393
"files": [
367394
{
368-
"name": "file_output_one",
395+
"name": "file_output_three",
369396
"data_id": "fbe67a2e50e040c7b7b807043cb813a5",
370397
"batch_ids": ["5b2420f3103546ab90eb74d5df97de43"],
371398
"inst_id": "1d7c75c33eda42949c6675ea8af97b55",
@@ -461,13 +488,13 @@ def test_create_batch(client: TestClient):
461488
"description": "",
462489
"batch_disabled": "False",
463490
"file_ids": [uuid_to_str(FILE_UUID_1)],
464-
"file_names": ["file_input_one", "file_input_two"],
491+
"file_names": ["file_input_one", "file_input_two", "file_input_four"],
465492
},
466493
)
467494
assert response.status_code == 200
468495
assert response.json()["name"] == "batch_foobar"
469496
assert response.json()["description"] == ""
470-
assert response.json()["creator"] == uuid_to_str(USER_UUID)
497+
assert response.json()["created_by"] == uuid_to_str(USER_UUID)
471498
assert response.json()["deleted"] == False
472499
assert response.json()["completed"] == False
473500
assert response.json()["deletion_request_time"] == None
@@ -511,7 +538,7 @@ def test_update_batch(client: TestClient):
511538
assert response.status_code == 200
512539
assert response.json()["name"] == "batch_name_updated_foo"
513540
assert response.json()["description"] == None
514-
assert response.json()["creator"] == uuid_to_str(CREATOR_UUID)
541+
assert response.json()["created_by"] == uuid_to_str(CREATOR_UUID)
515542
assert response.json()["deleted"] == None
516543
assert response.json()["completed"] == True
517544
assert response.json()["deletion_request_time"] == None

0 commit comments

Comments
 (0)