Skip to content

Commit a8b8a21

Browse files
committed
api: Add tracking_id query param to API endpoints
Signed-off-by: Phoevos Kalemkeris <[email protected]>
1 parent ee9f369 commit a8b8a21

File tree

6 files changed: 36 additions and 17 deletions

app/api/routers/evaluation.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import uuid
55
import tempfile
66

7-
from typing import List
7+
from typing import List, Union
88
from starlette.status import HTTP_202_ACCEPTED, HTTP_503_SERVICE_UNAVAILABLE
99
from typing_extensions import Annotated
1010
from fastapi import APIRouter, Query, Depends, UploadFile, Request, File
@@ -34,6 +34,7 @@
3434
description="Evaluate the model being served with a trainer export")
3535
async def get_evaluation_with_trainer_export(request: Request,
3636
trainer_export: Annotated[List[UploadFile], File(description="One or more trainer export files to be uploaded")],
37+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the evaluation task")] = None,
3738
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> JSONResponse:
3839
files = []
3940
file_names = []
@@ -54,7 +55,7 @@ async def get_evaluation_with_trainer_export(request: Request,
5455
json.dump(concatenated, data_file)
5556
data_file.flush()
5657
data_file.seek(0)
57-
evaluation_id = str(uuid.uuid4())
58+
evaluation_id = tracking_id or str(uuid.uuid4())
5859
evaluation_accepted = model_service.train_supervised(data_file, 0, sys.maxsize, evaluation_id, ",".join(file_names))
5960
if evaluation_accepted:
6061
return JSONResponse(content={"message": "Your evaluation started successfully.", "evaluation_id": evaluation_id}, status_code=HTTP_202_ACCEPTED)
@@ -69,6 +70,7 @@ async def get_evaluation_with_trainer_export(request: Request,
6970
description="Sanity check the model being served with a trainer export")
7071
def get_sanity_check_with_trainer_export(request: Request,
7172
trainer_export: Annotated[List[UploadFile], File(description="One or more trainer export files to be uploaded")],
73+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the sanity check task")] = None,
7274
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> StreamingResponse:
7375
files = []
7476
file_names = []
@@ -88,8 +90,9 @@ def get_sanity_check_with_trainer_export(request: Request,
8890
metrics = sanity_check_model_with_trainer_export(concatenated, model_service, return_df=True, include_anchors=False)
8991
stream = io.StringIO()
9092
metrics.to_csv(stream, index=False)
93+
tracking_id = tracking_id or str(uuid.uuid4())
9194
response = StreamingResponse(iter([stream.getvalue()]), media_type="text/csv")
92-
response.headers["Content-Disposition"] = f'attachment ; filename="sanity_check_{str(uuid.uuid4())}.csv"'
95+
response.headers["Content-Disposition"] = f'attachment ; filename="sanity_check_{tracking_id}.csv"'
9396
return response
9497

9598

@@ -102,7 +105,8 @@ def get_inter_annotator_agreement_scores(request: Request,
102105
trainer_export: Annotated[List[UploadFile], File(description="A list of trainer export files to be uploaded")],
103106
annotator_a_project_id: Annotated[int, Query(description="The project ID from one annotator")],
104107
annotator_b_project_id: Annotated[int, Query(description="The project ID from another annotator")],
105-
scope: Annotated[str, Query(enum=[s.value for s in Scope], description="The scope for which the score will be calculated, e.g., per_concept, per_document or per_span")]) -> StreamingResponse:
108+
scope: Annotated[str, Query(enum=[s.value for s in Scope], description="The scope for which the score will be calculated, e.g., per_concept, per_document or per_span")],
109+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the IAA task")] = None) -> StreamingResponse:
106110
files = []
107111
for te in trainer_export:
108112
temp_te = tempfile.NamedTemporaryFile()
@@ -126,8 +130,9 @@ def get_inter_annotator_agreement_scores(request: Request,
126130
raise AnnotationException(f'Unknown scope: "{scope}"')
127131
stream = io.StringIO()
128132
iaa_scores.to_csv(stream, index=False)
133+
tracking_id = tracking_id or str(uuid.uuid4())
129134
response = StreamingResponse(iter([stream.getvalue()]), media_type="text/csv")
130-
response.headers["Content-Disposition"] = f'attachment ; filename="iaa_{str(uuid.uuid4())}.csv"'
135+
response.headers["Content-Disposition"] = f'attachment ; filename="iaa_{tracking_id}.csv"'
131136
return response
132137

133138

@@ -137,7 +142,8 @@ def get_inter_annotator_agreement_scores(request: Request,
137142
dependencies=[Depends(cms_globals.props.current_active_user)],
138143
description="Concatenate multiple trainer export files into a single file for download")
139144
def get_concatenated_trainer_exports(request: Request,
140-
trainer_export: Annotated[List[UploadFile], File(description="A list of trainer export files to be uploaded")]) -> JSONResponse:
145+
trainer_export: Annotated[List[UploadFile], File(description="A list of trainer export files to be uploaded")],
146+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the concatenation task")] = None) -> JSONResponse:
141147
files = []
142148
for te in trainer_export:
143149
temp_te = tempfile.NamedTemporaryFile()
@@ -148,8 +154,9 @@ def get_concatenated_trainer_exports(request: Request,
148154
concatenated = concat_trainer_exports([file.name for file in files], allow_recurring_doc_ids=False)
149155
for file in files:
150156
file.close()
157+
tracking_id = tracking_id or str(uuid.uuid4())
151158
response = JSONResponse(concatenated, media_type="application/json; charset=utf-8")
152-
response.headers["Content-Disposition"] = f'attachment ; filename="concatenated_{str(uuid.uuid4())}.json"'
159+
response.headers["Content-Disposition"] = f'attachment ; filename="concatenated_{tracking_id}.json"'
153160
return response
154161

155162

@@ -159,7 +166,8 @@ def get_concatenated_trainer_exports(request: Request,
159166
dependencies=[Depends(cms_globals.props.current_active_user)],
160167
description="Get annotation stats of trainer export files")
161168
def get_annotation_stats(request: Request,
162-
trainer_export: Annotated[List[UploadFile], File(description="One or more trainer export files to be uploaded")]) -> StreamingResponse:
169+
trainer_export: Annotated[List[UploadFile], File(description="One or more trainer export files to be uploaded")],
170+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the annotation stats task")] = None) -> StreamingResponse:
163171
files = []
164172
file_names = []
165173
for te in trainer_export:
@@ -177,6 +185,7 @@ def get_annotation_stats(request: Request,
177185
stats = get_stats_from_trainer_export(concatenated, return_df=True)
178186
stream = io.StringIO()
179187
stats.to_csv(stream, index=False)
188+
tracking_id = tracking_id or str(uuid.uuid4())
180189
response = StreamingResponse(iter([stream.getvalue()]), media_type="text/csv")
181-
response.headers["Content-Disposition"] = f'attachment ; filename="stats_{str(uuid.uuid4())}.csv"'
190+
response.headers["Content-Disposition"] = f'attachment ; filename="stats_{tracking_id}.csv"'
182191
return response

app/api/routers/invocation.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ def get_entities_from_multiple_texts(request: Request,
132132
description="Upload a file containing a list of plain text and extract the NER entities in JSON")
133133
def extract_entities_from_multi_text_file(request: Request,
134134
multi_text_file: Annotated[UploadFile, File(description="A file containing a list of plain texts, in the format of [\"text_1\", \"text_2\", ..., \"text_n\"]")],
135+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the bulk processing task")] = None,
135136
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> StreamingResponse:
136137
with tempfile.NamedTemporaryFile() as data_file:
137138
for line in multi_text_file.file:
@@ -160,8 +161,9 @@ def extract_entities_from_multi_text_file(request: Request,
160161
output = json.dumps(body)
161162
logger.debug(output)
162163
json_file = BytesIO(output.encode())
164+
tracking_id = tracking_id or str(uuid.uuid4())
163165
response = StreamingResponse(json_file, media_type="application/json")
164-
response.headers["Content-Disposition"] = f'attachment ; filename="concatenated_{str(uuid.uuid4())}.json"'
166+
response.headers["Content-Disposition"] = f'attachment ; filename="concatenated_{tracking_id}.json"'
165167
return response
166168

167169

app/api/routers/metacat_training.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ async def train_metacat(request: Request,
2929
epochs: Annotated[int, Query(description="The number of training epochs", ge=0)] = 1,
3030
log_frequency: Annotated[int, Query(description="The number of processed documents after which training metrics will be logged", ge=1)] = 1,
3131
description: Annotated[Union[str, None], Query(description="The description on the training or change logs")] = None,
32+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the training task")] = None,
3233
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> JSONResponse:
3334
files = []
3435
file_names = []
@@ -49,7 +50,7 @@ async def train_metacat(request: Request,
4950
json.dump(concatenated, data_file)
5051
data_file.flush()
5152
data_file.seek(0)
52-
training_id = str(uuid.uuid4())
53+
training_id = tracking_id or str(uuid.uuid4())
5354
try:
5455
training_accepted = model_service.train_metacat(data_file,
5556
epochs,

app/api/routers/preview.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,16 @@
2727
description="Extract the NER entities in HTML for preview")
2828
async def get_rendered_entities_from_text(request: Request,
2929
text: Annotated[str, Body(description="The text to be sent to the model for NER", media_type="text/plain")],
30+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the preview task")] = None,
3031
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> StreamingResponse:
3132
annotations = model_service.annotate(text)
3233
entities = annotations_to_entities(annotations, model_service.model_name)
3334
logger.debug("Entities extracted for previewing %s", entities)
3435
ent_input = Doc(text=text, ents=entities)
3536
data = displacy.render(ent_input.dict(), style="ent", manual=True)
37+
tracking_id = tracking_id or str(uuid.uuid4())
3638
response = StreamingResponse(BytesIO(data.encode()), media_type="application/octet-stream")
37-
response.headers["Content-Disposition"] = f'attachment ; filename="preview_{str(uuid.uuid4())}.html"'
39+
response.headers["Content-Disposition"] = f'attachment ; filename="preview_{tracking_id}.html"'
3840
return response
3941

4042

@@ -47,7 +49,8 @@ def get_rendered_entities_from_trainer_export(request: Request,
4749
trainer_export: Annotated[List[UploadFile], File(description="One or more trainer export files to be uploaded")] = [],
4850
trainer_export_str: Annotated[str, Form(description="The trainer export raw JSON string")] = "{\"projects\": []}",
4951
project_id: Annotated[Union[int, None], Query(description="The target project ID, and if not provided, all projects will be included")] = None,
50-
document_id: Annotated[Union[int, None], Query(description="The target document ID, and if not provided, all documents of the target project(s) will be included")] = None) -> Response:
52+
document_id: Annotated[Union[int, None], Query(description="The target document ID, and if not provided, all documents of the target project(s) will be included")] = None,
53+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the trainer export preview task")] = None) -> Response:
5154
data: Dict = {"projects": []}
5255
if trainer_export is not None:
5356
files = []
@@ -88,8 +91,9 @@ def get_rendered_entities_from_trainer_export(request: Request,
8891
doc = Doc(text=document["text"], ents=entities, title=f"P{project['id']}/D{document['id']}")
8992
htmls.append(displacy.render(doc.dict(), style="ent", manual=True))
9093
if htmls:
94+
tracking_id = tracking_id or str(uuid.uuid4())
9195
response = StreamingResponse(BytesIO("<br/>".join(htmls).encode()), media_type="application/octet-stream")
92-
response.headers["Content-Disposition"] = f'attachment ; filename="preview_{str(uuid.uuid4())}.html"'
96+
response.headers["Content-Disposition"] = f'attachment ; filename="preview_{tracking_id}.html"'
9397
else:
9498
logger.debug("Cannot find any matching documents to preview")
9599
return JSONResponse(content={"message": "Cannot find any matching documents to preview"}, status_code=HTTP_404_NOT_FOUND)

app/api/routers/supervised_training.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ async def train_supervised(request: Request,
3232
test_size: Annotated[Union[float, None], Query(description="The override of the test size in percentage. (For a 'huggingface-ner' model, a negative value can be used to apply the train-validation-test split if implicitly defined in trainer export: 'projects[0]' is used for training, 'projects[1]' for validation, and 'projects[2]' for testing)")] = 0.2,
3333
log_frequency: Annotated[int, Query(description="The number of processed documents after which training metrics will be logged", ge=1)] = 1,
3434
description: Annotated[Union[str, None], Form(description="The description of the training or change logs")] = None,
35+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the training task")] = None,
3536
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> JSONResponse:
3637
files = []
3738
file_names = []
@@ -51,7 +52,7 @@ async def train_supervised(request: Request,
5152
json.dump(concatenated, data_file)
5253
data_file.flush()
5354
data_file.seek(0)
54-
training_id = str(uuid.uuid4())
55+
training_id = tracking_id or str(uuid.uuid4())
5556
try:
5657
training_accepted = model_service.train_supervised(data_file,
5758
epochs,

app/api/routers/unsupervised_training.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ async def train_unsupervised(request: Request,
3333
test_size: Annotated[Union[float, None], Query(description="The override of the test size in percentage", ge=0.0)] = 0.2,
3434
log_frequency: Annotated[int, Query(description="The number of processed documents after which training metrics will be logged", ge=1)] = 1000,
3535
description: Annotated[Union[str, None], Query(description="The description of the training or change logs")] = None,
36+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the training task")] = None,
3637
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> JSONResponse:
3738
"""
3839
Upload one or more plain text files and trigger the unsupervised training
@@ -61,7 +62,7 @@ async def train_unsupervised(request: Request,
6162
logger.debug("Training data concatenated")
6263
data_file.flush()
6364
data_file.seek(0)
64-
training_id = str(uuid.uuid4())
65+
training_id = tracking_id or str(uuid.uuid4())
6566
try:
6667
training_accepted = model_service.train_unsupervised(data_file,
6768
epochs,
@@ -96,6 +97,7 @@ async def train_unsupervised_with_hf_dataset(request: Request,
9697
test_size: Annotated[Union[float, None], Query(description="The override of the test size in percentage will only take effect if the dataset does not have predefined validation or test splits", ge=0.0)] = 0.2,
9798
log_frequency: Annotated[int, Query(description="The number of processed documents after which training metrics will be logged", ge=1)] = 1000,
9899
description: Annotated[Union[str, None], Query(description="The description of the training or change logs")] = None,
100+
tracking_id: Annotated[Union[str, None], Query(description="The tracking ID of the training task")] = None,
99101
model_service: AbstractModelService = Depends(cms_globals.model_service_dep)) -> JSONResponse:
100102
"""
101103
Trigger the unsupervised training with a dataset from Hugging Face Hub
@@ -129,7 +131,7 @@ async def train_unsupervised_with_hf_dataset(request: Request,
129131
logger.debug("Training dataset downloaded and transformed")
130132
hf_dataset.save_to_disk(data_dir.name)
131133

132-
training_id = str(uuid.uuid4())
134+
training_id = tracking_id or str(uuid.uuid4())
133135
training_accepted = model_service.train_unsupervised(data_dir,
134136
epochs,
135137
log_frequency,

0 commit comments

Comments
 (0)