Skip to content

Commit 089e86c

Browse files
release updates (#52)
* Solves NER cl labels * check calculated attributes, exit if nothing returned * Adds dummy link check to link collection * Refreshes progress on crowd labeler is change * Adds AUTOMATICALLY_CREATED state for attributes * Fixes attribute selection issue * wait until all records tokenized * Changes the default zero shot percentage to round to 1 decimal * adds f-string interpolation for attribute calculation * changes the resolve logic for projects without a slice * Adds something to calc payload stats * Submodule change * fixes progress when switching labeling tasks * updates submodules Co-authored-by: felix0496 <[email protected]>
1 parent aa5558b commit 089e86c

File tree

11 files changed

+155
-70
lines changed

11 files changed

+155
-70
lines changed

api/project.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def get(self, request) -> JSONResponse:
2424
except exceptions.AccessDeniedException:
2525
return JSONResponse({"error": "Access denied"}, status_code=403)
2626
project = project_manager.get_project(project_id)
27-
attributes = attribute_manager.get_all_attributes(project_id)
27+
attributes = attribute_manager.get_all_attributes(project_id, ["ALL"])
2828
result = {
2929
"name": project.name,
3030
"description": project.description,
@@ -34,6 +34,7 @@ def get(self, request) -> JSONResponse:
3434
"name": attribute.name,
3535
"data_type": attribute.data_type,
3636
"is_primary_key": attribute.is_primary_key,
37+
"state": attribute.state,
3738
}
3839
for attribute in attributes
3940
],

controller/attribute/manager.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import time
12
from typing import List, Tuple
23
from controller.tokenization.tokenization_service import request_tokenize_project
34
from submodules.model.business_objects import attribute, record, tokenization
@@ -21,6 +22,14 @@ def get_all_attributes_by_names(
2122
def get_all_attributes(
2223
project_id: str, state_filter: List[str] = None
2324
) -> List[Attribute]:
25+
if not state_filter:
26+
state_filter = [
27+
AttributeState.UPLOADED.value,
28+
AttributeState.USABLE.value,
29+
AttributeState.AUTOMATICALLY_CREATED.value,
30+
]
31+
if len(state_filter) == 1 and state_filter[0].upper() == "ALL":
32+
state_filter = [e.value for e in AttributeState]
2433
return attribute.get_all_ordered(project_id, True, state_filter)
2534

2635

@@ -64,7 +73,7 @@ def create_user_attribute(project_id: str) -> Attribute:
6473
with_commit=True,
6574
)
6675
notification.send_organization_update(
67-
project_id=project_id, message="calculate_attribute:created:{attribute_id}"
76+
project_id=project_id, message=f"calculate_attribute:created:{str(attribute_item.id)}"
6877
)
6978

7079
return attribute_item
@@ -89,6 +98,7 @@ def update_attribute(
8998
)
9099
if attribute.get(project_id, attribute_id).state in [
91100
AttributeState.UPLOADED.value,
101+
AttributeState.AUTOMATICALLY_CREATED.value,
92102
AttributeState.USABLE.value,
93103
]:
94104
notification.send_organization_update(project_id, "attributes_updated")
@@ -104,7 +114,7 @@ def delete_attribute(project_id: str, attribute_id: str) -> None:
104114
)
105115
attribute.delete(project_id, attribute_id, with_commit=True)
106116
notification.send_organization_update(
107-
project_id=project_id, message="calculate_attribute:deleted:{attribute_id}"
117+
project_id=project_id, message=f"calculate_attribute:deleted:{attribute_id}"
108118
)
109119
if is_usable:
110120
notification.send_organization_update(
@@ -155,14 +165,28 @@ def calculate_user_attribute_all_records(
155165
)
156166
return
157167

168+
attribute_item = attribute.get(project_id, attribute_id)
169+
equally_named_attributes = attribute.get_all_by_names(
170+
project_id, [attribute_item.name]
171+
)
172+
usable_attributes = attribute.get_all(project_id)
173+
if len(set(equally_named_attributes) & set(usable_attributes)) > 1:
174+
__notify_attribute_calculation_failed(
175+
project_id=project_id,
176+
attribute_id=attribute_id,
177+
log="Calculation of attribute failed. Another attribute with the same name is already in state usable or uploaded.",
178+
append_to_logs=False,
179+
)
180+
return
181+
158182
attribute.update(
159183
project_id=project_id,
160184
attribute_id=attribute_id,
161185
state=AttributeState.RUNNING.value,
162186
with_commit=True,
163187
)
164188
notification.send_organization_update(
165-
project_id=project_id, message="calculate_attribute:started:{attribute_id}"
189+
project_id=project_id, message=f"calculate_attribute:started:{attribute_id}"
166190
)
167191
daemon.run(
168192
__calculate_user_attribute_all_records,
@@ -180,6 +204,13 @@ def __calculate_user_attribute_all_records(
180204
calculated_attributes = util.run_attribute_calculation_exec_env(
181205
attribute_id=attribute_id, project_id=project_id, doc_bin="docbin_full"
182206
)
207+
if not calculated_attributes:
208+
__notify_attribute_calculation_failed(
209+
project_id=project_id,
210+
attribute_id=attribute_id,
211+
log="Calculation of attribute failed.",
212+
)
213+
return
183214
except Exception:
184215
__notify_attribute_calculation_failed(
185216
project_id=project_id,
@@ -216,7 +247,10 @@ def __calculate_user_attribute_all_records(
216247
util.add_log_to_attribute_logs(project_id, attribute_id, "Triggering tokenization.")
217248
tokenization.delete_docbins(project_id, with_commit=True)
218249
tokenization.delete_token_statistics_for_project(project_id, with_commit=True)
219-
tokenization.delete_tokenization_tasks(project_id, with_commit=True)
250+
251+
while record.count_tokenized_records(project_id) > 0:
252+
time.sleep(2)
253+
220254
request_tokenize_project(project_id, user_id)
221255

222256
attribute.update(
@@ -242,7 +276,7 @@ def __notify_attribute_calculation_failed(
242276
with_commit=True,
243277
)
244278
notification.send_organization_update(
245-
project_id=project_id, message="calculate_attribute:error:{attribute_id}"
279+
project_id=project_id, message=f"calculate_attribute:error:{attribute_id}"
246280
)
247281

248282

controller/attribute/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def run_attribute_calculation_exec_env(
125125
try:
126126
payload = s3.get_object(org_id, project_id + "/" + prefixed_payload)
127127
calculated_attributes = json.loads(payload)
128-
except:
128+
except Exception:
129129
print("Could not grab data from s3 -- attribute calculation")
130130
calculated_attributes = {}
131131

controller/information_source/manager.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,15 @@
22
from typing import List, Optional
33
from controller.information_source.util import resolve_source_return_type
44
from submodules.model import InformationSource, LabelingTask, enums
5-
from submodules.model.business_objects import general, labeling_task, information_source
5+
from submodules.model.business_objects import (
6+
general,
7+
labeling_task,
8+
information_source,
9+
payload,
10+
)
611
from controller.labeling_access_link import manager as link_manager
12+
from controller.record_label_association import manager as rla_manager
13+
from controller.payload import manager as payload_manager
714

815

916
def get_information_source(project_id: str, source_id: str) -> InformationSource:
@@ -84,7 +91,11 @@ def update_information_source(
8491
) -> None:
8592
labeling_task_item: LabelingTask = labeling_task.get(project_id, labeling_task_id)
8693
return_type: str = resolve_source_return_type(labeling_task_item)
87-
information_source.update(
94+
item = information_source.get(project_id, source_id)
95+
new_payload_needed = (
96+
str(item.source_code) != code or str(item.labeling_task_id) != labeling_task_id
97+
)
98+
item = information_source.update(
8899
project_id,
89100
source_id,
90101
labeling_task_id=labeling_task_id,
@@ -94,13 +105,33 @@ def update_information_source(
94105
name=name,
95106
with_commit=True,
96107
)
108+
109+
if item.type == enums.InformationSourceType.CROWD_LABELER.value:
110+
slice_id = json.loads(item.source_code)["data_slice_id"]
111+
if slice_id:
112+
if new_payload_needed and len(item.payloads) > 0:
113+
delete_information_source_payload(
114+
project_id, source_id, str(item.payloads[0].id)
115+
)
116+
rla_manager.update_annotator_progress(
117+
project_id, source_id, item.created_by
118+
)
97119
link_manager.set_changed_for(project_id, enums.LinkTypes.HEURISTIC, source_id)
98120

99121

100122
def delete_information_source(project_id: str, source_id: str) -> None:
101123
information_source.delete(project_id, source_id, with_commit=True)
102124

103125

126+
def delete_information_source_payload(
127+
project_id: str, information_source_id: str, payload_id: str
128+
) -> None:
129+
information_source_item = information_source.get(project_id, information_source_id)
130+
if information_source_item.type != enums.InformationSourceType.CROWD_LABELER.value:
131+
raise ValueError("Information source is not a crowd labeler")
132+
payload.remove(project_id, information_source_id, payload_id, with_commit=True)
133+
134+
104135
def toggle_information_source(project_id: str, source_id: str) -> None:
105136
information_source.toggle(project_id, source_id, with_commit=True)
106137

controller/project/manager.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
from util import daemon
1616
from controller.tokenization.tokenization_service import request_tokenize_project
1717
from submodules.model.business_objects import data_slice as ds_manager
18-
from submodules.model.business_objects import information_source as information_source_manager
18+
from submodules.model.business_objects import (
19+
information_source as information_source_manager,
20+
)
1921
from submodules.model.business_objects import util as db_util
2022
from submodules.s3 import controller as s3
2123
from service.search import search
@@ -150,21 +152,28 @@ def resolve_request_huddle_data(
150152
source_id = None
151153
if __no_huddle_id(data_id):
152154
data_id = __get_first_data_id(project_id, user_id, huddle_type)
153-
if huddle_type == enums.LinkTypes.DATA_SLICE.value:
155+
156+
if huddle_type == enums.LinkTypes.DATA_SLICE.value and data_id:
154157
slice_id = data_id
155-
elif huddle_type == enums.LinkTypes.HEURISTIC.value:
158+
elif huddle_type == enums.LinkTypes.HEURISTIC.value and data_id:
156159
information_source_data = __get_crowd_label_is_data(project_id, data_id)
157160
slice_id = information_source_data["data_slice_id"]
158161
huddle.allowed_task = information_source_data["labeling_task_id"]
159162
huddle.can_edit = information_source_data["annotator_id"] == user_id
160163
source_type = enums.LabelSource.INFORMATION_SOURCE
161164
source_id = data_id
162-
(
163-
huddle.record_ids,
164-
huddle.start_pos,
165-
) = ds_manager.get_record_ids_and_first_unlabeled_pos(
166-
project_id, user_id, slice_id, source_type, source_id
167-
)
165+
if data_id:
166+
(
167+
huddle.record_ids,
168+
huddle.start_pos,
169+
) = ds_manager.get_record_ids_and_first_unlabeled_pos(
170+
project_id,
171+
user_id,
172+
slice_id,
173+
source_type,
174+
source_id,
175+
huddle.allowed_task,
176+
)
168177
huddle.huddle_id = data_id
169178
huddle.checked_at = db_util.get_db_now()
170179
return huddle
@@ -196,6 +205,8 @@ def __get_first_data_id(project_id: str, user_id: str, huddle_type: str) -> str:
196205
if slices and len(slices) > 0:
197206
return slices[0].id
198207
elif huddle_type == enums.LinkTypes.HEURISTIC.value:
199-
return information_source_manager.get_first_crowd_is_for_annotator(project_id, user_id)
208+
return information_source_manager.get_first_crowd_is_for_annotator(
209+
project_id, user_id
210+
)
200211
else:
201212
raise ValueError("invalid huddle type")

0 commit comments

Comments
 (0)