code-kern-ai
diff --git a/api/project.py b/api/project.py
Lines changed: 2 additions & 1 deletion
diff --git a/controller/attribute/manager.py b/controller/attribute/manager.py
Lines changed: 39 additions & 5 deletions
diff --git a/controller/attribute/util.py b/controller/attribute/util.py
Lines changed: 1 addition & 1 deletion
diff --git a/controller/information_source/manager.py b/controller/information_source/manager.py
Lines changed: 33 additions & 2 deletions
diff --git a/controller/project/manager.py b/controller/project/manager.py
Lines changed: 21 additions & 10 deletions
@@ -24,7 +24,7 @@ def get(self, request) -> JSONResponse:
         except exceptions.AccessDeniedException:
             return JSONResponse({"error": "Access denied"}, status_code=403)
         project = project_manager.get_project(project_id)
-        attributes = attribute_manager.get_all_attributes(project_id)
+        attributes = attribute_manager.get_all_attributes(project_id, ["ALL"])
         result = {
             "name": project.name,
             "description": project.description,
@@ -34,6 +34,7 @@ def get(self, request) -> JSONResponse:
                     "name": attribute.name,
                     "data_type": attribute.data_type,
                     "is_primary_key": attribute.is_primary_key,
+                    "state": attribute.state,
                 }
                 for attribute in attributes
             ],
 
@@ -1,3 +1,4 @@
+import time
 from typing import List, Tuple
 from controller.tokenization.tokenization_service import request_tokenize_project
 from submodules.model.business_objects import attribute, record, tokenization
@@ -21,6 +22,14 @@ def get_all_attributes_by_names(
 def get_all_attributes(
     project_id: str, state_filter: List[str] = None
 ) -> List[Attribute]:
+    if not state_filter:
+        state_filter = [
+            AttributeState.UPLOADED.value,
+            AttributeState.USABLE.value,
+            AttributeState.AUTOMATICALLY_CREATED.value,
+        ]
+    if len(state_filter) == 1 and state_filter[0].upper() == "ALL":
+        state_filter = [e.value for e in AttributeState]
     return attribute.get_all_ordered(project_id, True, state_filter)
 
 
@@ -64,7 +73,7 @@ def create_user_attribute(project_id: str) -> Attribute:
         with_commit=True,
     )
     notification.send_organization_update(
-        project_id=project_id, message="calculate_attribute:created:{attribute_id}"
+        project_id=project_id, message=f"calculate_attribute:created:{str(attribute_item.id)}"
     )
 
     return attribute_item
@@ -89,6 +98,7 @@ def update_attribute(
     )
     if attribute.get(project_id, attribute_id).state in [
         AttributeState.UPLOADED.value,
+        AttributeState.AUTOMATICALLY_CREATED.value,
         AttributeState.USABLE.value,
     ]:
         notification.send_organization_update(project_id, "attributes_updated")
@@ -104,7 +114,7 @@ def delete_attribute(project_id: str, attribute_id: str) -> None:
             )
         attribute.delete(project_id, attribute_id, with_commit=True)
         notification.send_organization_update(
-            project_id=project_id, message="calculate_attribute:deleted:{attribute_id}"
+            project_id=project_id, message=f"calculate_attribute:deleted:{attribute_id}"
         )
         if is_usable:
             notification.send_organization_update(
@@ -155,14 +165,28 @@ def calculate_user_attribute_all_records(
         )
         return
 
+    attribute_item = attribute.get(project_id, attribute_id)
+    equally_named_attributes = attribute.get_all_by_names(
+        project_id, [attribute_item.name]
+    )
+    usable_attributes = attribute.get_all(project_id)
+    if len(set(equally_named_attributes) & set(usable_attributes)) > 1:
+        __notify_attribute_calculation_failed(
+            project_id=project_id,
+            attribute_id=attribute_id,
+            log="Calculation of attribute failed. Another attribute with the same name is already in state usable or uploaded.",
+            append_to_logs=False,
+        )
+        return
+
     attribute.update(
         project_id=project_id,
         attribute_id=attribute_id,
         state=AttributeState.RUNNING.value,
         with_commit=True,
     )
     notification.send_organization_update(
-        project_id=project_id, message="calculate_attribute:started:{attribute_id}"
+        project_id=project_id, message=f"calculate_attribute:started:{attribute_id}"
     )
     daemon.run(
         __calculate_user_attribute_all_records,
@@ -180,6 +204,13 @@ def __calculate_user_attribute_all_records(
         calculated_attributes = util.run_attribute_calculation_exec_env(
             attribute_id=attribute_id, project_id=project_id, doc_bin="docbin_full"
         )
+        if not calculated_attributes:
+            __notify_attribute_calculation_failed(
+                project_id=project_id,
+                attribute_id=attribute_id,
+                log="Calculation of attribute failed.",
+            )
+            return
     except Exception:
         __notify_attribute_calculation_failed(
             project_id=project_id,
@@ -216,7 +247,10 @@ def __calculate_user_attribute_all_records(
     util.add_log_to_attribute_logs(project_id, attribute_id, "Triggering tokenization.")
     tokenization.delete_docbins(project_id, with_commit=True)
     tokenization.delete_token_statistics_for_project(project_id, with_commit=True)
-    tokenization.delete_tokenization_tasks(project_id, with_commit=True)
+
+    while record.count_tokenized_records(project_id) > 0:
+        time.sleep(2)
+
     request_tokenize_project(project_id, user_id)
 
     attribute.update(
@@ -242,7 +276,7 @@ def __notify_attribute_calculation_failed(
         with_commit=True,
     )
     notification.send_organization_update(
-        project_id=project_id, message="calculate_attribute:error:{attribute_id}"
+        project_id=project_id, message=f"calculate_attribute:error:{attribute_id}"
     )
 
 
 
@@ -125,7 +125,7 @@ def run_attribute_calculation_exec_env(
     try:
         payload = s3.get_object(org_id, project_id + "/" + prefixed_payload)
         calculated_attributes = json.loads(payload)
-    except:
+    except Exception:
         print("Could not grab data from s3 -- attribute calculation")
         calculated_attributes = {}
 
 
@@ -2,8 +2,15 @@
 from typing import List, Optional
 from controller.information_source.util import resolve_source_return_type
 from submodules.model import InformationSource, LabelingTask, enums
-from submodules.model.business_objects import general, labeling_task, information_source
+from submodules.model.business_objects import (
+    general,
+    labeling_task,
+    information_source,
+    payload,
+)
 from controller.labeling_access_link import manager as link_manager
+from controller.record_label_association import manager as rla_manager
+from controller.payload import manager as payload_manager
 
 
 def get_information_source(project_id: str, source_id: str) -> InformationSource:
@@ -84,7 +91,11 @@ def update_information_source(
 ) -> None:
     labeling_task_item: LabelingTask = labeling_task.get(project_id, labeling_task_id)
     return_type: str = resolve_source_return_type(labeling_task_item)
-    information_source.update(
+    item = information_source.get(project_id, source_id)
+    new_payload_needed = (
+        str(item.source_code) != code or str(item.labeling_task_id) != labeling_task_id
+    )
+    item = information_source.update(
         project_id,
         source_id,
         labeling_task_id=labeling_task_id,
@@ -94,13 +105,33 @@ def update_information_source(
         name=name,
         with_commit=True,
     )
+
+    if item.type == enums.InformationSourceType.CROWD_LABELER.value:
+        slice_id = json.loads(item.source_code)["data_slice_id"]
+        if slice_id:
+            if new_payload_needed and len(item.payloads) > 0:
+                delete_information_source_payload(
+                    project_id, source_id, str(item.payloads[0].id)
+                )
+            rla_manager.update_annotator_progress(
+                project_id, source_id, item.created_by
+            )
     link_manager.set_changed_for(project_id, enums.LinkTypes.HEURISTIC, source_id)
 
 
 def delete_information_source(project_id: str, source_id: str) -> None:
     information_source.delete(project_id, source_id, with_commit=True)
 
 
+def delete_information_source_payload(
+    project_id: str, information_source_id: str, payload_id: str
+) -> None:
+    information_source_item = information_source.get(project_id, information_source_id)
+    if information_source_item.type != enums.InformationSourceType.CROWD_LABELER.value:
+        raise ValueError("Information source is not a crowd labeler")
+    payload.remove(project_id, information_source_id, payload_id, with_commit=True)
+
+
 def toggle_information_source(project_id: str, source_id: str) -> None:
     information_source.toggle(project_id, source_id, with_commit=True)
 
 
@@ -15,7 +15,9 @@
 from util import daemon
 from controller.tokenization.tokenization_service import request_tokenize_project
 from submodules.model.business_objects import data_slice as ds_manager
-from submodules.model.business_objects import information_source as information_source_manager
+from submodules.model.business_objects import (
+    information_source as information_source_manager,
+)
 from submodules.model.business_objects import util as db_util
 from submodules.s3 import controller as s3
 from service.search import search
@@ -150,21 +152,28 @@ def resolve_request_huddle_data(
         source_id = None
         if __no_huddle_id(data_id):
             data_id = __get_first_data_id(project_id, user_id, huddle_type)
-        if huddle_type == enums.LinkTypes.DATA_SLICE.value:
+
+        if huddle_type == enums.LinkTypes.DATA_SLICE.value and data_id:
             slice_id = data_id
-        elif huddle_type == enums.LinkTypes.HEURISTIC.value:
+        elif huddle_type == enums.LinkTypes.HEURISTIC.value and data_id:
             information_source_data = __get_crowd_label_is_data(project_id, data_id)
             slice_id = information_source_data["data_slice_id"]
             huddle.allowed_task = information_source_data["labeling_task_id"]
             huddle.can_edit = information_source_data["annotator_id"] == user_id
             source_type = enums.LabelSource.INFORMATION_SOURCE
             source_id = data_id
-        (
-            huddle.record_ids,
-            huddle.start_pos,
-        ) = ds_manager.get_record_ids_and_first_unlabeled_pos(
-            project_id, user_id, slice_id, source_type, source_id
-        )
+        if data_id:
+            (
+                huddle.record_ids,
+                huddle.start_pos,
+            ) = ds_manager.get_record_ids_and_first_unlabeled_pos(
+                project_id,
+                user_id,
+                slice_id,
+                source_type,
+                source_id,
+                huddle.allowed_task,
+            )
     huddle.huddle_id = data_id
     huddle.checked_at = db_util.get_db_now()
     return huddle
@@ -196,6 +205,8 @@ def __get_first_data_id(project_id: str, user_id: str, huddle_type: str) -> str:
         if slices and len(slices) > 0:
             return slices[0].id
     elif huddle_type == enums.LinkTypes.HEURISTIC.value:
-        return information_source_manager.get_first_crowd_is_for_annotator(project_id, user_id)
+        return information_source_manager.get_first_crowd_is_for_annotator(
+            project_id, user_id
+        )
     else:
         raise ValueError("invalid huddle type")