Skip to content

Commit aa5558b

Browse files
Create new attribute (#42)
* Added alembic version * Added attributeId as a return value when a new attribute is created * Update name for attributes * Last run by attribute_id with dummy data * Added mutation for running attribute on all records * Run on 10 records query with dummy data * renames graphql endpoints, renames added db fields for attribute * implements attribute calculation for sample records * implements attribute calculation on all records * adds only usable flag to project details * adds tokenization * changes attribute states according to changes in the model * adds state filter for attribute by project id query * fixes numbering of created attributes * adds error handling for exec env and db error * adds cascading delete for embeddings and labeling tasks on deltion of attribute * bugfix attribute update * changes sqlalchemy add attribute id as fk to embedding table * adds attribute changes to project transfer manager * bugfix attribute calculation run on 10 * fixes bug for appending logs to attribute logs * error on unfinished tokenization and other running attribute calculation * check for deleted attributes in tokenized records query * create notfication for attribute calculation on create and delete * modifies attributes_updated notification * fix bug in project export * revert changes in requirements and start script * remove superfluous notification * Replaced exception with a console information * pr comments * update model version Co-authored-by: felix0496 <[email protected]>
1 parent 05ed153 commit aa5558b

File tree

10 files changed

+502
-40
lines changed

10 files changed

+502
-40
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""Adds created attributes columns
2+
3+
Revision ID: 87f463aa5112
4+
Revises: 9618924f9679
5+
Create Date: 2022-09-13 08:22:52.181427
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "87f463aa5112"
15+
down_revision = "9618924f9679"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
21+
# ### commands auto generated by Alembic - please adjust! ###
22+
op.add_column("attribute", sa.Column("user_created", sa.Boolean(), nullable=True))
23+
op.add_column("attribute", sa.Column("source_code", sa.String(), nullable=True))
24+
op.add_column("attribute", sa.Column("state", sa.String(), nullable=True))
25+
op.add_column("attribute", sa.Column("logs", sa.ARRAY(sa.String()), nullable=True))
26+
op.add_column(
27+
"embedding",
28+
sa.Column("attribute_id", postgresql.UUID(as_uuid=True), nullable=True),
29+
)
30+
op.create_foreign_key(
31+
"embedding_attribute_id_fkey",
32+
"embedding",
33+
"attribute",
34+
["attribute_id"],
35+
["id"],
36+
ondelete="CASCADE",
37+
),
38+
# ### end Alembic commands ###
39+
40+
41+
def downgrade():
42+
# ### commands auto generated by Alembic - please adjust! ###
43+
op.drop_column("attribute", "user_created")
44+
op.drop_column("attribute", "source_code")
45+
op.drop_column("attribute", "state")
46+
op.drop_column("attribute", "logs")
47+
op.drop_column("embedding", "attribute_id")
48+
op.drop_constraint("embedding_attribute_id_fkey", "embedding", type_="foreignkey")
49+
# ### end Alembic commands ###

controller/attribute/manager.py

Lines changed: 208 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1-
from typing import List
1+
from typing import List, Tuple
22
from controller.tokenization.tokenization_service import request_tokenize_project
3-
from submodules.model.business_objects import attribute, general
3+
from submodules.model.business_objects import attribute, record, tokenization
44
from submodules.model.models import Attribute
5-
from util import daemon
5+
from submodules.model.enums import AttributeState, DataTypes
6+
from util import daemon, notification
7+
8+
from . import util
69

710

811
def get_attribute(project_id: str, attribute_id: str) -> Attribute:
@@ -15,34 +18,100 @@ def get_all_attributes_by_names(
1518
return attribute.get_all_by_names(project_id, attribute_names)
1619

1720

18-
def get_all_attributes(project_id: str) -> List[Attribute]:
19-
return attribute.get_all_ordered(project_id, True)
21+
def get_all_attributes(
22+
project_id: str, state_filter: List[str] = None
23+
) -> List[Attribute]:
24+
return attribute.get_all_ordered(project_id, True, state_filter)
2025

2126

2227
def check_composite_key(project_id: str) -> bool:
2328
return attribute.check_composite_key_is_valid(project_id)
2429

2530

26-
def create_attribute(project_id: str, name: str) -> None:
27-
relative_position: int = attribute.get_relative_position(project_id)
28-
if relative_position is None:
31+
def create_attribute(project_id: str, name: str) -> Attribute:
32+
prev_relative_position: int = attribute.get_relative_position(project_id)
33+
if prev_relative_position is None:
2934
relative_position = 1
3035
else:
31-
relative_position += 1
36+
relative_position = prev_relative_position + 1
37+
38+
attribute_item: Attribute = attribute.create(
39+
project_id,
40+
name,
41+
relative_position,
42+
with_commit=True,
43+
)
44+
return attribute_item
3245

33-
attribute.create(project_id, name, relative_position, with_commit=True)
46+
47+
def create_user_attribute(project_id: str) -> Attribute:
48+
prev_relative_position: int = attribute.get_relative_position(project_id)
49+
if prev_relative_position is None:
50+
relative_position = 1
51+
else:
52+
relative_position = prev_relative_position + 1
53+
54+
name = util.find_free_name(project_id)
55+
56+
attribute_item: Attribute = attribute.create(
57+
project_id,
58+
name,
59+
relative_position,
60+
data_type=DataTypes.TEXT.value,
61+
is_primary_key=False,
62+
user_created=True,
63+
state=AttributeState.INITIAL.value,
64+
with_commit=True,
65+
)
66+
notification.send_organization_update(
67+
project_id=project_id, message="calculate_attribute:created:{attribute_id}"
68+
)
69+
70+
return attribute_item
3471

3572

3673
def update_attribute(
37-
project_id: str, attribute_id: str, data_type: str, is_primary_key: bool
74+
project_id: str,
75+
attribute_id: str,
76+
data_type: str,
77+
is_primary_key: bool,
78+
name: str,
79+
source_code: str,
3880
) -> None:
3981
attribute.update(
40-
project_id, attribute_id, data_type, is_primary_key, with_commit=True
82+
project_id,
83+
attribute_id,
84+
data_type,
85+
is_primary_key,
86+
name,
87+
source_code,
88+
with_commit=True,
4189
)
90+
if attribute.get(project_id, attribute_id).state in [
91+
AttributeState.UPLOADED.value,
92+
AttributeState.USABLE.value,
93+
]:
94+
notification.send_organization_update(project_id, "attributes_updated")
4295

4396

4497
def delete_attribute(project_id: str, attribute_id: str) -> None:
45-
attribute.delete(project_id, attribute_id, with_commit=True)
98+
attribute_item = attribute.get(project_id, attribute_id)
99+
if attribute_item.user_created:
100+
is_usable = attribute_item.state == AttributeState.USABLE.value
101+
if is_usable:
102+
record.delete_user_created_attribute(
103+
project_id=project_id, attribute_id=attribute_id, with_commit=True
104+
)
105+
attribute.delete(project_id, attribute_id, with_commit=True)
106+
notification.send_organization_update(
107+
project_id=project_id, message="calculate_attribute:deleted:{attribute_id}"
108+
)
109+
if is_usable:
110+
notification.send_organization_update(
111+
project_id=project_id, message="attributes_updated"
112+
)
113+
else:
114+
raise ValueError("Attribute is not user created")
46115

47116

48117
def add_running_id(
@@ -61,3 +130,129 @@ def add_running_id(
61130
project_id,
62131
user_id,
63132
)
133+
134+
135+
def calculate_user_attribute_all_records(
136+
project_id: str, user_id: str, attribute_id: str
137+
) -> None:
138+
if attribute.get_all(
139+
project_id=project_id, state_filter=[AttributeState.RUNNING.value]
140+
):
141+
__notify_attribute_calculation_failed(
142+
project_id=project_id,
143+
attribute_id=attribute_id,
144+
log="Calculation of attribute failed. Another attribute is already running.",
145+
append_to_logs=False,
146+
)
147+
return
148+
149+
if tokenization.get_doc_bin_progress(project_id):
150+
__notify_attribute_calculation_failed(
151+
project_id=project_id,
152+
attribute_id=attribute_id,
153+
log="Tokenization is not finished",
154+
append_to_logs=False,
155+
)
156+
return
157+
158+
attribute.update(
159+
project_id=project_id,
160+
attribute_id=attribute_id,
161+
state=AttributeState.RUNNING.value,
162+
with_commit=True,
163+
)
164+
notification.send_organization_update(
165+
project_id=project_id, message="calculate_attribute:started:{attribute_id}"
166+
)
167+
daemon.run(
168+
__calculate_user_attribute_all_records,
169+
project_id,
170+
user_id,
171+
attribute_id,
172+
)
173+
174+
175+
def __calculate_user_attribute_all_records(
176+
project_id: str, user_id: str, attribute_id: str
177+
) -> None:
178+
179+
try:
180+
calculated_attributes = util.run_attribute_calculation_exec_env(
181+
attribute_id=attribute_id, project_id=project_id, doc_bin="docbin_full"
182+
)
183+
except Exception:
184+
__notify_attribute_calculation_failed(
185+
project_id=project_id,
186+
attribute_id=attribute_id,
187+
log="Attribute calculation failed",
188+
)
189+
return
190+
191+
util.add_log_to_attribute_logs(
192+
project_id, attribute_id, "Writing results to the database."
193+
)
194+
# add calculated attributes to database
195+
try:
196+
record.update_add_user_created_attribute(
197+
project_id=project_id,
198+
attribute_id=attribute_id,
199+
calculated_attributes=calculated_attributes,
200+
with_commit=True,
201+
)
202+
except Exception:
203+
record.delete_user_created_attribute(
204+
project_id=project_id,
205+
attribute_id=attribute_id,
206+
with_commit=True,
207+
)
208+
__notify_attribute_calculation_failed(
209+
project_id=project_id,
210+
attribute_id=attribute_id,
211+
log="Writing to the database failed.",
212+
)
213+
return
214+
util.add_log_to_attribute_logs(project_id, attribute_id, "Finished writing.")
215+
216+
util.add_log_to_attribute_logs(project_id, attribute_id, "Triggering tokenization.")
217+
tokenization.delete_docbins(project_id, with_commit=True)
218+
tokenization.delete_token_statistics_for_project(project_id, with_commit=True)
219+
tokenization.delete_tokenization_tasks(project_id, with_commit=True)
220+
request_tokenize_project(project_id, user_id)
221+
222+
attribute.update(
223+
project_id=project_id,
224+
attribute_id=attribute_id,
225+
state=AttributeState.USABLE.value,
226+
with_commit=True,
227+
)
228+
229+
notification.send_organization_update(
230+
project_id, f"calculate_attribute:finished:{attribute_id}"
231+
)
232+
233+
234+
def __notify_attribute_calculation_failed(
235+
project_id: str, attribute_id: str, log: str, append_to_logs: bool = True
236+
) -> None:
237+
util.add_log_to_attribute_logs(project_id, attribute_id, log, append_to_logs)
238+
attribute.update(
239+
project_id=project_id,
240+
attribute_id=attribute_id,
241+
state=AttributeState.FAILED.value,
242+
with_commit=True,
243+
)
244+
notification.send_organization_update(
245+
project_id=project_id, message="calculate_attribute:error:{attribute_id}"
246+
)
247+
248+
249+
def calculate_user_attribute_sample_records(
250+
project_id: str, attribute_id: str
251+
) -> Tuple[List[str], List[str]]:
252+
doc_bin_samples = util.prepare_sample_records_doc_bin(
253+
attribute_id=attribute_id, project_id=project_id
254+
)
255+
calculated_attributes = util.run_attribute_calculation_exec_env(
256+
attribute_id=attribute_id, project_id=project_id, doc_bin=doc_bin_samples
257+
)
258+
return list(calculated_attributes.keys()), list(calculated_attributes.values())

0 commit comments

Comments
 (0)