Skip to content

Commit d8361a2

Browse files
authored
release v1.3.0 (#9)
Release v1.3.0
1 parent 89b02b2 commit d8361a2

File tree

3 files changed

+27
-6
lines changed

3 files changed

+27
-6
lines changed

submodules/s3

Submodule s3 updated 1 file

util.py

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import spacy
1+
import time
22
from spacy.language import Language
33
from spacy.tokens import DocBin, Doc
44
from spacy.vocab import Vocab
@@ -13,10 +13,10 @@
1313
send_notification_created,
1414
)
1515
from submodules.model import (
16-
RecordTokenizationTask,
1716
RecordTokenized,
1817
RecordAttributeTokenStatistics,
1918
)
19+
from submodules.model.enums import AttributeState
2020
from submodules.model.business_objects import (
2121
project,
2222
attribute,
@@ -231,11 +231,25 @@ def create_rats_entries(
231231
project_id, False, ["rats", "state", str(tokenization_task.state)]
232232
)
233233
general.commit()
234+
i = 0
235+
while initial_count > record.count_tokenized_records(project_id):
236+
if i > 9:
237+
print("Docbins missing", flush=True)
238+
raise Exception("Docbins missing")
239+
time.sleep(1)
240+
i += 1
234241
if attribute_id:
235242
text_attribute = attribute.get(project_id, attribute_id)
236243
text_attributes = {text_attribute.name: text_attribute.id}
237244
else:
238-
text_attributes = attribute.get_text_attributes(project_id)
245+
text_attributes = attribute.get_text_attributes(
246+
project_id,
247+
state_filter=[
248+
AttributeState.UPLOADED.value,
249+
AttributeState.USABLE.value,
250+
AttributeState.RUNNING.value,
251+
],
252+
)
239253
vocab = get_tokenizer_by_project(project_id).vocab
240254
record_set = record.get_missing_rats_records(project_id, 100, attribute_id)
241255
chunk = 0
@@ -350,7 +364,14 @@ def tokenize_record(project_id: str, record_id: str) -> int:
350364
if record_id not in __prioritized_records[project_id]:
351365
__prioritized_records[project_id][record_id] = True
352366

353-
text_attributes = attribute.get_text_attributes(project_id)
367+
text_attributes = attribute.get_text_attributes(
368+
project_id,
369+
state_filter=[
370+
AttributeState.UPLOADED.value,
371+
AttributeState.USABLE.value,
372+
AttributeState.RUNNING.value,
373+
],
374+
)
354375
tokenizer = get_tokenizer_by_project(project_id)
355376
record_item = record.get(project_id, record_id)
356377
columns = []

0 commit comments

Comments
 (0)