Skip to content

Commit 3a31dcc

Browse files
committed
Automatically issue persistent public token IDs for tokens in annotation resources
Fixes #1418
1 parent 60f5f58 commit 3a31dcc

File tree

4 files changed

+69
-1
lines changed

4 files changed

+69
-1
lines changed

Tekst-API/openapi.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18058,6 +18058,20 @@
1805818058
},
1805918059
"TextAnnotationToken": {
1806018060
"properties": {
18061+
"id": {
18062+
"anyOf": [
18063+
{
18064+
"type": "null"
18065+
},
18066+
{
18067+
"type": "string",
18068+
"maxLength": 256,
18069+
"minLength": 1
18070+
}
18071+
],
18072+
"title": "Id",
18073+
"description": "Unique ID of the token (will be generated if unset)"
18074+
},
1806118075
"annotations": {
1806218076
"items": {
1806318077
"$ref": "#/components/schemas/TextAnnotationEntry"

Tekst-API/tekst/resources/text_annotation.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import csv
2+
import random
3+
import string
24

35
from collections.abc import Callable
46
from datetime import UTC, datetime
57
from pathlib import Path
68
from typing import Annotated, Any, Literal
79
from uuid import uuid4
810

11+
from beanie.operators import Eq
912
from pydantic import BeforeValidator, Field
1013

1114
from tekst.logs import log, log_op_end, log_op_start
@@ -126,7 +129,7 @@ def rtype_es_queries(
126129
es_queries = []
127130
strict_suffix = ".strict" if strict else ""
128131
res_id = str(query.common.resource_id)
129-
q_id = str(uuid4())
132+
q_id = uuid4().hex
130133

131134
annos_usr_q = query.resource_type_specific.annotations or []
132135
tokens_field_path = f"resources.{res_id}.tokens"
@@ -300,6 +303,7 @@ async def _export_csv(
300303
"LOCATION",
301304
"SORT",
302305
"POSITION",
306+
"TOKEN_ID",
303307
*anno_keys,
304308
"AUTHORS_COMMENT",
305309
"EDITORS_COMMENTS",
@@ -324,6 +328,7 @@ async def _export_csv(
324328
full_loc_labels.get(str(content.location_id), ""),
325329
sort_num,
326330
i,
331+
token.id,
327332
*csv_annos,
328333
content.authors_comment,
329334
editors_comments,
@@ -495,12 +500,35 @@ async def _update_aggregations(
495500
precomp_doc.created_at = datetime.now(UTC)
496501
await precomp_doc.save()
497502

503+
async def _ensure_token_ids(self) -> None:
    """Assign a generated ID to every token of this resource that lacks one.

    Iterates over all content documents belonging to this resource and gives
    each token without an ``id`` a new one of the form
    ``<text slug>_<resource ID>_<8 random alphanumeric characters>``.
    Tokens that already have an ID are left untouched, and only content
    documents that were actually modified are saved.
    """
    alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits
    # The prefix is identical for every token of this resource, so it is
    # built at most once -- and only when the first token without an ID is
    # found, which avoids the text lookup when there is nothing to do.
    prefix = None
    async for content in ContentBaseDocument.find(
        Eq(ContentBaseDocument.resource_id, self.id),
        with_children=True,
    ):
        dirty = False
        for token in content.tokens:
            if token.id:
                continue
            if prefix is None:
                text_doc = await TextDocument.get(self.text_id)
                prefix = f"{text_doc.slug}_{self.id}_"
            # NOTE(review): the random suffix is not checked against IDs that
            # already exist in this resource -- confirm that the (small)
            # collision risk is acceptable for "unique" public token IDs.
            token.id = prefix + "".join(random.choices(alphabet, k=8))
            dirty = True
        # Write back only documents in which at least one ID was issued.
        if dirty:
            await content.save()
523+
498524
async def resource_precompute_hook(
499525
self,
500526
*,
501527
force: bool = False,
502528
) -> None:
503529
await super().resource_precompute_hook(force=force)
530+
531+
# update aggregations
504532
op_id = log_op_start(f"Generate aggregations for resource {str(self.id)}")
505533
try:
506534
await self._update_aggregations(force=force)
@@ -509,6 +537,11 @@ async def resource_precompute_hook(
509537
raise e
510538
log_op_end(op_id)
511539

540+
# ensure token IDs
541+
op_id = log_op_start(f"Ensure token IDs for resource {str(self.id)}")
542+
await self._ensure_token_ids()
543+
log_op_end(op_id)
544+
512545

513546
type TextAnnotationValue = Annotated[
514547
ConStr(max_length=256, cleanup="oneline"),
@@ -538,6 +571,15 @@ class TextAnnotationEntry(ModelBase):
538571

539572

540573
class TextAnnotationToken(ModelBase):
574+
id: Annotated[
575+
ConStrOrNone(
576+
max_length=256,
577+
cleanup="oneline",
578+
),
579+
Field(
580+
description="Unique ID of the token (will be generated if unset)",
581+
),
582+
] = None
541583
annotations: Annotated[
542584
list[TextAnnotationEntry],
543585
Field(

Tekst-Web/src/api/schema.d.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6282,6 +6282,11 @@ export interface components {
62826282
};
62836283
/** TextAnnotationToken */
62846284
TextAnnotationToken: {
6285+
/**
6286+
* Id
6287+
* @description Unique ID of the token (will be generated if unset)
6288+
*/
6289+
id?: null | string;
62856290
/**
62866291
* Annotations
62876292
* @description List of annotations on a token

Tekst-Web/src/components/content/TextAnnotationContent.vue

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ interface AnnotationDisplay {
5353
}
5454
5555
interface TokenDetails {
56+
id?: string;
5657
form: string;
5758
comment?: string;
5859
annotations?: {
@@ -300,6 +301,7 @@ const contents = computed(() => {
300301
return {
301302
...c,
302303
tokens: c.tokens.map((t, i) => ({
304+
id: t.id,
303305
form:
304306
t.annotations
305307
.find((a) => a.key === 'form')
@@ -355,6 +357,7 @@ function handleTokenClick(token: Token) {
355357
if (!token.annotations.length) return;
356358
const annos = token.annotations.filter((a) => a.key !== 'comment');
357359
tokenDetails.value = {
360+
id: token.id ?? undefined,
358361
form:
359362
token.annotations
360363
.find((a) => a.key === 'form')
@@ -622,6 +625,10 @@ function generatePlaintextAnno(): string {
622625
</template>
623626
</template>
624627
</n-table>
628+
629+
<div v-if="tokenDetails?.id" class="mt-lg text-mini translucent" style="text-align: center">
630+
<b>ID:</b> {{ tokenDetails.id }}
631+
</div>
625632
</generic-modal>
626633

627634
<n-dropdown

0 commit comments

Comments
 (0)