Automatically issue persistent public token IDs for tokens in annotation resources

bkis · bkis · commit 3a31dccc91a8 · 2026-01-08T16:12:31.000+01:00
Fixes #1418
diff --git a/Tekst-API/openapi.json b/Tekst-API/openapi.json
@@ -18058,6 +18058,20 @@
       },
       "TextAnnotationToken": {
         "properties": {
+          "id": {
+            "anyOf": [
+              {
+                "type": "null"
+              },
+              {
+                "type": "string",
+                "maxLength": 256,
+                "minLength": 1
+              }
+            ],
+            "title": "Id",
+            "description": "Unique ID of the token (will be generated if unset)"
+          },
           "annotations": {
             "items": {
               "$ref": "#/components/schemas/TextAnnotationEntry"
diff --git a/Tekst-API/tekst/resources/text_annotation.py b/Tekst-API/tekst/resources/text_annotation.py
@@ -1,11 +1,14 @@
 import csv
+import random
+import string
 
 from collections.abc import Callable
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Annotated, Any, Literal
 from uuid import uuid4
 
+from beanie.operators import Eq
 from pydantic import BeforeValidator, Field
 
 from tekst.logs import log, log_op_end, log_op_start
@@ -126,7 +129,7 @@ def rtype_es_queries(
         es_queries = []
         strict_suffix = ".strict" if strict else ""
         res_id = str(query.common.resource_id)
-        q_id = str(uuid4())
+        q_id = uuid4().hex
 
         annos_usr_q = query.resource_type_specific.annotations or []
         tokens_field_path = f"resources.{res_id}.tokens"
@@ -300,6 +303,7 @@ async def _export_csv(
                     "LOCATION",
                     "SORT",
                     "POSITION",
+                    "TOKEN_ID",
                     *anno_keys,
                     "AUTHORS_COMMENT",
                     "EDITORS_COMMENTS",
@@ -324,6 +328,7 @@ async def _export_csv(
                             full_loc_labels.get(str(content.location_id), ""),
                             sort_num,
                             i,
+                            token.id,
                             *csv_annos,
                             content.authors_comment,
                             editors_comments,
@@ -495,12 +500,35 @@ async def _update_aggregations(
         precomp_doc.created_at = datetime.now(UTC)
         await precomp_doc.save()
 
+    async def _ensure_token_ids(self):
+        """Assign a generated ID to every token of this resource that lacks one."""
+        prefix = None  # "<text slug>_<resource ID>_", built lazily on first use
+        alphabet = string.ascii_letters + string.digits
+        async for content in ContentBaseDocument.find(
+            Eq(ContentBaseDocument.resource_id, self.id),
+            with_children=True,
+        ):
+            dirty = False
+            for token in content.tokens:
+                if token.id:
+                    continue  # IDs are persistent: never overwrite an existing one
+                if prefix is None:
+                    text_doc = await TextDocument.get(self.text_id)
+                    prefix = f"{text_doc.slug}_{self.id}_"
+                # NOTE(review): the random suffix is not checked against existing IDs
+                token.id = prefix + "".join(random.choices(alphabet, k=8))
+                dirty = True
+            if dirty:
+                await content.save()
+
     async def resource_precompute_hook(
         self,
         *,
         force: bool = False,
     ) -> None:
         await super().resource_precompute_hook(force=force)
+
+        # update aggregations
         op_id = log_op_start(f"Generate aggregations for resource {str(self.id)}")
         try:
             await self._update_aggregations(force=force)
@@ -509,6 +537,11 @@ async def resource_precompute_hook(
             raise e
         log_op_end(op_id)
 
+        # ensure token IDs
+        op_id = log_op_start(f"Ensure token IDs for resource {str(self.id)}")
+        await self._ensure_token_ids()
+        log_op_end(op_id)
+
 
 type TextAnnotationValue = Annotated[
     ConStr(max_length=256, cleanup="oneline"),
@@ -538,6 +571,15 @@ class TextAnnotationEntry(ModelBase):
 
 
 class TextAnnotationToken(ModelBase):
+    id: Annotated[
+        ConStrOrNone(
+            max_length=256,
+            cleanup="oneline",
+        ),
+        Field(
+            description="Unique ID of the token (will be generated if unset)",
+        ),
+    ] = None
     annotations: Annotated[
         list[TextAnnotationEntry],
         Field(
diff --git a/Tekst-Web/src/api/schema.d.ts b/Tekst-Web/src/api/schema.d.ts
@@ -6282,6 +6282,11 @@ export interface components {
     };
     /** TextAnnotationToken */
     TextAnnotationToken: {
+      /**
+       * Id
+       * @description Unique ID of the token (will be generated if unset)
+       */
+      id?: null | string;
       /**
        * Annotations
        * @description List of annotations on a token
diff --git a/Tekst-Web/src/components/content/TextAnnotationContent.vue b/Tekst-Web/src/components/content/TextAnnotationContent.vue
@@ -53,6 +53,7 @@ interface AnnotationDisplay {
 }
 
 interface TokenDetails {
+  id?: string;
   form: string;
   comment?: string;
   annotations?: {
@@ -300,6 +301,7 @@ const contents = computed(() => {
       return {
         ...c,
         tokens: c.tokens.map((t, i) => ({
+          id: t.id,
           form:
             t.annotations
               .find((a) => a.key === 'form')
@@ -355,6 +357,7 @@ function handleTokenClick(token: Token) {
   if (!token.annotations.length) return;
   const annos = token.annotations.filter((a) => a.key !== 'comment');
   tokenDetails.value = {
+    id: token.id ?? undefined,
     form:
       token.annotations
         .find((a) => a.key === 'form')
@@ -622,6 +625,10 @@ function generatePlaintextAnno(): string {
           </template>
         </template>
       </n-table>
+
+      <div v-if="tokenDetails?.id" class="mt-lg text-mini translucent" style="text-align: center">
+        <b>ID:</b> {{ tokenDetails.id }}
+      </div>
     </generic-modal>
 
     <n-dropdown