Skip to content

Commit 90e0944

Browse files
[DOP-27352] Add GET /v1/tags endpoint (#289)
1 parent 43473b0 commit 90e0944

36 files changed

+966
-63
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ db-cleanup-partitions: ##@DB Clean partitions
6666
db-cleanup-partitions-ci: ##@DB Clean partitions in CI
6767
${PYTHON} -m data_rentgen.db.scripts.cleanup_partitions $(ARGS)
6868
db-views: ##@DB Create views
69-
${POETRY} run coveratge run python -m data_rentgen.db.scripts.refresh_analytic_views $(ARGS)
69+
${POETRY} run coverage run python -m data_rentgen.db.scripts.refresh_analytic_views $(ARGS)
7070

7171
db-seed: ##@DB Seed database with random data
7272
${PYTHON} -m data_rentgen.db.scripts.seed $(ARGS)
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# SPDX-FileCopyrightText: 2024-2025 MTS PJSC
2+
# SPDX-License-Identifier: Apache-2.0
3+
"""Add search for tags
4+
5+
Revision ID: 52fb9d8765fd
6+
Revises: 386ed362284c
7+
Create Date: 2025-08-17 16:46:39.203240
8+
9+
"""
10+
11+
import sqlalchemy as sa
12+
from alembic import op
13+
from sqlalchemy.dialects import postgresql
14+
15+
# revision identifiers, used by Alembic.
16+
revision = "52fb9d8765fd"
17+
down_revision = "386ed362284c"
18+
branch_labels = None
19+
depends_on = None
20+
21+
22+
def upgrade() -> None:
    """Apply the revision: unique tag names plus full-text search columns.

    Steps, in order:

    1. add a unique constraint on ``tag.name``;
    2. add a stored generated ``search_vector`` column to ``tag`` and index it with GIN;
    3. do the same for ``tag_value`` (computed from ``value`` instead of ``name``).
    """
    # Both generated expressions index the raw text concatenated with a copy in which
    # dots are replaced by spaces (presumably so dotted names are also searchable by
    # their individual segments -- confirm against the search helpers).
    tag_search_vector = sa.Column(
        "search_vector",
        postgresql.TSVECTOR(),
        sa.Computed(
            "to_tsvector('simple'::regconfig, name || ' ' || (translate(name, '.', ' ')))",
            persisted=True,
        ),
        nullable=False,
    )
    tag_value_search_vector = sa.Column(
        "search_vector",
        postgresql.TSVECTOR(),
        sa.Computed(
            "to_tsvector('simple'::regconfig, value || ' ' || (translate(value, '.', ' ')))",
            persisted=True,
        ),
        nullable=False,
    )

    # 1. tag.name must be unique from now on.
    op.create_unique_constraint(op.f("uq__tag__name"), "tag", ["name"])

    # 2. tag: search column first, then the GIN index that depends on it.
    op.add_column("tag", tag_search_vector)
    op.create_index(
        "ix__tag__search_vector",
        "tag",
        ["search_vector"],
        unique=False,
        postgresql_using="gin",
    )

    # 3. tag_value: same pattern.
    op.add_column("tag_value", tag_value_search_vector)
    op.create_index(
        "ix__tag_value__search_vector",
        "tag_value",
        ["search_vector"],
        unique=False,
        postgresql_using="gin",
    )
56+
57+
58+
def downgrade() -> None:
    """Revert the revision by undoing ``upgrade()`` in reverse order.

    For ``tag_value`` and then ``tag``: drop the GIN index, then the
    ``search_vector`` column it was built on. Finally drop the unique
    constraint on ``tag.name``.
    """
    # (index name, table) pairs, listed in the order they must be removed.
    search_indexes = (
        ("ix__tag_value__search_vector", "tag_value"),
        ("ix__tag__search_vector", "tag"),
    )
    for index_name, table in search_indexes:
        # The index goes first, then the column it covers.
        op.drop_index(index_name, table_name=table, postgresql_using="gin")
        op.drop_column(table, "search_vector")

    op.drop_constraint(op.f("uq__tag__name"), "tag", type_="unique")

data_rentgen/db/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from data_rentgen.db.models.column_lineage import ColumnLineage
77
from data_rentgen.db.models.custom_properties import CustomProperties
88
from data_rentgen.db.models.custom_user_properties import CustomUserProperties
9-
from data_rentgen.db.models.dataset import Dataset
9+
from data_rentgen.db.models.dataset import Dataset, dataset_tags_table
1010
from data_rentgen.db.models.dataset_column_relation import (
1111
DatasetColumnRelation,
1212
DatasetColumnRelationType,
@@ -55,4 +55,5 @@
5555
"Tag",
5656
"TagValue",
5757
"User",
58+
"dataset_tags_table",
5859
]

data_rentgen/db/models/tag.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,37 @@
22
# SPDX-License-Identifier: Apache-2.0
33
from __future__ import annotations
44

5-
from sqlalchemy import BigInteger, String
6-
from sqlalchemy.orm import Mapped, mapped_column
5+
from typing import TYPE_CHECKING
6+
7+
from sqlalchemy import BigInteger, Computed, Index, String
8+
from sqlalchemy.dialects.postgresql import TSVECTOR
9+
from sqlalchemy.orm import Mapped, mapped_column, relationship
710

811
from data_rentgen.db.models.base import Base
912

13+
if TYPE_CHECKING:
14+
from data_rentgen.db.models.tag_value import TagValue
15+
1016

1117
class Tag(Base):
1218
__tablename__ = "tag"
19+
__table_args__ = (Index("ix__tag__search_vector", "search_vector", postgresql_using="gin"),)
1320

1421
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
15-
name: Mapped[str] = mapped_column(String(32), nullable=False)
22+
name: Mapped[str] = mapped_column(String(32), nullable=False, unique=True)
23+
tag_values: Mapped[list[TagValue]] = relationship(
24+
"TagValue",
25+
lazy="noload",
26+
back_populates="tag",
27+
order_by="TagValue.value",
28+
)
29+
search_vector: Mapped[str] = mapped_column(
30+
TSVECTOR,
31+
Computed(
32+
"to_tsvector('simple'::regconfig, name || ' ' || translate(name, '.', ' '))",
33+
persisted=True,
34+
),
35+
nullable=False,
36+
deferred=True,
37+
doc="Full-text search vector for tag name",
38+
)

data_rentgen/db/models/tag_value.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0
33
from __future__ import annotations
44

5-
from sqlalchemy import BigInteger, ForeignKey, String, UniqueConstraint
5+
from sqlalchemy import BigInteger, Computed, ForeignKey, Index, String, UniqueConstraint
6+
from sqlalchemy.dialects.postgresql import TSVECTOR
67
from sqlalchemy.orm import Mapped, mapped_column, relationship
78

89
from data_rentgen.db.models.base import Base
@@ -11,7 +12,10 @@
1112

1213
class TagValue(Base):
1314
__tablename__ = "tag_value"
14-
__table_args__ = (UniqueConstraint("tag_id", "value"),)
15+
__table_args__ = (
16+
UniqueConstraint("tag_id", "value"),
17+
Index("ix__tag_value__search_vector", "search_vector", postgresql_using="gin"),
18+
)
1519

1620
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
1721
tag_id: Mapped[int] = mapped_column(
@@ -24,6 +28,18 @@ class TagValue(Base):
2428
tag: Mapped[Tag] = relationship(
2529
Tag,
2630
lazy="noload",
31+
back_populates="tag_values",
2732
foreign_keys=[tag_id],
2833
)
2934
value: Mapped[str] = mapped_column(String(256), nullable=False)
35+
36+
search_vector: Mapped[str] = mapped_column(
37+
TSVECTOR,
38+
Computed(
39+
"to_tsvector('simple'::regconfig, value || ' ' || translate(value, '.', ' '))",
40+
persisted=True,
41+
),
42+
nullable=False,
43+
deferred=True,
44+
doc="Full-text search vector for tag value",
45+
)

data_rentgen/db/repositories/dataset.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
any_,
1212
asc,
1313
desc,
14+
distinct,
1415
func,
1516
select,
1617
union,
@@ -42,12 +43,25 @@ async def paginate(
4243
page: int,
4344
page_size: int,
4445
dataset_ids: Collection[int],
46+
tag_value_ids: Collection[int],
4547
search_query: str | None,
4648
) -> PaginationDTO[Dataset]:
4749
where = []
4850
if dataset_ids:
4951
where.append(Dataset.id == any_(list(dataset_ids))) # type: ignore[arg-type]
5052

53+
if tag_value_ids:
54+
tv_ids = list(tag_value_ids)
55+
dataset_ids_subq = (
56+
select(Dataset.id)
57+
.join(Dataset.tags)
58+
.where(TagValue.id.in_(tv_ids))
59+
.group_by(Dataset.id)
60+
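# If multiple tag values are passed, the dataset must have all of them (AND, not OR).
# NOTE(review): duplicate ids in tag_value_ids inflate len(tv_ids), so the comparison against count(distinct ...) can never match - consider deduplicating first.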
61+
.having(func.count(distinct(TagValue.id)) == len(tv_ids))
62+
)
63+
where.append(Dataset.id.in_(dataset_ids_subq))
64+
5165
query: Select | CompoundSelect
5266
order_by: list[ColumnElement | SQLColumnExpression]
5367
if search_query:
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# SPDX-FileCopyrightText: 2024-2025 MTS PJSC
2+
# SPDX-License-Identifier: Apache-2.0
3+
from __future__ import annotations
4+
5+
from collections.abc import Collection
6+
7+
from sqlalchemy import ColumnElement, CompoundSelect, Select, SQLColumnExpression, any_, asc, desc, func, select
8+
from sqlalchemy.orm import selectinload
9+
10+
from data_rentgen.db.models.tag import Tag
11+
from data_rentgen.db.models.tag_value import TagValue
12+
from data_rentgen.db.repositories.base import Repository
13+
from data_rentgen.db.utils.search import make_tsquery, ts_match, ts_rank
14+
from data_rentgen.dto.pagination import PaginationDTO
15+
16+
17+
class TagRepository(Repository[Tag]):
    """Repository with read queries over ``Tag`` rows."""

    async def paginate(
        self,
        page: int,
        page_size: int,
        tag_ids: Collection[int],
        search_query: str | None,
    ) -> PaginationDTO[Tag]:
        """Return one page of tags, optionally filtered by id and/or full-text search.

        Args:
            page: Page number, forwarded to ``_paginate_by_query``.
            page_size: Page size, forwarded to ``_paginate_by_query``.
            tag_ids: When non-empty, only tags with these ids are considered.
            search_query: When truthy, only tags whose name or any of whose values
                match the text-search query are returned, best match first.
                Otherwise tags are ordered by name.

        Returns:
            Whatever ``_paginate_by_query`` produces for the built query.
        """
        id_filters = [Tag.id == any_(list(tag_ids))] if tag_ids else []  # type: ignore[arg-type]

        query: Select | CompoundSelect
        order_by: list[ColumnElement | SQLColumnExpression]
        if not search_query:
            # Plain listing: all (optionally id-filtered) tags, alphabetically.
            query = select(Tag).where(*id_filters)
            order_by = [Tag.name]
        else:
            tsquery = make_tsquery(search_query)

            # Branch 1: tags matched by their own name.
            name_rank = ts_rank(Tag.search_vector, tsquery).label("search_rank")
            matched_by_name = select(Tag.id, Tag.name, name_rank).where(
                ts_match(Tag.search_vector, tsquery),
                *id_filters,
            )

            # Branch 2: tags matched through one of their values
            # (one row per matching value, collapsed below).
            value_rank = ts_rank(TagValue.search_vector, tsquery).label("search_rank")
            matched_by_value = select(Tag.id, Tag.name, value_rank)
            matched_by_value = matched_by_value.join(TagValue, TagValue.tag_id == Tag.id)
            matched_by_value = matched_by_value.where(
                ts_match(TagValue.search_vector, tsquery),
                *id_filters,
            )

            # Merge both branches and keep a single row per tag with its best rank.
            hits = matched_by_name.union_all(matched_by_value).cte("tag_union")
            best_rank = func.max(hits.c.search_rank).label("search_rank")
            query = select(hits.c.id, hits.c.name, best_rank).group_by(hits.c.id, hits.c.name)

            # NOTE(review): this branch selects plain columns (id, name, search_rank),
            # not Tag entities, while the loader option below targets Tag.tag_values.
            # How _paginate_by_query reconciles that is not visible here -- confirm.
            order_by = [desc("search_rank"), asc("name")]

        return await self._paginate_by_query(
            query=query,
            order_by=order_by,
            # Tag.tag_values is requested explicitly via a separate SELECT ... IN load.
            options=[selectinload(Tag.tag_values)],
            page=page,
            page_size=page_size,
        )

data_rentgen/server/api/v1/router/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from data_rentgen.server.api.v1.router.operation import router as operation_router
1010
from data_rentgen.server.api.v1.router.personal_token import router as personal_token_router
1111
from data_rentgen.server.api.v1.router.run import router as run_router
12+
from data_rentgen.server.api.v1.router.tag import router as tag_router
1213
from data_rentgen.server.api.v1.router.user import router as user_router
1314

1415
router = APIRouter(prefix="/v1")
@@ -20,3 +21,4 @@
2021
router.include_router(run_router)
2122
router.include_router(user_router)
2223
router.include_router(personal_token_router)
24+
router.include_router(tag_router)

data_rentgen/server/api/v1/router/dataset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ async def paginate_datasets(
3737
page=query_args.page,
3838
page_size=query_args.page_size,
3939
dataset_ids=query_args.dataset_id,
40+
tag_value_ids=query_args.tag_value_id,
4041
search_query=query_args.search_query,
4142
)
4243
return PageResponseV1[DatasetDetailedResponseV1].from_pagination(pagination)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# SPDX-FileCopyrightText: 2024-2025 MTS PJSC
2+
# SPDX-License-Identifier: Apache-2.0
3+
from typing import Annotated
4+
5+
from fastapi import APIRouter, Depends
6+
7+
from data_rentgen.db.models.user import User
8+
from data_rentgen.server.errors import get_error_responses
9+
from data_rentgen.server.errors.schemas import InvalidRequestSchema, NotAuthorizedRedirectSchema, NotAuthorizedSchema
10+
from data_rentgen.server.schemas.v1 import (
11+
PageResponseV1,
12+
TagDetailedResponseV1,
13+
)
14+
from data_rentgen.server.schemas.v1.tag import TagPaginateQueryV1
15+
from data_rentgen.server.services import get_user
16+
from data_rentgen.server.services.tag import TagService
17+
18+
router = APIRouter(
19+
prefix="/tags",
20+
tags=["Tags"],
21+
responses=get_error_responses(include={NotAuthorizedSchema, NotAuthorizedRedirectSchema, InvalidRequestSchema}),
22+
)
23+
24+
25+
# GET handler registered at the router's own prefix: returns a page of tags.
# Documented with comments on purpose: FastAPI would publish a docstring as the
# endpoint description in the generated OpenAPI schema.
@router.get("", summary="Paginated list of Tags")
async def paginate_tags(
    query_args: Annotated[TagPaginateQueryV1, Depends()],
    tag_service: Annotated[TagService, Depends()],
    # Not used in the body; declared so the get_user() dependency is resolved per request.
    current_user: Annotated[User, Depends(get_user())],
) -> PageResponseV1[TagDetailedResponseV1]:
    # Forward the parsed query parameters to the service layer unchanged.
    tags_page = await tag_service.paginate(
        page=query_args.page,
        page_size=query_args.page_size,
        tag_ids=query_args.tag_id,
        search_query=query_args.search_query,
    )
    # Wrap the service-level pagination result into the API response schema.
    response_schema = PageResponseV1[TagDetailedResponseV1]
    return response_schema.from_pagination(tags_page)

0 commit comments

Comments
 (0)