Skip to content

Commit b41a1d6

Browse files
anmarhindiLennartSchmidtKernlumburovskalina
authored
Question Playground (#288)
* Initial playground * Add playground route and search question model * initial eval set endpoint * Upgrade alembic version for eval set * similar records * restructure * Remove alembic upgrade due to db arch changes * dummy endpoints * adds_refinery_playground_evaluation * Remove old matching set * correct names * evaluation runs * sub ref * Add getters to playground manager * renaming. init run * improve results * creation * init run * simple contains search * user fix * fix search, expand * fix format, add limit * limit * Remove unused auth manager * Add records batch endpoint * fix evaluation run * Add Evaluation set by group endpoint * Add auth check for project access endpoints * delete endpoints * Get evaluation run by id * change result data * snake case results * fix naming * filter * Fixed evaluation run by id request * Delete evaluation runs * fix records * concurrency and limits * threshold * threshold run * user id fix * reformulation prompt * requirements * change output * api key dyn * Simple error handling for reformulation * group fetch * improve record fetching * record batch * simplify args * fix exception * Remove redundant check, delegate to json loads raise * Add playground question model * Change endpoint handling of saveQuestion * Add initial get playground questions endpoint * sub ref * sub ref * Delegate save question to neural search * Update model and manager * sub ref * Enable project access check * list comp * Use adjusted run state from enum * Rename base uri * eval run function * Resolve tensor via connector * Get tensor for text via connector * Remove unused import * Get enum value for state * sub ref * sub ref * sub ref * sub ref * Add traceback to exception handling * Deleting questions and order desc * sub ref --------- Co-authored-by: LennartSchmidtKern <[email protected]> Co-authored-by: Lina <[email protected]>
1 parent 7751b74 commit b41a1d6

File tree

15 files changed

+946
-11
lines changed

15 files changed

+946
-11
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""adds_playground_question
2+
3+
Revision ID: 0c8eb3ff1c71
4+
Revises: ac97442726d2
5+
Create Date: 2025-02-04 09:48:41.971287
6+
7+
"""
8+
9+
from alembic import op
10+
import sqlalchemy as sa
11+
from sqlalchemy.dialects import postgresql
12+
13+
# revision identifiers, used by Alembic.
14+
revision = "0c8eb3ff1c71"
15+
down_revision = "ac97442726d2"
16+
branch_labels = None
17+
depends_on = None
18+
19+
20+
def upgrade():
    """Create the ``playground_question`` table and its ``project_id`` index."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "playground_question"
    columns = [
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("question", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
    ]
    constraints = [
        # Rows are deleted together with the project they reference.
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table(table_name, *columns, *constraints)

    index_name = op.f("ix_playground_question_project_id")
    op.create_index(index_name, table_name, ["project_id"], unique=False)
    # ### end Alembic commands ###
38+
39+
40+
def downgrade():
    """Drop the ``project_id`` index, then the ``playground_question`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "playground_question"
    index_name = op.f("ix_playground_question_project_id")
    op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
    # ### end Alembic commands ###
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""adds_refinery_playground_evaluation
2+
3+
Revision ID: ac97442726d2
4+
Revises: eb5ecbee5090
5+
Create Date: 2025-01-16 13:26:43.059523
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
# revision identifiers, used by Alembic.
13+
revision = 'ac97442726d2'
14+
down_revision = 'eb5ecbee5090'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Create the evaluation tables and the indexes on their reference columns.

    Tables are created in the order ``evaluation_group``, ``evaluation_set``,
    ``evaluation_run``; ``evaluation_run`` holds a foreign key to
    ``evaluation_group``, so the group table has to exist first.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    def create_indexes(table_name, column_names):
        # One non-unique index per column, named ix_<table>_<column>.
        for column_name in column_names:
            op.create_index(
                op.f(f"ix_{table_name}_{column_name}"),
                table_name,
                [column_name],
                unique=False,
            )

    op.create_table(
        "evaluation_group",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("name", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("evaluation_set_ids", sa.JSON(), nullable=True),
        sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    create_indexes("evaluation_group", ("created_by", "project_id"))

    op.create_table(
        "evaluation_set",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("question", sa.String(), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("record_ids", sa.JSON(), nullable=True),
        sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    create_indexes("evaluation_set", ("created_by", "project_id"))

    op.create_table(
        "evaluation_run",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("evaluation_group_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("created_by", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("project_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("embedding_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("state", sa.String(), nullable=True),
        sa.Column("results", sa.JSON(), nullable=True),
        sa.Column("meta_info", sa.JSON(), nullable=True),
        sa.ForeignKeyConstraint(["created_by"], ["user.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["embedding_id"], ["embedding.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(
            ["evaluation_group_id"], ["evaluation_group.id"], ondelete="SET NULL"
        ),
        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    create_indexes(
        "evaluation_run",
        ("created_by", "embedding_id", "evaluation_group_id", "project_id"),
    )
    # ### end Alembic commands ###
68+
69+
70+
def downgrade():
    """Drop the evaluation tables and their indexes in reverse creation order.

    ``evaluation_run`` goes first because it references ``evaluation_group``.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    def drop_table_with_indexes(table_name, indexed_columns):
        # Indexes are named ix_<table>_<column>; remove them before the table.
        for column_name in indexed_columns:
            op.drop_index(
                op.f(f"ix_{table_name}_{column_name}"), table_name=table_name
            )
        op.drop_table(table_name)

    drop_table_with_indexes(
        "evaluation_run",
        ("project_id", "evaluation_group_id", "embedding_id", "created_by"),
    )
    drop_table_with_indexes("evaluation_set", ("project_id", "created_by"))
    drop_table_with_indexes("evaluation_group", ("project_id", "created_by"))
    # ### end Alembic commands ###

app.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from fast_api.routes.weak_supervision import router as weak_supervision_router
3131
from fast_api.routes.labeling_tasks import router as labeling_tasks_router
3232
from fast_api.routes.task_execution import router as task_execution_router
33+
from fast_api.routes.playground import router as playground_router
3334
from middleware.database_session import handle_db_session
3435
from middleware.starlette_tmp_middleware import DatabaseSessionHandler
3536
from starlette.applications import Starlette
@@ -55,6 +56,7 @@
5556
PREFIX_WEAK_SUPERVISION,
5657
PREFIX_LABELING_TASKS,
5758
PREFIX_TASK_EXECUTION,
59+
PREFIX_PLAYGROUND,
5860
)
5961
from util import security, clean_up
6062
from middleware import log_storage
@@ -106,7 +108,9 @@
106108
fastapi_app.include_router(
107109
labeling_tasks_router, prefix=PREFIX_LABELING_TASKS, tags=["labeling-tasks"]
108110
)
109-
111+
fastapi_app.include_router(
112+
playground_router, prefix=PREFIX_PLAYGROUND, tags=["playground"]
113+
)
110114
fastapi_app_internal = FastAPI()
111115
fastapi_app_internal.include_router(
112116
task_execution_router, prefix=PREFIX_TASK_EXECUTION, tags=["task-execution"]

controller/embedding/connector.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,17 @@
55
from util import service_requests
66
import requests
77

8-
BASE_URI = os.getenv("EMBEDDING_SERVICE")
8+
EMBEDDING_BASE_URI = os.getenv("EMBEDDING_SERVICE")
99
NEURAL_SEARCH_BASE_URI = os.getenv("NEURAL_SEARCH")
1010

1111

1212
def request_listing_recommended_encoders() -> Any:
    """GET ``/classification/recommend/TEXT`` from the embedding service.

    Returns whatever ``service_requests.get_call_or_raise`` yields for that URL.
    """
    return service_requests.get_call_or_raise(
        f"{EMBEDDING_BASE_URI}/classification/recommend/TEXT"
    )
1515

1616

1717
def request_embedding(project_id: str, embedding_id: str) -> Any:
18-
url = f"{BASE_URI}/embed"
18+
url = f"{EMBEDDING_BASE_URI}/embed"
1919
data = {
2020
"project_id": str(project_id),
2121
"embedding_id": str(embedding_id),
@@ -24,12 +24,12 @@ def request_embedding(project_id: str, embedding_id: str) -> Any:
2424

2525

2626
def request_deleting_embedding(project_id: str, embedding_id: str) -> Any:
    """Send a DELETE call for the given embedding to the embedding service.

    The target is ``<EMBEDDING_BASE_URI>/delete/<project_id>/<embedding_id>``;
    the result of ``service_requests.delete_call_or_raise`` is returned as is.
    """
    return service_requests.delete_call_or_raise(
        f"{EMBEDDING_BASE_URI}/delete/{project_id}/{embedding_id}"
    )
2929

3030

3131
def request_tensor_upload(project_id: str, embedding_id: str) -> None:
    """POST to the embedding service's ``upload_tensor_data`` endpoint.

    The request carries an empty body; project and embedding are identified
    through the URL path only. Nothing is returned.
    """
    endpoint = f"{EMBEDDING_BASE_URI}/upload_tensor_data/{project_id}/{embedding_id}"
    empty_payload = {}
    service_requests.post_call_or_raise(endpoint, empty_payload)
3434

3535

@@ -39,7 +39,7 @@ def request_re_embed_records(
3939
# example changes structure:
4040
# {"<embedding_id>":[{"record_id":"<record_id>","attribute_name":"<attribute_name>","sub_key":"<sub_key>"}]}
4141
# note that sub_key is optional and only for embedding lists relevant
42-
url = f"{BASE_URI}/re_embed_records/{project_id}"
42+
url = f"{EMBEDDING_BASE_URI}/re_embed_records/{project_id}"
4343
service_requests.post_call_or_raise(url, {"changes": changes})
4444

4545

@@ -96,3 +96,37 @@ def delete_embedding_from_neural_search(embedding_id: str) -> None:
9696
url = f"{NEURAL_SEARCH_BASE_URI}/delete_collection"
9797
params = {"embedding_id": embedding_id}
9898
requests.put(url, params=params)
99+
100+
101+
def request_tensor_for_text(
    refinery_project_id: str, embedding_id: str, texts: List[str]
) -> Any:
    """POST ``texts`` to the embedding service's ``calc-tensor-by-pkl`` endpoint.

    Project and embedding are addressed through the URL path; the request body
    only carries the texts. Returns the result of
    ``service_requests.post_call_or_raise`` unchanged.
    """
    payload = {"texts": texts}
    return service_requests.post_call_or_raise(
        f"{EMBEDDING_BASE_URI}/calc-tensor-by-pkl/{refinery_project_id}/{embedding_id}",
        payload,
    )
111+
112+
113+
def request_most_similar_records(
    project_id: str,
    embedding_id: str,
    embedding_tensor: List[float],
    limit: int,
    similarity_filter_option: Optional[List[Dict[str, Any]]] = None,
    threshold: Optional[float] = None,
    question: Optional[str] = None,
) -> Any:
    """POST a similarity query to neural search's ``most_similar_by_embedding``.

    The request always asks for scores (``include_scores=true``).

    Args:
        project_id: Project to search in; coerced to ``str`` for the payload.
        embedding_id: Embedding to search in; coerced to ``str`` for the payload.
        embedding_tensor: Query vector, sent as ``embedding_tensor``.
        limit: Sent as ``limit``.
        similarity_filter_option: Sent as ``att_filter``; ``None`` if omitted.
        threshold: Sent as ``threshold``; ``None`` if omitted.
        question: Sent as ``question``; ``None`` if omitted.

    Returns:
        The result of ``service_requests.post_call_or_raise`` unchanged.
    """
    url = f"{NEURAL_SEARCH_BASE_URI}/most_similar_by_embedding?include_scores=true"
    data = {
        # str() mirrors request_embedding: ids may arrive as UUID objects,
        # which are not JSON serializable by default (assumes the payload is
        # sent as JSON -- TODO confirm in service_requests). No-op for strings.
        "project_id": str(project_id),
        "embedding_id": str(embedding_id),
        "embedding_tensor": embedding_tensor,
        "limit": limit,
        "att_filter": similarity_filter_option,
        "threshold": threshold,
        "question": question,
    }
    return service_requests.post_call_or_raise(url, data)

controller/playground/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)