Skip to content

Commit a9bd549

Browse files
Byczong and djstrong authored
feat: [sc-26070] Disable adding ".eth" suffix to suggestions (#327)
* remove adding .eth suffix for collection API
* refactor models; add separate suggestion type for collection endpoints
* disable appending .eth for all endpoints
* refactor order; make separate request formatter
* delete duplicated fields
* fix .eth-related tests
* fix deprecation warning
* fix test
* rename fields and classes in (collection_)models.py; adjust tests
* rename

---------

Co-authored-by: djstrong <djstrong@gmail.com>
1 parent 75783c6 commit a9bd549

23 files changed

+519 -390 lines changed

collection_models.py

Lines changed: 99 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,24 @@
1+
from datetime import datetime
12
from typing import Optional, Literal, Union
2-
from namegraph.xcollections.query_builder import SortOrder
3-
from pydantic import BaseModel, Field, field_validator
4-
from pydantic_core.core_schema import FieldValidationInfo
3+
from pydantic import BaseModel, Field, PositiveInt, field_validator
4+
from pydantic_core.core_schema import ValidationInfo
55

6-
from models import UserInfo
6+
from namegraph.xcollections.query_builder import SortOrder
7+
from models import UserInfo, Metadata, RecursiveRelatedCollection
78

89

9-
class CollectionName(BaseModel):
10-
name: str = Field(title='name with `.eth`')
11-
namehash: str = Field(title='namehash of the name')
10+
class CollectionLabel(BaseModel):
11+
label: str = Field(title='label from a collection')
1212

1313

1414
class Collection(BaseModel):
1515
collection_id: str = Field(title='id of the collection')
1616
title: str = Field(title='title of the collection')
1717
owner: str = Field(title='ETH address of the collection owner')
18-
number_of_names: int = Field(title='total number of names in the collection')
18+
number_of_labels: int = Field(title='total number of labels in the collection')
1919
last_updated_timestamp: int = Field(title='timestamp in milliseconds of last collection update')
20-
top_names: list[CollectionName] = Field(
21-
title='top names stored in the collection (limited by `limit_names`)', description='can not be greater than 10')
20+
top_labels: list[CollectionLabel] = Field(
21+
title='top labels stored in the collection (limited by `limit_labels`)', description='can not be greater than 10')
2222
types: list[str] = Field(title='list of types to which the collection belongs',
2323
description='example of type is `human`')
2424
avatar_emoji: str = Field(title='avatar emoji associated with this collection')
@@ -49,7 +49,7 @@ class BaseCollectionRequest(BaseModel):
4949

5050

5151
class BaseCollectionSearchLimitOffsetSort(BaseCollectionRequest):
52-
limit_names: int = Field(10, ge=0, le=10, title='the number of names returned in each collection',
52+
limit_labels: int = Field(10, ge=0, le=10, title='the number of labels returned in each collection',
5353
description='can not be greater than 10')
5454
offset: int = Field(0,
5555
title='offset of the first collection to return (used for pagination)',
@@ -64,9 +64,9 @@ class BaseCollectionSearch(BaseCollectionSearchLimitOffsetSort):
6464
title='number of collections with the same type which are not penalized',
6565
description='* set to null if you want to disable the penalization\n'
6666
'* if the penalization algorithm is turned on then 3 times more results (than max_related_collections) are retrieved from Elasticsearch')
67-
name_diversity_ratio: Optional[float] = Field(None, examples=[0.5], ge=0.0, le=1.0,
68-
title='similarity value used for adding penalty to collections with similar names to other collections',
69-
description='* if more than name_diversity_ratio % of the names have already been used, penalize the collection\n'
67+
label_diversity_ratio: Optional[float] = Field(None, examples=[0.5], ge=0.0, le=1.0,
68+
title='similarity value used for adding penalty to collections with similar labels to other collections',
69+
description='* if more than label_diversity_ratio % of the labels have already been used, penalize the collection\n'
7070
'* set to null if you want disable the penalization\n'
7171
'* if the penalization algorithm is turned on then 3 times more results (than `max_related_collections`) '
7272
'are retrieved from Elasticsearch'
@@ -86,13 +86,15 @@ class BaseCollectionSearchWithOther(BaseCollectionSearch): # instant search, do
8686
'\nif not met, 422 status code is returned')
8787

8888
@field_validator('max_other_collections')
89-
def max_other_between_min_other_and_max_total(cls, v: int, info: FieldValidationInfo) -> int:
89+
@classmethod
90+
def max_other_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
9091
if 'min_other_collections' in info.data and info.data['min_other_collections'] > v:
9192
raise ValueError('min_other_collections must not be greater than max_other_collections')
9293
return v
9394

9495
@field_validator('max_total_collections')
95-
def max_related_between_min_other_and_max_total(cls, v: int, info: FieldValidationInfo) -> int:
96+
@classmethod
97+
def max_related_between_min_other_and_max_total(cls, v: int, info: ValidationInfo) -> int:
9698
if 'max_other_collections' in info.data and v < info.data['max_other_collections']:
9799
raise ValueError('max_other_collections must not be greater than max_total_collections')
98100
if 'min_other_collections' in info.data and 'max_related_collections' in info.data and \
@@ -129,18 +131,19 @@ class CollectionCountByStringRequest(BaseCollectionRequest):
129131
pattern='^[^.]+$', examples=['zeus god'])
130132
mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
131133

134+
132135
# ======== Collection Membership ========
133136

134-
class CollectionsContainingNameCountRequest(BaseCollectionRequest):
137+
class CollectionsContainingLabelCountRequest(BaseCollectionRequest):
135138
label: str = Field(title='label for which collection membership will be checked', examples=['zeus'])
136139

137140

138-
class CollectionsContainingNameCountResponse(BaseCollectionQueryResponse):
141+
class CollectionsContainingLabelCountResponse(BaseCollectionQueryResponse):
139142
count: Union[int, str] = Field(
140143
title='count of collections containing input label or `1000+` if more than 1000 results')
141144

142145

143-
class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
146+
class CollectionsContainingLabelRequest(BaseCollectionSearchLimitOffsetSort):
144147
label: str = Field(title='label for which membership will be checked for each collection', examples=['zeus'])
145148
mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
146149
max_results: int = Field(3, ge=0, title='max number of collections to return (for each page)',
@@ -150,9 +153,85 @@ class CollectionsContainingNameRequest(BaseCollectionSearchLimitOffsetSort):
150153
'* if AI - use intelligent endpoint-specific ranking\n'
151154
'* if Relevance - use relevance ranking')
152155

153-
class CollectionsContainingNameResponse(BaseCollectionQueryResponse):
156+
157+
class CollectionsContainingLabelResponse(BaseCollectionQueryResponse):
154158
collections: list[Collection] = Field(title='list of public collections the provided label is a member of')
155159

156160

157161
class GetCollectionByIdRequest(BaseCollectionRequest):
158162
collection_id: str = Field(title='id of the collection to fetch', examples=['ri2QqxnAqZT7'])
163+
164+
165+
# ======== Suggestions from collections ========
166+
167+
class SuggestionFromCollection(BaseModel):
168+
label: str = Field(title="label from a collection")
169+
tokenized_label: list[str] = Field(title="suggested tokenization of label")
170+
metadata: Optional[Metadata] = Field(None, title="information how suggestion was generated",
171+
description="if metadata=False this key is absent")
172+
173+
174+
class CollectionWithSuggestions(BaseModel):
175+
suggestions: list[SuggestionFromCollection] = Field(title='suggestions from a collection')
176+
collection_id: str = Field(title='id of the collection')
177+
collection_title: str = Field(title='title of the collection')
178+
collection_members_count: int = Field(title='number of members in the collection')
179+
related_collections: list[RecursiveRelatedCollection] = Field(title='related collections to this collection')
180+
181+
182+
class SampleCollectionMembers(BaseModel):
183+
user_info: Optional[UserInfo] = Field(None, title='information about user making request')
184+
collection_id: str = Field(title='id of the collection to sample from', examples=['qdeq7I9z0_jv'])
185+
metadata: bool = Field(True, title='return all the metadata in response')
186+
max_sample_size: int = Field(title='the maximum number of members to sample', ge=1, le=100,
187+
description='if the collection has less members than max_sample_size, '
188+
'all the members will be returned', examples=[5])
189+
seed: int = Field(default_factory=lambda: int(datetime.now().timestamp()),
190+
title='seed for random number generator',
191+
description='if not provided (but can\'t be null), random seed will be generated')
192+
193+
194+
class Top10CollectionMembersRequest(BaseModel):
195+
user_info: Optional[UserInfo] = Field(None, title='information about user making request')
196+
collection_id: str = Field(title='id of the collection to fetch labels from', examples=['ri2QqxnAqZT7'])
197+
metadata: bool = Field(True, title='return all the metadata in response')
198+
max_recursive_related_collections: int = Field(3, ge=0, le=10,
199+
title='Set to 0 to disable the "recursive related collection search". '
200+
'When set to a value between 1 and 10, '
201+
'for each related collection we find, '
202+
'we also do a (depth 1 recursive) lookup for this many related collections '
203+
'to the related collection.')
204+
205+
206+
class ScrambleCollectionTokens(BaseModel):
207+
user_info: Optional[UserInfo] = Field(None, title='information about user making request')
208+
collection_id: str = Field(title='id of the collection to take tokens from', examples=['3OB_f2vmyuyp'])
209+
metadata: bool = Field(True, title='return all the metadata in response')
210+
method: Literal['left-right-shuffle', 'left-right-shuffle-with-unigrams', 'full-shuffle'] = \
211+
Field('left-right-shuffle-with-unigrams', title='method used to scramble tokens and generate new suggestions',
212+
description='* left-right-shuffle - tokenize labels as bigrams and shuffle the right-side tokens (do not use unigrams)'
213+
'\n* left-right-shuffle-with-unigrams - same as above, but with some tokens swapped with unigrams'
214+
'\n* full-shuffle - shuffle all tokens from bigrams and unigrams and create random bigrams')
215+
n_top_members: int = Field(25, title='number of collection\'s top members to include in scrambling', ge=1)
216+
max_suggestions: Optional[PositiveInt] = Field(10, title='maximal number of suggestions to generate',
217+
examples=[10], description='must be a positive integer or null\n* number of generated suggestions will be '
218+
'`max_suggestions` or less (exactly `max_suggestions` if there are enough members)\n'
219+
'* if null, no tokens are repeated')
220+
seed: int = Field(default_factory=lambda: int(datetime.now().timestamp()),
221+
title='seed for random number generator',
222+
description='if not provided (but can\'t be null), random seed will be generated')
223+
224+
225+
class FetchCollectionMembersRequest(BaseModel):
226+
collection_id: str = Field(
227+
title='id of the collection to fetch members from', examples=['ri2QqxnAqZT7']
228+
)
229+
offset: int = Field(
230+
0, title='number of members to skip', description='used for pagination', ge=0
231+
)
232+
limit: int = Field(
233+
10, title='maximum number of members to return', description='used for pagination', ge=1,
234+
)
235+
metadata: bool = Field(
236+
True, title='return all the metadata in response'
237+
)

conf/prod_config_new.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -172,5 +172,5 @@ collections:
172172
other_collections_path: data/collections_data/other_collections.json
173173
collections_limit: 3
174174
suggestions_limit: 25 # per one collections
175-
name_diversity_ratio: 0.5
175+
label_diversity_ratio: 0.5
176176
max_per_type: 2

conf/test_config_new.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,5 +175,5 @@ collections:
175175
other_collections_path: data/collections_data/other_collections.json
176176
collections_limit: 3
177177
suggestions_limit: 25 # per one collections
178-
name_diversity_ratio: 0.5
178+
label_diversity_ratio: 0.5
179179
max_per_type: 2

0 commit comments

Comments (0)