README NameGraph modes (#331)

Goader · web-flow · commit 1370d5ff52f9 · 2025-03-11T23:18:11.000+01:00
* added brief modes description

* removed mode from endpoints, where it is unused, added descriptions
diff --git a/collection_models.py b/collection_models.py
@@ -108,7 +108,9 @@ class CollectionSearchByString(BaseCollectionSearchWithOther):  # instant search
     query: str = Field(title='input query (with or without spaces) which is used to search for template collections',
                        description='can not contain dots (.)',
                        pattern='^[^.]+$', examples=['zeus god'])
-    mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
+    mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$',
+                      description='* if instant - Learning to Rank is using a window size of 20\n'
+                                  '* if domain_detail - Learning to Rank is using a window size of 100')
     sort_order: Literal[SortOrder.AZ, SortOrder.ZA, SortOrder.AI, SortOrder.RELEVANCE] = Field(SortOrder.AI, title='order of the resulting collections',
                         description='* if A-Z or Z-A - sort by title (alphabetically ascending/descending)\n'
                                     '* if AI - use intelligent endpoint-specific ranking (with Learning to Rank for optimal results)\n'
@@ -129,7 +131,6 @@ class CollectionCountByStringRequest(BaseCollectionRequest):
     query: str = Field(title='input query (with or without spaces) which is used to search for template collections',
                        description='can not contain dots (.)',
                        pattern='^[^.]+$', examples=['zeus god'])
-    mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
 
 
 # ======== Collection Membership ========
@@ -145,7 +146,6 @@ class CollectionsContainingLabelCountResponse(BaseCollectionQueryResponse):
 
 class CollectionsContainingLabelRequest(BaseCollectionSearchLimitOffsetSort):
     label: str = Field(title='label for which membership will be checked for each collection', examples=['zeus'])
-    mode: str = Field('instant', title='request mode: instant, domain_detail', pattern=r'^(instant|domain_detail)$')
     max_results: int = Field(3, ge=0, title='max number of collections to return (for each page)',
                  description='return collections at [offset, offset + max_results) positions (order as in sort_order)')
     sort_order: Literal[SortOrder.AZ, SortOrder.ZA, SortOrder.AI, SortOrder.RELEVANCE] = Field(SortOrder.AI, title='order of the resulting collections',
diff --git a/models.py b/models.py
@@ -70,7 +70,11 @@ class Params(BaseModel):
                                    examples=['us'])
     mode: str = Field('full', title='request mode: instant, domain_detail, full',
                       pattern=r'^(instant|domain_detail|full)$',
-                      description='for /grouped_by_category endpoint this field will be prefixed with "grouped_"')
+                      description='modifies global limits and sampling weights of different generators:\n'
+                                  '* instant - fastest response, basic generators only\n'
+                                  '* domain_detail - balanced speed/quality, expanded search\n'
+                                  '* full - comprehensive generation with all generators (recommended)\n'
+                                  '(for /grouped_by_category endpoint this field will be prefixed with "grouped_")')
     enable_learning_to_rank: bool = Field(True, title='enable learning to rank',
                                           description='if true, the results will be sorted by '
                                                       'learning to rank algorithm')
@@ -88,7 +92,11 @@ class GroupedParams(BaseModel):
     user_info: Optional[UserInfo] = Field(None, title='information about user making request')
     mode: str = Field('full', title='request mode: instant, domain_detail, full',
                       pattern=r'^(instant|domain_detail|full)$',
-                      description='for /grouped_by_category endpoint this field will be prefixed with "grouped_"')
+                      description='modifies global limits and sampling weights of different generators:\n'
+                                  '* instant - fastest response, basic generators only\n'
+                                  '* domain_detail - balanced speed/quality, expanded search\n'
+                                  '* full - comprehensive generation with all generators (recommended)\n'
+                                  '(for /grouped_by_category endpoint this field will be prefixed with "grouped_")')
     metadata: bool = Field(True, title='return all the metadata in response')
 
 
diff --git a/namegraph/xcollections/api_matcher.py b/namegraph/xcollections/api_matcher.py
@@ -100,7 +100,7 @@ def search_by_string(
             logger.error(f'Elasticsearch search failed [by-string]', exc_info=True)
             raise HTTPException(status_code=503, detail=str(ex)) from ex
 
-    def get_collections_count_by_string(self, query: str, mode: str) -> tuple[Union[int, str], dict]:
+    def get_collections_count_by_string(self, query: str) -> tuple[Union[int, str], dict]:
         tokenized_query = ' '.join(self.tokenizer.tokenize(query)[0])
         if tokenized_query != query:
             query = f'{query} {tokenized_query}'
diff --git a/readme.md b/readme.md
@@ -97,13 +97,13 @@ Authorize to ECR:
 
 Push image to ECR:
 
-`docker push 571094861812.dkr.ecr.us-east-1.amazonaws.com/name-generator:${TAG}
+`docker push 571094861812.dkr.ecr.us-east-1.amazonaws.com/name-generator:${TAG}`
 
 ## Deploy image on remote instance
 
 Set image TAG:
 
-`export TAG=0.1.0
+`export TAG=0.1.0`
 
 Authorize EC2 instance in ECR:
 
@@ -143,6 +143,29 @@ In `conf/pipelines/prod_new.yaml` are defined pipelines. Each pipeline have:
 
 Setting `0` in `mode_weights_multiplier` or `global_limits` disables the pipeline in a given mode.
 
+### Modes
+
+NameGraph supports three modes for processing requests:
+
+- Instant Mode (`instant`):
+  - Fastest response time
+  - Produces more basic name suggestions
+  - Some advanced generators, such as `W2VGenerator`, are disabled (`mode_weights_multiplier` = 0)
+  - Often used for real-time suggestions
+
+- Domain Detail Mode (`domain_detail`):
+  - Intermediate between instant and full
+  - More comprehensive than instant, but still optimized for performance
+  - Some generators have reduced weights compared to full mode
+  - Expanded search window for collection ranking and sampling
+
+- Full Mode (`full`):
+  - Most comprehensive name generation
+  - Includes all enabled generators
+  - Uses full weights for most generators
+  - Enables advanced generators such as `Wikipedia2VGenerator` and `W2VGenerator`
+  - Takes longer to process, but provides the most diverse results
+
 ### Sampler
 
 Each request defines:
diff --git a/web_api.py b/web_api.py
@@ -512,8 +512,7 @@ async def get_collections_count_by_string(query: CollectionCountByStringRequest)
         count = 0
         es_response_metadata = {'n_total_hits': 0}
     else:
-        count, es_response_metadata = collections_matcher.get_collections_count_by_string(query.query,
-                                                                                          mode=query.mode)
+        count, es_response_metadata = collections_matcher.get_collections_count_by_string(query.query)
 
     time_elapsed = (perf_counter() - t_before) * 1000