namehash
diff --git a/models.py b/models.py
Lines changed: 10 additions & 10 deletions
diff --git a/namegraph/generation/collection_generator.py b/namegraph/generation/collection_generator.py
Lines changed: 1 addition & 1 deletion
diff --git a/namegraph/xgenerator.py b/namegraph/xgenerator.py
Lines changed: 6 additions & 6 deletions
diff --git a/research/elasticsearch/generate-report-only-collections.py b/research/elasticsearch/generate-report-only-collections.py
Lines changed: 5 additions & 5 deletions
diff --git a/research/elasticsearch/generate.sh b/research/elasticsearch/generate.sh
Lines changed: 1 addition & 1 deletion
@@ -75,8 +75,8 @@ class Params(BaseModel):
                                           description='if true, the results will be sorted by '
                                                       'learning to rank algorithm')
     label_diversity_ratio: Optional[float] = \
-        Field(0.5, examples=[0.5], ge=0.0, le=1.0, title='collection diversity parameter based on names',
-              description='adds penalty to collections with similar names to other collections\n'
+        Field(0.5, examples=[0.5], ge=0.0, le=1.0, title='collection diversity parameter based on labels',
+              description='adds penalty to collections with similar labels to other collections\n'
                           'if null, then no penalty will be added')
     max_per_type: Optional[int] = \
         Field(2, examples=[2], ge=1, title='collection diversity parameter based on collection types',
@@ -119,8 +119,8 @@ class RelatedCategoryParams(BaseModel):
     max_related_collections: int = Field(6, ge=0, le=10,
                                          title='max number of related collections returned. '
                                                'If 0 it effectively turns off any related collection search.')
-    max_names_per_related_collection: int = Field(10, ge=1, le=10,
-                                                  title='max number of names returned in any related collection')
+    max_labels_per_related_collection: int = Field(10, ge=1, le=10,
+                                                  title='max number of labels returned in any related collection')
     max_recursive_related_collections: int = Field(3, ge=0, le=10,
                                                    title='Set to 0 to disable the "recursive related collection search". '
                                                          'When set to a value between 1 and 10, '
@@ -131,8 +131,8 @@ class RelatedCategoryParams(BaseModel):
                                           description='if true, the results will be sorted by '
                                                       'learning to rank algorithm')
     label_diversity_ratio: Optional[float] = \
-        Field(0.5, examples=[0.5], ge=0.0, le=1.0, title='collection diversity parameter based on names',
-              description='adds penalty to collections with similar names to other collections\n'
+        Field(0.5, examples=[0.5], ge=0.0, le=1.0, title='collection diversity parameter based on labels',
+              description='adds penalty to collections with similar labels to other collections\n'
                           'if null, then no penalty will be added')
     max_per_type: Optional[int] = \
         Field(2, examples=[2], ge=1, title='collection diversity parameter based on collection types',
@@ -158,10 +158,10 @@ class GroupedLabelRequest(BaseModel):
                        description='* cannot contain dots (.)'
                                    '\n* if enclosed in double quotes assuming label is pre-tokenized')
 
-    # min_primary_fraction: float = Field(0.1, title='minimal fraction of primary names',
+    # min_primary_fraction: float = Field(0.1, title='minimal fraction of primary labels',
     #                                     ge=0.0, le=1.0,
     #                                     description='ensures at least `min_suggestions * min_primary_fraction` '
-    #                                                 'primary names will be generated')
+    #                                                 'primary labels will be generated')
     params: GroupedParams = Field(GroupedParams(), title='pipeline parameters',
                                   description='includes all the parameters for all nodes of the pipeline')
 
@@ -179,10 +179,10 @@ class LabelRequest(BaseModel):
                                  ge=1, le=generator.config.generation.limit)
     max_suggestions: int = Field(100, title='maximal number of suggestions to generate',
                                  ge=1)
-    min_primary_fraction: float = Field(0.1, title='minimal fraction of primary names',
+    min_primary_fraction: float = Field(0.1, title='minimal fraction of primary labels',
                                         ge=0.0, le=1.0,
                                         description='ensures at least `min_suggestions * min_primary_fraction` '
-                                                    'primary names will be generated')
+                                                    'primary labels will be generated')
     params: Optional[Params] = Field(None, title='pipeline parameters',
                                      description='includes all the parameters for all nodes of the pipeline')
 
 
@@ -86,7 +86,7 @@ def apply(self, name: InputName, interpretation: Interpretation) -> Iterable[Gen
             tokens.extend(emojis)
 
         params = name.params if name.params is not None else dict()
-        suggestions_limit = max(params.get('max_names_per_related_collection', 0), self.suggestions_limit)
+        suggestions_limit = max(params.get('max_labels_per_related_collection', 0), self.suggestions_limit)
         logger.info(f'CollectionGenerator query: {tokens}')
         collections, _ = self.collection_matcher.search_for_generator(
             tuple(tokens),
 
@@ -154,7 +154,7 @@ def generate_grouped_names(
             self,
             name: str,
             max_related_collections: int = 5,
-            max_names_per_related_collection: int = 5,
+            max_labels_per_related_collection: int = 5,
             max_recursive_related_collections: int = 5,
             categories_params=None,
             min_total_suggestions: int = 50,
@@ -164,7 +164,7 @@ def generate_grouped_names(
         categories_params = categories_params or {}
 
         params['max_related_collections'] = max_related_collections
-        params['max_names_per_related_collection'] = max_names_per_related_collection
+        params['max_labels_per_related_collection'] = max_labels_per_related_collection
         params['max_recursive_related_collections'] = max_recursive_related_collections
         params['categories_params'] = categories_params
         params['min_total_suggestions'] = min_total_suggestions
@@ -201,7 +201,7 @@ def generate_grouped_names(
                     max_suggestions = category_params.max_suggestions
                 except AttributeError:  # RelatedCategoryParams
                     min_suggestions = 0
-                    max_suggestions = 3 * category_params.max_related_collections * max(category_params.max_names_per_related_collection, self.config.collections.suggestions_limit) # 3 interpretations
+                    max_suggestions = 3 * category_params.max_related_collections * max(category_params.max_labels_per_related_collection, self.config.collections.suggestions_limit) # 3 interpretations
 
                 # TODO should they use the same set of suggestions (for deduplications)
                 suggestions = meta_sampler.sample(name, 'weighted-sampling',
@@ -235,7 +235,7 @@ def is_already_sampled(suggestion: str) -> bool:
                         max_suggestions = category_params.max_suggestions
                     except AttributeError:  # RelatedCategoryParams
                         min_suggestions = 0
-                        max_suggestions = 3 * category_params.max_related_collections * max(category_params.max_names_per_related_collection, self.config.collections.suggestions_limit)
+                        max_suggestions = 3 * category_params.max_related_collections * max(category_params.max_labels_per_related_collection, self.config.collections.suggestions_limit)
 
                     futures[executor.submit(meta_sampler.sample, name, 'weighted-sampling',
                                             min_suggestions=min_suggestions, max_suggestions=max_suggestions,
@@ -265,7 +265,7 @@ def is_already_sampled(suggestion: str) -> bool:
                     collections_id2related[suggestion.collection_id] = suggestion.related_collections or []
 
                 collection_suggestions = all_related_suggestions[suggestion.collection_id]
-                if len(collection_suggestions) < max_names_per_related_collection:
+                if len(collection_suggestions) < max_labels_per_related_collection:
                     collection_suggestions.append(suggestion)
             del grouped_suggestions['related']
 
@@ -319,7 +319,7 @@ def is_already_sampled(suggestion: str) -> bool:
 
         category_params = getattr(categories_params, 'related')
         for category, related_suggestions in all_related_suggestions.items():
-            max_suggestions = category_params.max_names_per_related_collection
+            max_suggestions = category_params.max_labels_per_related_collection
             related_suggestions.data = related_suggestions.data[:max_suggestions]
 
         # cap related collections to max_related_collections
 
@@ -107,7 +107,7 @@ def write(s: str):
                 'country': 'pl'
             }})
 
-    input_names = ['fire', 'funny', 'funnyshit', 'funnyshitass', 'funnyshitshit', 'lightwalker', 'josiahadams',
+    input_labels = ['fire', 'funny', 'funnyshit', 'funnyshitass', 'funnyshitshit', 'lightwalker', 'johndoe',
                    'kwrobel', 'krzysztofwrobel', 'pikachu', 'mickey', 'adoreyoureyes', 'face', 'theman', 'goog',
                    'billycorgan', '[003fda97309fd6aa9d7753dcffa37da8bb964d0fb99eba99d0770e76fc5bac91]', 'a' * 101,
                    'dogcat', 'firepower', 'tubeyou', 'fireworks', 'hacker', 'firecar', '😊😊😊', 'anarchy',
@@ -155,7 +155,7 @@ def write(s: str):
     times = []
 
     request_times = collections.defaultdict(list)
-    for input_name in tqdm(input_names):
+    for input_name in tqdm(input_labels):
         write(f'<h1>{input_name}</h1>')
 
         write(f'<section>')
@@ -282,11 +282,11 @@ def write(s: str):
 
     write(f'<h1>Mean share</h1>')
     for generator_name, values in sorted(stats.items(), key=lambda x: sum(x[1]), reverse=True):
-        write(f'<p>{(100 * sum(values) / len(input_names)):.2f}% {generator_name}</p>')
+        write(f'<p>{(100 * sum(values) / len(input_labels)):.2f}% {generator_name}</p>')
 
     write(f'<h1>MRR</h1>')
     for generator_name, values in sorted(mrr.items(), key=lambda x: sum(x[1]), reverse=True):
-        write(f'<p>{(sum(values) / len(input_names)):.2f} {generator_name}</p>')
+        write(f'<p>{(sum(values) / len(input_labels)):.2f} {generator_name}</p>')
 
     write(f'<h1>First position</h1>')
     for generator_name, values in sorted(first_position.items(), key=lambda x: sum(x[1]) / len(x[1]), reverse=False):
@@ -301,7 +301,7 @@ def write(s: str):
             for i, position in enumerate(positions):
                 ap.append((i + 1) / position)
             map.append(sum(ap) / len(ap))
-        maps.append((sum(map) / len(input_names), generator_name))
+        maps.append((sum(map) / len(input_labels), generator_name))
 
     for map, generator_name in sorted(maps, reverse=True):
         write(f'<p>{map:.2f} {generator_name}</p>')
 
@@ -1 +1 @@
-python search.py $1 --host $ES_HOST --port $ES_PORT --username $ES_USERNAME --password $ES_PASSWORD "apple" "apples" "bmw" "hulk" "marvel" "marvel characters" "fruit" "fruits" "britney spears" "bmw car models" "cars" "football players" "cristiano ronaldo" "planets" "countries" "france" "switzerland" "bmw vehicles" "greek gods" "zeus" "athena" "fire" "funny" "funny shit" "funny shit ass" "funny shit shit" "light walker" "josiah adams" "k wrobel" "krzysztof wrobel" "pikachu" "mickey" "adore your eyes" "face" "the man" "goog" "billy corgan" "003 fda 97309 fd 6 a a 9 d 7753 dc ffa 37 da 8 bb 964 d 0 fb 99 eb a 99 d 0770 e 76 fc 5 bac 91" "aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaa a" "dog cat" "firepower" "tube you" "fireworks" "hacker" "fire car" "" "anarchy" "pray for ukraine" "krakow dragon" "fifty six" "" "" "asd" "bartek" "hongkong" "hongkonger" "tyler" "as df as df as df 3453212345" "nine inch nails" "krakow" "joe biden" "european union" "roger federer" "suzuki" "pirates" "doge" "eth corner" "google" "apple" "001" "stop doing fake bids its honestly lame my guy" "kfc so good" "wallet" "" "porno" "sex" "slut wife" "god" "im expensive" "htaccess" "nike" "80000" "starbucks" "ukraine" "" "sony" "kevin" "discord" "monaco" "market" "sports bet" "vol o dy myr ze lensky" "coffee" "gold" "hodl" "yeezy" "brantly" "jeezy" "vitalik" "example registration" "py me" "avalanche" "messy" "messi" "king messi" "abc" "testing" "superman" "facebook" "test" "name hash" "test b" "happy people" "muscle" "billy bob" "quo" "circle ci" "bitcoin mine" "power outage" "shooting arrow at the sky" "pink floyd" "highest mountains"
+python search.py $1 --host $ES_HOST --port $ES_PORT --username $ES_USERNAME --password $ES_PASSWORD "apple" "apples" "bmw" "hulk" "marvel" "marvel characters" "fruit" "fruits" "britney spears" "bmw car models" "cars" "football players" "cristiano ronaldo" "planets" "countries" "france" "switzerland" "bmw vehicles" "greek gods" "zeus" "athena" "fire" "funny" "funny shit" "funny shit ass" "funny shit shit" "light walker" "john doe" "k wrobel" "krzysztof wrobel" "pikachu" "mickey" "adore your eyes" "face" "the man" "goog" "billy corgan" "003 fda 97309 fd 6 a a 9 d 7753 dc ffa 37 da 8 bb 964 d 0 fb 99 eb a 99 d 0770 e 76 fc 5 bac 91" "aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaa a" "dog cat" "firepower" "tube you" "fireworks" "hacker" "fire car" "" "anarchy" "pray for ukraine" "krakow dragon" "fifty six" "" "" "asd" "bartek" "hongkong" "hongkonger" "tyler" "as df as df as df 3453212345" "nine inch nails" "krakow" "joe biden" "european union" "roger federer" "suzuki" "pirates" "doge" "eth corner" "google" "apple" "001" "stop doing fake bids its honestly lame my guy" "kfc so good" "wallet" "" "porno" "sex" "slut wife" "god" "im expensive" "htaccess" "nike" "80000" "starbucks" "ukraine" "" "sony" "kevin" "discord" "monaco" "market" "sports bet" "vol o dy myr ze lensky" "coffee" "gold" "hodl" "yeezy" "brantly" "jeezy" "vitalik" "example registration" "py me" "avalanche" "messy" "messi" "king messi" "abc" "testing" "superman" "facebook" "test" "name hash" "test b" "happy people" "muscle" "billy bob" "quo" "circle ci" "bitcoin mine" "power outage" "shooting arrow at the sky" "pink floyd" "highest mountains"
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-python search.py $1 --host $ES_HOST --port $ES_PORT --username $ES_USERNAME --password $ES_PASSWORD "apple" "apples" "bmw" "hulk" "marvel" "marvel characters" "fruit" "fruits" "britney spears" "bmw car models" "cars" "football players" "cristiano ronaldo" "planets" "countries" "france" "switzerland" "bmw vehicles" "greek gods" "zeus" "athena" "fire" "funny" "funny shit" "funny shit ass" "funny shit shit" "light walker" "josiah adams" "k wrobel" "krzysztof wrobel" "pikachu" "mickey" "adore your eyes" "face" "the man" "goog" "billy corgan" "003 fda 97309 fd 6 a a 9 d 7753 dc ffa 37 da 8 bb 964 d 0 fb 99 eb a 99 d 0770 e 76 fc 5 bac 91" "aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaa a" "dog cat" "firepower" "tube you" "fireworks" "hacker" "fire car" "" "anarchy" "pray for ukraine" "krakow dragon" "fifty six" "" "" "asd" "bartek" "hongkong" "hongkonger" "tyler" "as df as df as df 3453212345" "nine inch nails" "krakow" "joe biden" "european union" "roger federer" "suzuki" "pirates" "doge" "eth corner" "google" "apple" "001" "stop doing fake bids its honestly lame my guy" "kfc so good" "wallet" "" "porno" "sex" "slut wife" "god" "im expensive" "htaccess" "nike" "80000" "starbucks" "ukraine" "" "sony" "kevin" "discord" "monaco" "market" "sports bet" "vol o dy myr ze lensky" "coffee" "gold" "hodl" "yeezy" "brantly" "jeezy" "vitalik" "example registration" "py me" "avalanche" "messy" "messi" "king messi" "abc" "testing" "superman" "facebook" "test" "name hash" "test b" "happy people" "muscle" "billy bob" "quo" "circle ci" "bitcoin mine" "power outage" "shooting arrow at the sky" "pink floyd" "highest mountains"
	`1`	+python search.py $1 --host $ES_HOST --port $ES_PORT --username $ES_USERNAME --password $ES_PASSWORD "apple" "apples" "bmw" "hulk" "marvel" "marvel characters" "fruit" "fruits" "britney spears" "bmw car models" "cars" "football players" "cristiano ronaldo" "planets" "countries" "france" "switzerland" "bmw vehicles" "greek gods" "zeus" "athena" "fire" "funny" "funny shit" "funny shit ass" "funny shit shit" "light walker" "john doe" "k wrobel" "krzysztof wrobel" "pikachu" "mickey" "adore your eyes" "face" "the man" "goog" "billy corgan" "003 fda 97309 fd 6 a a 9 d 7753 dc ffa 37 da 8 bb 964 d 0 fb 99 eb a 99 d 0770 e 76 fc 5 bac 91" "aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaaaa aaaa a" "dog cat" "firepower" "tube you" "fireworks" "hacker" "fire car" "" "anarchy" "pray for ukraine" "krakow dragon" "fifty six" "" "" "asd" "bartek" "hongkong" "hongkonger" "tyler" "as df as df as df 3453212345" "nine inch nails" "krakow" "joe biden" "european union" "roger federer" "suzuki" "pirates" "doge" "eth corner" "google" "apple" "001" "stop doing fake bids its honestly lame my guy" "kfc so good" "wallet" "" "porno" "sex" "slut wife" "god" "im expensive" "htaccess" "nike" "80000" "starbucks" "ukraine" "" "sony" "kevin" "discord" "monaco" "market" "sports bet" "vol o dy myr ze lensky" "coffee" "gold" "hodl" "yeezy" "brantly" "jeezy" "vitalik" "example registration" "py me" "avalanche" "messy" "messi" "king messi" "abc" "testing" "superman" "facebook" "test" "name hash" "test b" "happy people" "muscle" "billy bob" "quo" "circle ci" "bitcoin mine" "power outage" "shooting arrow at the sky" "pink floyd" "highest mountains"