Commit 64541a8

fixing code
1 parent 082161d commit 64541a8

6 files changed: +47 -78 lines changed

supporting-blog-content/building-a-recipe-search-with-elasticsearch/elasticsearch_connection.py

Lines changed: 6 additions & 6 deletions
@@ -5,21 +5,21 @@
 class ElasticsearchConnection:

     def __init__(self, config_file="config.yml"):
-        with open(config_file, 'r') as f:
+        with open(config_file, "r") as f:
             config = yaml.safe_load(f)
         self.client = Elasticsearch(
-            cloud_id=config['cloud_id'],
-            api_key=config['api_key']
+            cloud_id=config["cloud_id"],
+            api_key=config["api_key"]
         )

     def get_client(self):
         return self.client

     def get_async_client(self):
-        with open("config.yml", 'r') as f:
+        with open("config.yml", "r") as f:
             config = yaml.safe_load(f)
             self.client = AsyncElasticsearch(
-                cloud_id=config['cloud_id'],
-                api_key=config['api_key'],
+                cloud_id=config["cloud_id"],
+                api_key=config["api_key"],
                 request_timeout=240)
             return self.client;
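For reference, a minimal usage sketch of this connection class (assuming a config.yml with cloud_id and api_key keys next to the script; the ping call is only an illustrative connectivity check and is not part of this commit):

# Hypothetical usage sketch, not part of this commit.
from elasticsearch_connection import ElasticsearchConnection

conn = ElasticsearchConnection("config.yml")  # reads cloud_id and api_key from the YAML file
client = conn.get_client()
print(client.ping())  # True if the deployment is reachable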

supporting-blog-content/building-a-recipe-search-with-elasticsearch/infra.py

Lines changed: 12 additions & 29 deletions
@@ -8,28 +8,13 @@ def create_index_embedding():
         index="grocery-catalog-elser",
         mappings={
             "properties": {
-                "id": {
-                    "type": "integer"
-                },
-                "name": {
-                    "type": "text",
-                },
-                "description": {
-                    "type": "text",
-                    "copy_to": "description_embedding"
-                },
-                "category": {
-                    "type": "keyword"
-                },
-                "brand": {
-                    "type": "keyword"
-                },
-                "price": {
-                    "type": "float"
-                },
-                "unit": {
-                    "type": "keyword"
-                },
+                "id": {"type": "integer"},
+                "name": {"type": "text"},
+                "description": {"type": "text", "copy_to": "description_embedding"},
+                "category": {"type": "keyword"},
+                "brand": {"type": "keyword"},
+                "price": {"type": "float"},
+                "unit": {"type": "keyword"},
                 "description_embedding": {
                     "type": "semantic_text",
                     "inference_id": "elser_embeddings"
@@ -43,18 +28,16 @@ def create_index_embedding():
 def create_inference():
     response = client.inference.put(
         inference_id="elser_embeddings",
-        task_type="sparse_embedding", body={
+        task_type="sparse_embedding",
+        body={
             "service": "elser",
-            "service_settings": {
-                "num_allocations": 1,
-                "num_threads": 1
-            }
+            "service_settings": {"num_allocations": 1, "num_threads": 1}
         })
     print(response)


-if __name__ == '__main__':
+if __name__ == "__main__":

     create_inference()

-    create_index_embedding()
+    create_index_embedding()
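Because "description" is copied into the semantic_text field backed by the elser_embeddings endpoint, indexing an ordinary document is enough to trigger embedding generation. A rough sketch, with made-up field values and the client assumed to come from ElasticsearchConnection:

# Hypothetical ingest sketch, not part of this commit; "description" is routed by
# copy_to into "description_embedding", where the elser_embeddings endpoint runs
# inference at index time.
client.index(
    index="grocery-catalog-elser",
    document={
        "id": 1,
        "name": "Rolled oats",
        "description": "Whole grain rolled oats, suitable for porridge and baking.",
        "category": "cereal",
        "brand": "Acme",
        "price": 3.99,
        "unit": "kg",
    },
)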

supporting-blog-content/building-a-recipe-search-with-elasticsearch/infra_lexical_index.py

Lines changed: 8 additions & 22 deletions
@@ -8,32 +8,18 @@ def create_index():
         index="grocery-catalog",
         mappings={
             "properties": {
-                "id": {
-                    "type": "integer"
-                },
-                "name": {
-                    "type": "text",
-                },
-                "description": {
-                    "type": "text"
-                },
-                "category": {
-                    "type": "keyword"
-                },
-                "brand": {
-                    "type": "keyword"
-                },
-                "price": {
-                    "type": "float"
-                },
-                "unit": {
-                    "type": "keyword"
-                }
+                "id": {"type": "integer"},
+                "name": {"type": "text"},
+                "description": {"type": "text", "copy_to": "description_embedding"},
+                "category": {"type": "keyword"},
+                "brand": {"type": "keyword"},
+                "price": {"type": "float"},
+                "unit": {"type": "keyword"}
             }
         }
     )
     print(response)


-if __name__ == '__main__':
+if __name__ == "__main__":
     create_index()
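Queries against this lexical index are plain full-text matches; a minimal sketch (query string is made up, client setup assumed):

# Hypothetical lexical query sketch, not part of this commit.
response = client.search(
    index="grocery-catalog",
    query={"match": {"description": "gluten free pasta"}},
)
for hit in response["hits"]["hits"]:
    print(hit["_score"], hit["_source"]["name"])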

supporting-blog-content/building-a-recipe-search-with-elasticsearch/ingestion.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def partition_list(lst, chunk_size):

 async def index_data():
     global partitions
-    with open('files/output.json', 'r') as file:
+    with open("files/output.json", "r") as file:
         data_json = json.load(file)
     documents = []
     for doc in data_json:
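Only the open() call changes here. For context, a condensed sketch of how the loaded documents could be pushed in bulk with the async client; the async_bulk helper, the action shape, and the target index are assumptions, since the rest of index_data is not shown in this diff:

# Hypothetical condensed ingest sketch; not the blog's exact implementation.
import asyncio
import json

from elasticsearch.helpers import async_bulk
from elasticsearch_connection import ElasticsearchConnection

async def index_data():
    client = ElasticsearchConnection().get_async_client()
    with open("files/output.json", "r") as file:
        data_json = json.load(file)
    # One bulk action per document; index name assumed from infra.py.
    actions = [{"_index": "grocery-catalog-elser", "_source": doc} for doc in data_json]
    await async_bulk(client, actions)
    await client.close()

asyncio.run(index_data())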

supporting-blog-content/building-a-recipe-search-with-elasticsearch/ingestion_lexical_index.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ def partition_list(lst, chunk_size):

 async def index_data():
     global partitions
-    with open('files/output.json', 'r') as file:
+    with open("files/output.json", "r") as file:
         data_json = json.load(file)
     documents = []
     for doc in data_json:

supporting-blog-content/building-a-recipe-search-with-elasticsearch/search.py

Lines changed: 19 additions & 19 deletions
@@ -13,7 +13,7 @@ def format_text(description, line_length=120):
     if len(words) <= line_length:
         return description
     else:
-        return ' '.join(words[:line_length]) + '...'
+        return " ".join(words[:line_length]) + "..."


 def search_semantic(term):
@@ -36,9 +36,9 @@ def search_semantic(term):
         description = hit["_source"]["description"]
         formatted_description = format_text(description)
         result.append({
-            'score': score,
-            'name': name,
-            'description': formatted_description,
+            "score": score,
+            "name": name,
+            "description": formatted_description,
         })
     return result

@@ -64,33 +64,33 @@ def search_lexical(term):
         name = format_text(hit["_source"]["name"], line_length=10)
         description = hit["_source"]["description"]
         result.append({
-            'score': score,
-            'name': name,
-            'description': description,
+            "score": score,
+            "name": name,
+            "description": description,
         })
     return result


-if __name__ == '__main__':
+if __name__ == "__main__":
     rs1 = search_semantic(term)
     rs2 = search_lexical(term)

-    df1 = pd.DataFrame(rs1)[['name', 'score']] if rs1 else pd.DataFrame(columns=['name', 'score'])
-    df2 = pd.DataFrame(rs2)[['name', 'score']] if rs2 else pd.DataFrame(columns=['name', 'score'])
-    df1 = pd.DataFrame(rs1)[['name', 'score']] if rs1 else pd.DataFrame(columns=['name', 'score'])
-    df1['Search Type'] = 'Semantic'
+    df1 = pd.DataFrame(rs1)[["name", "score"]] if rs1 else pd.DataFrame(columns=["name", "score"])
+    df2 = pd.DataFrame(rs2)[["name", "score"]] if rs2 else pd.DataFrame(columns=["name", "score"])
+    df1 = pd.DataFrame(rs1)[["name", "score"]] if rs1 else pd.DataFrame(columns=["name", "score"])
+    df1["Search Type"] = "Semantic"

-    df2 = pd.DataFrame(rs2)[['name', 'score']] if rs2 else pd.DataFrame(columns(['name', 'score']))
-    df2['Search Type'] = 'Lexical'
+    df2 = pd.DataFrame(rs2)[["name", "score"]] if rs2 else pd.DataFrame(columns(["name", "score"]))
+    df2["Search Type"] = "Lexical"

     tabela = pd.concat([df1, df2], axis=0).reset_index(drop=True)

-    tabela = tabela[['Search Type', 'name', 'score']]
+    tabela = tabela[["Search Type", "name", "score"]]

-    tabela.columns = ['Search Type', 'Name', 'Score']
+    tabela.columns = ["Search Type", "Name", "Score"]

-    tabela['Search Type'] = tabela['Search Type'].astype(str).str.ljust(0)
-    tabela['Name'] = tabela['Name'].astype(str).str.ljust(15)
-    tabela['Score'] = tabela['Score'].astype(str).str.ljust(5)
+    tabela["Search Type"] = tabela["Search Type"].astype(str).str.ljust(0)
+    tabela["Name"] = tabela["Name"].astype(str).str.ljust(15)
+    tabela["Score"] = tabela["Score"].astype(str).str.ljust(5)

     print(tabela.to_string(index=False))
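This diff only touches quoting and result formatting; the query bodies of search_semantic and search_lexical are not shown. As a rough sketch, a semantic query against the semantic_text field created in infra.py might look like the following (the semantic query type, available in recent Elasticsearch releases, and the client setup are assumptions, not taken from this commit):

# Hypothetical sketch of the kind of query search_semantic could issue.
response = client.search(
    index="grocery-catalog-elser",
    query={
        "semantic": {
            "field": "description_embedding",  # semantic_text field, inference_id "elser_embeddings"
            "query": term,
        }
    },
)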
