Commit dcb132b

chore: remove f-strings from logs for performance reasons (#3212)
* Use the %s syntax on all debug messages
* Use the %s syntax on some more debug messages
* Use the %s syntax on info messages
* Use the %s syntax on warning messages
* Use the %s syntax on error and exception messages
* mypy
* pylint
* trigger tutorials execution in CI
* trigger tutorials execution on CI
* black
* remove embeddings from repr
* fix Document `__repr__`
* address feedback
* mypy
1 parent 8fbccbd commit dcb132b

Some content is hidden: large commits have part of their diff collapsed by default, so only a subset of the 60 changed files is shown below.

60 files changed: +235 additions, -209 deletions
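The pattern repeated in every hunk below is the standard lazy-logging idiom: passing the format string and its arguments to the logger separately defers string interpolation until a handler actually emits the record, whereas an f-string is always evaluated at the call site, even when the message is filtered out by the log level. A minimal, self-contained sketch of the difference (the logger name and the example payload are illustrative, not taken from the commit):

import logging

logging.basicConfig(level=logging.WARNING)  # DEBUG records will be filtered out
logger = logging.getLogger("haystack.example")  # illustrative logger name

body = {"query": {"bool": {"must": [{"match": {"content": "who wrote faust"}}]}}}

# Eager: the f-string builds the full message before logger.debug() runs,
# so str(body) is computed even though the record is discarded at WARNING level.
logger.debug(f"Retriever query: {body}")

# Lazy: only the template and arguments are stored on the LogRecord;
# %-formatting happens later, and only if some handler actually emits the record.
logger.debug("Retriever query: %s", body)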

haystack/document_stores/deepsetcloud.py
Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ def wrapper(self, *args, **kwargs):
        args_as_kwargs = args_to_kwargs(args, func)
        parameters = {**args_as_kwargs, **kwargs}
-       logger.info(f"Input to {func.__name__}: {parameters}")
+       logger.info("Input to %s: %s", func.__name__, parameters)
    return wrapper

haystack/document_stores/elasticsearch.py
Lines changed: 11 additions & 7 deletions

@@ -200,7 +200,7 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st
    """
    # Check if index_name refers to an alias
    if self.client.indices.exists_alias(name=index_name):
-       logger.debug(f"Index name {index_name} is an alias.")
+       logger.debug("Index name %s is an alias.", index_name)

    # check if the existing index has the embedding field; if not create it
    if self.client.indices.exists(index=index_name, headers=headers):

@@ -949,7 +949,7 @@ def query(
        all_terms_must_match=all_terms_must_match,
    )

-   logger.debug(f"Retriever query: {body}")
+   logger.debug("Retriever query: %s", body)
    result = self.client.search(index=index, body=body, headers=headers)["hits"]["hits"]

    documents = [

@@ -1086,7 +1086,7 @@ def query_batch(
    body.append(headers)
    body.append(cur_query_body)

-   logger.debug(f"Retriever query: {body}")
+   logger.debug("Retriever query: %s", body)
    responses = self.client.msearch(index=index, body=body)

    all_documents = []

@@ -1287,7 +1287,7 @@ def query_by_embedding(
    if excluded_meta_data:
        body["_source"] = {"excludes": excluded_meta_data}

-   logger.debug(f"Retriever query: {body}")
+   logger.debug("Retriever query: %s", body)
    try:
        result = self.client.search(index=index, body=body, request_timeout=300, headers=headers)["hits"]["hits"]
        if len(result) == 0:

@@ -1458,12 +1458,16 @@ def update_embeddings(
    if update_existing_embeddings:
        document_count = self.get_document_count(index=index, headers=headers)
-       logger.info(f"Updating embeddings for all {document_count} docs ...")
    else:
        document_count = self.get_document_count(
            index=index, filters=filters, only_documents_without_embedding=True, headers=headers
        )
-       logger.info(f"Updating embeddings for {document_count} docs without embeddings ...")
+
+   logger.info(
+       "Updating embeddings for all %s docs %s...",
+       document_count,
+       "without embeddings" if not update_existing_embeddings else "",
+   )

    result = self._get_all_documents_in_index(
        index=index,

@@ -1674,7 +1678,7 @@ def delete_index(self, index: str):
def _delete_index(self, index: str):
    if self.client.indices.exists(index):
        self.client.indices.delete(index=index, ignore=[400, 404])
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)


class ElasticsearchDocumentStore(BaseElasticsearchDocumentStore):
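In the update_embeddings hunk above, the two branch-specific messages are folded into a single call whose last argument supplies the optional suffix, so the message stays lazily formatted in both branches. A small sketch of how that conditional argument renders; the helper function, logger name, and counts are made up for illustration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("haystack.example")  # illustrative logger name

def log_update(document_count: int, update_existing_embeddings: bool) -> None:
    # Same shape as the merged call in update_embeddings: the suffix is passed
    # as a regular argument instead of branching into two separate f-strings.
    logger.info(
        "Updating embeddings for all %s docs %s...",
        document_count,
        "without embeddings" if not update_existing_embeddings else "",
    )

log_update(120, True)   # -> Updating embeddings for all 120 docs ...
log_update(40, False)   # -> Updating embeddings for all 40 docs without embeddings...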

haystack/document_stores/faiss.py
Lines changed: 2 additions & 2 deletions

@@ -346,7 +346,7 @@ def update_embeddings(
        logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
        return

-   logger.info(f"Updating embeddings for {document_count} docs...")
+   logger.info("Updating embeddings for %s docs...", document_count)
    vector_id = sum(index.ntotal for index in self.faiss_indexes.values())

    result = self._query(

@@ -568,7 +568,7 @@ def delete_index(self, index: str):
    )
    if index in self.faiss_indexes:
        del self.faiss_indexes[index]
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)
    super().delete_index(index)

def query_by_embedding(

haystack/document_stores/memory.py
Lines changed: 3 additions & 4 deletions

@@ -451,11 +451,10 @@ def update_embeddings(
    result = self._query(
        index=index, filters=filters, only_documents_without_embedding=not update_existing_embeddings
    )
-   document_count = len(result)
-   logger.info(f"Updating embeddings for {document_count} docs ...")
+   logger.info("Updating embeddings for %s docs ...", len(result) if logger.level > logging.DEBUG else 0)
    batched_documents = get_batches_from_generator(result, batch_size)
    with tqdm(
-       total=document_count, disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"
+       total=len(result), disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"
    ) as progress_bar:
        for document_batch in batched_documents:
            embeddings = retriever.embed_documents(document_batch)  # type: ignore

@@ -782,7 +781,7 @@ def delete_index(self, index: str):
    """
    if index in self.indexes:
        del self.indexes[index]
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)

def delete_labels(
    self,

haystack/document_stores/memory_knowledgegraph.py
Lines changed: 2 additions & 2 deletions

@@ -37,7 +37,7 @@ def create_index(self, index: Optional[str] = None):
    if index not in self.indexes:
        self.indexes[index] = Graph()
    else:
-       logger.warning(f"Index '{index}' is already present.")
+       logger.warning("Index '%s' is already present.", index)

def delete_index(self, index: Optional[str] = None):
    """

@@ -49,7 +49,7 @@ def delete_index(self, index: Optional[str] = None):
    if index in self.indexes:
        del self.indexes[index]
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)

def import_from_ttl_file(self, path: Path, index: Optional[str] = None):
    """

haystack/document_stores/milvus1.py
Lines changed: 13 additions & 12 deletions

@@ -313,20 +313,21 @@ def update_embeddings(
    index = index or self.index
    self._create_collection_and_index_if_not_exist(index)

-   document_count = self.get_document_count(index=index)
-   if document_count == 0:
-       logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
-       return
-
-   logger.info(f"Updating embeddings for {document_count} docs...")
-
    result = self._query(
        index=index,
        vector_ids=None,
        batch_size=batch_size,
        filters=filters,
        only_documents_without_embedding=not update_existing_embeddings,
    )
+
+   document_count = len(result)
+   if document_count == 0:
+       logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
+       return
+
+   logger.info("Updating embeddings for %s docs...", document_count)
+
    batched_documents = get_batches_from_generator(result, batch_size)
    with tqdm(
        total=document_count, disable=not self.progress_bar, position=0, unit=" docs", desc="Updating Embedding"

@@ -665,10 +666,10 @@ def get_all_vectors(self, index: Optional[str] = None) -> List[np.ndarray]:
    index = index or self.index
    status, collection_info = self.milvus_server.get_collection_stats(collection_name=index)
    if not status.OK():
-       logger.info(f"Failed fetch stats from store ...")
+       logger.info("Failed fetch stats from store ...")
        return list()

-   logger.debug(f"collection_info = {collection_info}")
+   logger.debug("collection_info = %s", collection_info)

    ids = list()
    partition_list = collection_info["partitions"]

@@ -679,16 +680,16 @@ def get_all_vectors(self, index: Optional[str] = None) -> List[np.ndarray]:
    status, id_list = self.milvus_server.list_id_in_segment(
        collection_name=index, segment_name=segment_name
    )
-   logger.debug(f"{status}: segment {segment_name} has {len(id_list)} vectors ...")
+   logger.debug("%s: segment %s has %s vectors ...", status, segment_name, len(id_list))
    ids.extend(id_list)

    if len(ids) == 0:
-       logger.info(f"No documents in the store ...")
+       logger.info("No documents in the store ...")
        return list()

    status, vectors = self.milvus_server.get_entity_by_id(collection_name=index, ids=ids)
    if not status.OK():
-       logger.info(f"Failed fetch document for ids {ids} from store ...")
+       logger.info("Failed fetch document for ids %s from store ...", ids)
        return list()

    return vectors

haystack/document_stores/milvus2.py
Lines changed: 3 additions & 3 deletions

@@ -204,7 +204,7 @@ def _create_collection_and_index(
    for field in custom_fields:
        if field.name == self.id_field or field.name == self.embedding_field:
-           logger.warning(f"Skipping `{field.name}` as it is similar to `id_field` or `embedding_field`")
+           logger.warning("Skipping '%s' as it is similar to 'id_field' or 'embedding_field'", field.name)
        else:
            fields.append(field)

@@ -353,7 +353,7 @@ def update_embeddings(
        logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
        return

-   logger.info(f"Updating embeddings for {document_count} docs...")
+   logger.info("Updating embeddings for %s docs...", document_count)

    result = self._query(
        index=index,

@@ -516,7 +516,7 @@ def delete_index(self, index: str):
def _delete_index(self, index: str):
    if utility.has_collection(collection_name=index):
        utility.drop_collection(collection_name=index)
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)
    super().delete_index(index)

def get_all_documents_generator(

haystack/document_stores/opensearch.py
Lines changed: 2 additions & 2 deletions

@@ -379,7 +379,7 @@ def query_by_embedding(
    if excluded_meta_data:
        body["_source"] = {"excludes": excluded_meta_data}

-   logger.debug(f"Retriever query: {body}")
+   logger.debug("Retriever query: %s", body)
    result = self.client.search(index=index, body=body, request_timeout=300, headers=headers)["hits"]["hits"]

    documents = [

@@ -396,7 +396,7 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st
    """
    # Check if index_name refers to an alias
    if self.client.indices.exists_alias(name=index_name):
-       logger.debug(f"Index name {index_name} is an alias.")
+       logger.debug("Index name %s is an alias.", index_name)

    # check if the existing index has the embedding field; if not create it
    if self.client.indices.exists(index=index_name, headers=headers):

haystack/document_stores/pinecone.py
Lines changed: 3 additions & 3 deletions

@@ -216,7 +216,7 @@ def _create_index(
    stats = index_connection.describe_index_stats()
    dims = stats["dimension"]
    count = stats["namespaces"][""]["vector_count"] if stats["namespaces"].get("") else 0
-   logger.info(f"Index statistics: name: {index}, embedding dimensions: {dims}, record count: {count}")
+   logger.info("Index statistics: name: %s embedding dimensions: %s, record count: %s", index, dims, count)
    # return index connection
    return index_connection

@@ -471,7 +471,7 @@ def update_embeddings(
        logger.warning("Calling DocumentStore.update_embeddings() on an empty index")
        return

-   logger.info(f"Updating embeddings for {document_count} docs...")
+   logger.info("Updating embeddings for %s docs...", document_count)

    # If the embedding namespace is empty or the user does not want to update existing embeddings, we use document namespace
    if self.get_embedding_count(index=index) == 0 or not update_existing_embeddings:

@@ -1003,7 +1003,7 @@ def delete_index(self, index: str):
    index = self._index_name(index)
    if index in pinecone.list_indexes():
        pinecone.delete_index(index)
-       logger.info(f"Index '{index}' deleted.")
+       logger.info("Index '%s' deleted.", index)
    if index in self.pinecone_indexes:
        del self.pinecone_indexes[index]
    if index in self.all_ids:

haystack/document_stores/sql.py
Lines changed: 3 additions & 3 deletions

@@ -402,7 +402,7 @@ def write_documents(
    try:
        meta_orms.append(MetaDocumentORM(name=key, value=value))
    except TypeError as ex:
-       logger.error(f"Document {doc.id} - {ex}")
+       logger.error("Document %s - %s", doc.id, ex)
    doc_mapping = {
        "id": doc.id,
        "content": doc.to_dict()["content"],

@@ -425,7 +425,7 @@ def write_documents(
    try:
        self.session.commit()
    except Exception as ex:
-       logger.error(f"Transaction rollback: {ex.__cause__}")
+       logger.error("Transaction rollback: %s", ex.__cause__)
        # Rollback is important here otherwise self.session will be in inconsistent state and next call will fail
        self.session.rollback()
        raise ex

@@ -495,7 +495,7 @@ def update_vector_ids(self, vector_id_map: Dict[str, str], index: Optional[str]
    try:
        self.session.commit()
    except Exception as ex:
-       logger.error(f"Transaction rollback: {ex.__cause__}")
+       logger.error("Transaction rollback: %s", ex.__cause__)
        self.session.rollback()
        raise ex
