Skip to content

Commit 13c4ff1

Browse files
authored
refactor: remove direct logging without a logger (#4253)
* remove direct logging without a logger
* add custom pylint checker
* add test
* pylint
* improve checker message
* mypy
* remove test
* add checker for basicConfig
* more logging missed
* ignore basicConfig
* move out logger
* move out statement
* remove logging configuration
1 parent 4b189c0 commit 13c4ff1

File tree

19 files changed

+134
-41
lines changed

19 files changed

+134
-41
lines changed

haystack/document_stores/deepsetcloud.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def get_all_documents(
196196
:param batch_size: Number of documents that are passed to bulk function at a time.
197197
:param headers: Custom HTTP headers to pass to document store client if supported (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='} for basic authentication)
198198
"""
199-
logging.warning(
199+
logger.warning(
200200
"`get_all_documents()` can get very slow and resource-heavy since all documents must be loaded from deepset Cloud. "
201201
"Consider using `get_all_documents_generator()` instead."
202202
)

haystack/document_stores/weaviate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def get_document_by_id(
337337
try:
338338
result = self.weaviate_client.data_object.get_by_id(id, class_name=index, with_vector=True)
339339
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
340-
logging.debug("Weaviate could not get the document requested: %s", usce)
340+
logger.debug("Weaviate could not get the document requested: %s", usce)
341341
if result:
342342
document = self._convert_weaviate_result_to_document(result, return_embedding=True)
343343
return document
@@ -364,7 +364,7 @@ def get_documents_by_id(
364364
try:
365365
result = self.weaviate_client.data_object.get_by_id(id, class_name=index, with_vector=True)
366366
except weaviate.exceptions.UnexpectedStatusCodeException as usce:
367-
logging.debug("Weaviate could not get the document requested: %s", usce)
367+
logger.debug("Weaviate could not get the document requested: %s", usce)
368368
if result:
369369
document = self._convert_weaviate_result_to_document(result, return_embedding=True)
370370
documents.append(document)

haystack/modeling/training/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,9 @@ def create_or_load_checkpoint(
390390
trainer = cls._load_checkpoint(
391391
path=checkpoint_to_load, data_silo=data_silo, model=model, optimizer=optimizer, local_rank=local_rank
392392
)
393-
logging.info("Resuming training from the train checkpoint at %s ...", checkpoint_to_load)
393+
logger.info("Resuming training from the train checkpoint at %s ...", checkpoint_to_load)
394394
else:
395-
logging.info("No train checkpoints found. Starting a new training ...")
395+
logger.info("No train checkpoints found. Starting a new training ...")
396396
trainer = cls(
397397
data_silo=data_silo,
398398
model=model,

haystack/nodes/_json_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ def load_schema():
414414
"""
415415
schema_file_path = JSON_SCHEMAS_PATH / "haystack-pipeline-main.schema.json"
416416
if not os.path.exists(schema_file_path):
417-
logging.info("Json schema not found, generating one at: %s", schema_file_path)
417+
logger.info("Json schema not found, generating one at: %s", schema_file_path)
418418
try:
419419
update_json_schema(main_only=True)
420420
except Exception as e:

haystack/nodes/audio/_text_to_speech.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,24 @@
77

88
import numpy as np
99
import torch
10+
from pydub import AudioSegment
11+
12+
from haystack.errors import AudioNodeError
13+
from haystack.modeling.utils import initialize_device_settings
14+
15+
16+
logger = logging.getLogger(__name__)
17+
1018

1119
try:
1220
import soundfile as sf
1321
from espnet2.bin.tts_inference import Text2Speech as _Text2SpeechModel
1422

1523
except OSError as ose:
16-
logging.exception(
24+
logger.exception(
1725
"`libsndfile` not found, it's probably not installed. The node will most likely crash. "
1826
"Please install soundfile's dependencies (https://python-soundfile.readthedocs.io/en/latest/)"
1927
)
20-
from pydub import AudioSegment
21-
22-
from haystack.errors import AudioNodeError
23-
from haystack.modeling.utils import initialize_device_settings
24-
25-
logger = logging.getLogger(__name__)
2628

2729

2830
class TextToSpeech:

haystack/nodes/connector/crawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,11 +365,11 @@ def _write_file(
365365
with open(file_path, "w", encoding="utf-8") as f:
366366
json.dump(document.to_dict(), f)
367367
else:
368-
logging.debug(
368+
logger.debug(
369369
"File '%s' already exists. Set 'overwrite_existing_files=True' to overwrite it.", file_path
370370
)
371371
except Exception:
372-
logging.exception(
372+
logger.exception(
373373
"Crawler can't save the content of '%s' under '%s'. "
374374
"This webpage will be skipped, but links from this page will still be crawled. "
375375
"Make sure the path above is accessible and the file name is valid. "

haystack/nodes/file_classifier/file_type.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,21 @@
44
import logging
55
from pathlib import Path
66

7+
from haystack.nodes.base import BaseComponent
8+
9+
10+
logger = logging.getLogger(__name__)
11+
12+
713
try:
814
import magic
915
except ImportError as ie:
10-
logging.debug(
16+
logger.debug(
1117
"Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). "
1218
"FileTypeClassifier will not perform mimetype detection on extensionless files. "
1319
"Please make sure the necessary OS libraries are installed if you need this functionality."
1420
)
1521

16-
from haystack.nodes.base import BaseComponent
17-
18-
19-
logger = logging.getLogger(__name__)
20-
2122

2223
DEFAULT_TYPES = ["txt", "pdf", "md", "docx", "html"]
2324

haystack/pipelines/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,7 @@ def eval_beir(
854854
qrels_new[query_id] = {_id: qrels[query_id][_id] for _id in document_rel_ids_intersection}
855855
qrels = qrels_new
856856
elif num_documents is not None and (num_documents < 1 or num_documents > len(corpus)):
857-
logging.warning(
857+
logger.warning(
858858
"'num_documents' variable should be lower than corpus length and have a positive value, but it's %s."
859859
" Dataset size remains unchanged.",
860860
num_documents,

haystack/pipelines/config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def validate_yaml(
175175
extras=extras,
176176
overwrite_with_env_variables=overwrite_with_env_variables,
177177
)
178-
logging.debug("'%s' contains valid Haystack pipelines.", path)
178+
logger.debug("'%s' contains valid Haystack pipelines.", path)
179179

180180

181181
def validate_config(
@@ -260,7 +260,7 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
260260
)
261261
ok_to_ignore_version = pipeline_version == "ignore" and "rc" in __version__
262262
if not ok_to_ignore_version:
263-
logging.warning(
263+
logger.warning(
264264
"This pipeline is version '%s', but you're using Haystack %s\n"
265265
"This might cause bugs and unexpected behaviors."
266266
"Please check out the release notes (https://github.com/deepset-ai/haystack/releases/latest), "
@@ -318,7 +318,7 @@ def validate_schema(pipeline_config: Dict, strict_version_check: bool = False, e
318318
f"Validation failed. {validation.message}. {error_location} " "See the stacktrace for more information."
319319
) from validation
320320

321-
logging.debug("The given configuration is valid according to the JSON schema.")
321+
logger.debug("The given configuration is valid according to the JSON schema.")
322322

323323

324324
def validate_pipeline_graph(pipeline_definition: Dict[str, Any], component_definitions: Dict[str, Any]):
@@ -332,7 +332,7 @@ def validate_pipeline_graph(pipeline_definition: Dict[str, Any], component_defin
332332
graph = _init_pipeline_graph(root_node_name=root_node_name)
333333
for node in pipeline_definition["nodes"]:
334334
graph = _add_node_to_pipeline_graph(graph=graph, node=node, components=component_definitions)
335-
logging.debug("The graph for pipeline '%s' is valid.", pipeline_definition["name"])
335+
logger.debug("The graph for pipeline '%s' is valid.", pipeline_definition["name"])
336336

337337

338338
def _find_root_in_pipeline_definition(pipeline_definition: Dict[str, Any]):

haystack/utils/docker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
from haystack.nodes._json_schema import load_schema
44

55

6+
logger = logging.getLogger(__name__)
7+
8+
69
def cache_nltk_model(model: str = "punkt"):
7-
logging.info("Caching %s model...", model)
10+
logger.info("Caching %s model...", model)
811
import nltk
912

1013
nltk.download(model)
@@ -30,7 +33,7 @@ def cache_models(models: Optional[List[str]] = None, use_auth_token: Optional[Un
3033
import transformers
3134

3235
for model_to_cache in models:
33-
logging.info("Caching %s", model_to_cache)
36+
logger.info("Caching %s", model_to_cache)
3437
transformers.AutoTokenizer.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
3538
transformers.AutoModel.from_pretrained(model_to_cache, use_auth_token=use_auth_token)
3639

0 commit comments

Comments (0)