Skip to content

Commit cc38a16

Browse files
FIX-#7595: Log backend switching information with the modin logger. (#7597)
Resolves #7595 Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
1 parent 180f51f commit cc38a16

File tree

6 files changed

+105
-11
lines changed

6 files changed

+105
-11
lines changed

modin/conftest.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# type: ignore
1616

1717
import copy
18+
import logging
1819
import os
1920
import platform
2021
import shutil
@@ -792,3 +793,17 @@ def clean_up_auto_backend_switching():
792793
)
793794
):
794795
yield
796+
797+
798+
@pytest.fixture(autouse=True)
799+
def assert_no_root_logging(caplog):
800+
root_logger = logging.getLogger()
801+
# Capture logs at any level, i.e. at level >= logging.NOTSET.
802+
with caplog.at_level(logging.NOTSET):
803+
yield
804+
# Note that because this code is in a fixture, we have to use
805+
# caplog.get_records(when="call") instead of caplog.records:
806+
# https://github.com/pytest-dev/pytest/issues/4033
807+
assert not any(
808+
record.name == root_logger.name for record in caplog.get_records(when="call")
809+
)

modin/core/storage_formats/base/query_compiler_calculator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
all query compilers to determine the best query compiler to use.
2020
"""
2121

22-
import logging
2322
import random
2423
from types import MappingProxyType
2524
from typing import Any, Optional
@@ -28,6 +27,7 @@
2827
BaseQueryCompiler,
2928
QCCoercionCost,
3029
)
30+
from modin.logging import get_logger
3131
from modin.logging.metrics import emit_metric
3232

3333

@@ -147,7 +147,7 @@ def calculate(self) -> str:
147147
self._result_backend = k
148148

149149
if len(self._backend_data) > 1:
150-
logging.info(
150+
get_logger().info(
151151
f"BackendCostCalculator Results: {self._calc_result_log(self._result_backend)}"
152152
)
153153
# Does not need to be secure, should not use system entropy

modin/core/storage_formats/pandas/query_compiler_caster.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import functools
2323
import inspect
24-
import logging
2524
import random
2625
from abc import ABC, abstractmethod
2726
from collections import defaultdict, namedtuple
@@ -42,7 +41,7 @@
4241
BackendCostCalculator,
4342
)
4443
from modin.error_message import ErrorMessage
45-
from modin.logging import disable_logging
44+
from modin.logging import disable_logging, get_logger
4645
from modin.logging.metrics import emit_metric
4746
from modin.utils import sentinel
4847

@@ -812,7 +811,7 @@ def _get_backend_for_auto_switch(
812811
move_stay_delta,
813812
)
814813

815-
logging.info(
814+
get_logger().info(
816815
f"After {class_of_wrapped_fn} function {function_name}, "
817816
+ f"considered moving to backend {backend} with "
818817
+ f"(transfer_cost {move_to_cost} + other_execution_cost {other_execute_cost}) "
@@ -822,10 +821,12 @@ def _get_backend_for_auto_switch(
822821

823822
if best_backend == starting_backend:
824823
emit_metric(f"hybrid.auto.decision.{best_backend}.group.{metrics_group}", 0)
825-
logging.info(f"Chose not to switch backends after operation {function_name}")
824+
get_logger().info(
825+
f"Chose not to switch backends after operation {function_name}"
826+
)
826827
else:
827828
emit_metric(f"hybrid.auto.decision.{best_backend}.group.{metrics_group}", 1)
828-
logging.info(f"Chose to move to backend {best_backend}")
829+
get_logger().info(f"Chose to move to backend {best_backend}")
829830
return best_backend
830831

831832

modin/logging/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# governing permissions and limitations under the License.
1313

1414
from .class_logger import ClassLogger # noqa: F401
15-
from .config import get_logger # noqa: F401
15+
from .config import DEFAULT_LOGGER_NAME, get_logger # noqa: F401
1616
from .logger_decorator import disable_logging, enable_logging # noqa: F401
1717
from .metrics import add_metric_handler, clear_metric_handler, emit_metric
1818

@@ -24,4 +24,5 @@
2424
"emit_metric",
2525
"add_metric_handler",
2626
"clear_metric_handler",
27+
"DEFAULT_LOGGER_NAME",
2728
]

modin/logging/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
import modin
3535
from modin.config import LogFileSize, LogMemoryInterval, LogMode
3636

37+
DEFAULT_LOGGER_NAME = "modin.logger.default"
38+
3739
__LOGGER_CONFIGURED__: bool = False
3840

3941

@@ -166,7 +168,7 @@ def configure_logging() -> None:
166168
job_id = f"{current_timestamp}_{uuid.uuid4().hex}"
167169

168170
logger = _create_logger(
169-
"modin.logger.default",
171+
DEFAULT_LOGGER_NAME,
170172
job_id,
171173
"trace",
172174
LogLevel.INFO,

modin/tests/pandas/native_df_interoperability/test_compiler_caster.py

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import contextlib
1515
import json
16+
import logging
1617
from io import StringIO
1718
from types import MappingProxyType
1819
from typing import Iterator
@@ -44,6 +45,7 @@
4445
register_function_for_post_op_switch,
4546
register_function_for_pre_op_switch,
4647
)
48+
from modin.logging import DEFAULT_LOGGER_NAME
4749
from modin.logging.metrics import add_metric_handler, clear_metric_handler
4850
from modin.pandas.api.extensions import register_pd_accessor
4951
from modin.tests.pandas.utils import create_test_dfs, df_equals, eval_general
@@ -369,12 +371,19 @@ def test_two_same_backend(pico_df):
369371
assert df3.get_backend() == "Pico"
370372

371373

372-
def test_cast_to_second_backend_with_concat(pico_df, cluster_df):
373-
df3 = pd.concat([pico_df, cluster_df], axis=1)
374+
def test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):
375+
with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
376+
df3 = pd.concat([pico_df, cluster_df], axis=1)
374377
assert pico_df.get_backend() == "Pico"
375378
assert cluster_df.get_backend() == "Cluster"
376379
assert df3.get_backend() == "Cluster" # result should be on cluster
377380

381+
log_records = caplog.records
382+
assert len(log_records) == 1
383+
assert log_records[0].name == DEFAULT_LOGGER_NAME
384+
assert log_records[0].levelno == logging.INFO
385+
assert log_records[0].message.startswith("BackendCostCalculator Results: ")
386+
378387

379388
def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
380389
pico_df, cluster_df
@@ -693,6 +702,72 @@ def test_read_json(self):
693702
pd.read_json(StringIO(small_json)).get_backend() == "Small_Data_Local"
694703
)
695704

705+
@backend_test_context(
706+
test_backend="Big_Data_Cloud",
707+
choices=("Big_Data_Cloud", "Small_Data_Local"),
708+
)
709+
def test_read_json_logging_for_post_op_switch(self, caplog):
710+
register_function_for_post_op_switch(
711+
class_name=None, backend="Big_Data_Cloud", method="read_json"
712+
)
713+
with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
714+
assert (
715+
pd.read_json(
716+
StringIO(
717+
json.dumps(
718+
{"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
719+
)
720+
)
721+
).get_backend()
722+
== "Small_Data_Local"
723+
)
724+
log_records = caplog.records
725+
assert len(log_records) == 2
726+
727+
assert log_records[0].name == DEFAULT_LOGGER_NAME
728+
assert log_records[0].levelno == logging.INFO
729+
assert log_records[0].message.startswith(
730+
"After None function read_json, considered moving to backend Small_Data_Local with"
731+
)
732+
733+
assert log_records[1].name == DEFAULT_LOGGER_NAME
734+
assert log_records[1].levelno == logging.INFO
735+
assert log_records[1].message.startswith(
736+
"Chose to move to backend Small_Data_Local"
737+
)
738+
739+
@backend_test_context(
740+
test_backend="Big_Data_Cloud",
741+
choices=("Big_Data_Cloud", "Small_Data_Local"),
742+
)
743+
def test_read_json_logging_for_post_op_not_switch(self, caplog):
744+
register_function_for_post_op_switch(
745+
class_name=None, backend="Big_Data_Cloud", method="read_json"
746+
)
747+
with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
748+
assert (
749+
pd.read_json(
750+
StringIO(
751+
json.dumps({"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})
752+
)
753+
).get_backend()
754+
== "Big_Data_Cloud"
755+
)
756+
log_records = caplog.records
757+
assert len(log_records) == 2
758+
759+
assert log_records[0].name == DEFAULT_LOGGER_NAME
760+
assert log_records[0].levelno == logging.INFO
761+
assert log_records[0].message.startswith(
762+
"After None function read_json, considered moving to backend Small_Data_Local with"
763+
)
764+
765+
assert log_records[1].name == DEFAULT_LOGGER_NAME
766+
assert log_records[1].levelno == logging.INFO
767+
assert log_records[1].message.startswith(
768+
"Chose not to switch backends after operation read_json"
769+
)
770+
696771
def test_agg(self):
697772
with backend_test_context(
698773
test_backend="Big_Data_Cloud",

0 commit comments

Comments
 (0)