Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions modin/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# type: ignore

import copy
import logging
import os
import platform
import shutil
Expand Down Expand Up @@ -792,3 +793,17 @@ def clean_up_auto_backend_switching():
)
):
yield


@pytest.fixture(autouse=True)
def assert_no_root_logging(caplog):
    """Fail any test that logs through the root logger.

    Modin code should always log via its own named logger; a record whose
    logger name equals the root logger's name indicates a stray
    ``logging.info(...)``-style call.
    """
    root_name = logging.getLogger().name
    # Capture at NOTSET so records of every level are collected.
    with caplog.at_level(logging.NOTSET):
        yield
    # Because this check runs inside a fixture, caplog.records would miss the
    # test body; use caplog.get_records(when="call") instead:
    # https://github.com/pytest-dev/pytest/issues/4033
    offenders = [
        record
        for record in caplog.get_records(when="call")
        if record.name == root_name
    ]
    assert not offenders
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
all query compilers to determine the best query compiler to use.
"""

import logging
import random
from types import MappingProxyType
from typing import Any, Optional
Expand All @@ -28,6 +27,7 @@
BaseQueryCompiler,
QCCoercionCost,
)
from modin.logging import get_logger
from modin.logging.metrics import emit_metric


Expand Down Expand Up @@ -147,7 +147,7 @@ def calculate(self) -> str:
self._result_backend = k

if len(self._backend_data) > 1:
logging.info(
get_logger().info(
f"BackendCostCalculator Results: {self._calc_result_log(self._result_backend)}"
)
# Does not need to be secure, should not use system entropy
Expand Down
11 changes: 6 additions & 5 deletions modin/core/storage_formats/pandas/query_compiler_caster.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import functools
import inspect
import logging
import random
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
Expand All @@ -42,7 +41,7 @@
BackendCostCalculator,
)
from modin.error_message import ErrorMessage
from modin.logging import disable_logging
from modin.logging import disable_logging, get_logger
from modin.logging.metrics import emit_metric
from modin.utils import sentinel

Expand Down Expand Up @@ -812,7 +811,7 @@ def _get_backend_for_auto_switch(
move_stay_delta,
)

logging.info(
get_logger().info(
f"After {class_of_wrapped_fn} function {function_name}, "
+ f"considered moving to backend {backend} with "
+ f"(transfer_cost {move_to_cost} + other_execution_cost {other_execute_cost}) "
Expand All @@ -822,10 +821,12 @@ def _get_backend_for_auto_switch(

if best_backend == starting_backend:
emit_metric(f"hybrid.auto.decision.{best_backend}.group.{metrics_group}", 0)
logging.info(f"Chose not to switch backends after operation {function_name}")
get_logger().info(
f"Chose not to switch backends after operation {function_name}"
)
else:
emit_metric(f"hybrid.auto.decision.{best_backend}.group.{metrics_group}", 1)
logging.info(f"Chose to move to backend {best_backend}")
get_logger().info(f"Chose to move to backend {best_backend}")
return best_backend


Expand Down
3 changes: 2 additions & 1 deletion modin/logging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# governing permissions and limitations under the License.

from .class_logger import ClassLogger # noqa: F401
from .config import get_logger # noqa: F401
from .config import DEFAULT_LOGGER_NAME, get_logger # noqa: F401
from .logger_decorator import disable_logging, enable_logging # noqa: F401
from .metrics import add_metric_handler, clear_metric_handler, emit_metric

Expand All @@ -24,4 +24,5 @@
"emit_metric",
"add_metric_handler",
"clear_metric_handler",
"DEFAULT_LOGGER_NAME",
]
4 changes: 3 additions & 1 deletion modin/logging/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import modin
from modin.config import LogFileSize, LogMemoryInterval, LogMode

DEFAULT_LOGGER_NAME = "modin.logger.default"

__LOGGER_CONFIGURED__: bool = False


Expand Down Expand Up @@ -166,7 +168,7 @@ def configure_logging() -> None:
job_id = f"{current_timestamp}_{uuid.uuid4().hex}"

logger = _create_logger(
"modin.logger.default",
DEFAULT_LOGGER_NAME,
job_id,
"trace",
LogLevel.INFO,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import contextlib
import json
import logging
from io import StringIO
from types import MappingProxyType
from typing import Iterator
Expand Down Expand Up @@ -44,6 +45,7 @@
register_function_for_post_op_switch,
register_function_for_pre_op_switch,
)
from modin.logging import DEFAULT_LOGGER_NAME
from modin.logging.metrics import add_metric_handler, clear_metric_handler
from modin.pandas.api.extensions import register_pd_accessor
from modin.tests.pandas.utils import create_test_dfs, df_equals, eval_general
Expand Down Expand Up @@ -369,12 +371,19 @@ def test_two_same_backend(pico_df):
assert df3.get_backend() == "Pico"


def test_cast_to_second_backend_with_concat(pico_df, cluster_df):
df3 = pd.concat([pico_df, cluster_df], axis=1)
def test_cast_to_second_backend_with_concat(pico_df, cluster_df, caplog):
    """Concat across two backends should land on "Cluster" and log the cost calculation once."""
    with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
        combined = pd.concat([pico_df, cluster_df], axis=1)

    # The inputs keep their original backends; only the result moves.
    assert pico_df.get_backend() == "Pico"
    assert cluster_df.get_backend() == "Cluster"
    assert combined.get_backend() == "Cluster"  # result should be on cluster

    records = caplog.records
    assert len(records) == 1
    record = records[0]
    assert record.name == DEFAULT_LOGGER_NAME
    assert record.levelno == logging.INFO
    assert record.message.startswith("BackendCostCalculator Results: ")


def test_cast_to_second_backend_with_concat_uses_second_backend_api_override(
pico_df, cluster_df
Expand Down Expand Up @@ -693,6 +702,72 @@ def test_read_json(self):
pd.read_json(StringIO(small_json)).get_backend() == "Small_Data_Local"
)

@backend_test_context(
    test_backend="Big_Data_Cloud",
    choices=("Big_Data_Cloud", "Small_Data_Local"),
)
def test_read_json_logging_for_post_op_switch(self, caplog):
    """read_json on data below the cloud threshold switches backends and logs why.

    Registers read_json for post-op switching, reads a frame one row short of
    BIG_DATA_CLOUD_MIN_NUM_ROWS, and checks that the result lands on
    Small_Data_Local with exactly two INFO records on the modin default
    logger: the cost comparison, then the switch decision.
    """
    register_function_for_post_op_switch(
        class_name=None, backend="Big_Data_Cloud", method="read_json"
    )
    with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
        assert (
            pd.read_json(
                StringIO(
                    json.dumps(
                        {"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS - 1))}
                    )
                )
            ).get_backend()
            == "Small_Data_Local"
        )
    log_records = caplog.records
    assert len(log_records) == 2

    # First record: the considered-move cost breakdown.
    assert log_records[0].name == DEFAULT_LOGGER_NAME
    assert log_records[0].levelno == logging.INFO
    assert log_records[0].message.startswith(
        "After None function read_json, considered moving to backend Small_Data_Local with"
    )

    # Second record: the final decision to switch.
    assert log_records[1].name == DEFAULT_LOGGER_NAME
    assert log_records[1].levelno == logging.INFO
    assert log_records[1].message.startswith(
        "Chose to move to backend Small_Data_Local"
    )

@backend_test_context(
    test_backend="Big_Data_Cloud",
    choices=("Big_Data_Cloud", "Small_Data_Local"),
)
def test_read_json_logging_for_post_op_not_switch(self, caplog):
    """read_json on data at the cloud threshold stays on Big_Data_Cloud and logs the decision."""
    register_function_for_post_op_switch(
        class_name=None, backend="Big_Data_Cloud", method="read_json"
    )
    payload = json.dumps({"col0": list(range(BIG_DATA_CLOUD_MIN_NUM_ROWS))})
    with caplog.at_level(level=logging.INFO, logger=DEFAULT_LOGGER_NAME):
        result_backend = pd.read_json(StringIO(payload)).get_backend()
    assert result_backend == "Big_Data_Cloud"

    records = caplog.records
    assert len(records) == 2
    # Both records come from the modin default logger at INFO level.
    for record in records:
        assert record.name == DEFAULT_LOGGER_NAME
        assert record.levelno == logging.INFO
    # First the cost comparison, then the decision not to move.
    assert records[0].message.startswith(
        "After None function read_json, considered moving to backend Small_Data_Local with"
    )
    assert records[1].message.startswith(
        "Chose not to switch backends after operation read_json"
    )

def test_agg(self):
with backend_test_context(
test_backend="Big_Data_Cloud",
Expand Down
Loading