Skip to content

Commit 5585f7a

Browse files
Genesis929 and tswast authored
chore: replace api_name with updated method_logger (#1660)
* chore: replace api_name with updated method_logger * fix * not supported error fix * fix for static methods * update unimplemented tracking to include pandas.xxx * update comments * Update bigframes/core/blocks.py * add escaping * add log name override * fix test --------- Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 0e1a2c6 commit 5585f7a

File tree

15 files changed

+191
-188
lines changed

15 files changed

+191
-188
lines changed

bigframes/core/blocks.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2644,9 +2644,8 @@ def _get_rows_as_json_values(self) -> Block:
26442644
SELECT {select_columns_csv} FROM T1
26452645
"""
26462646
# The only way this code is used is through the df.apply(axis=1) code path
2647-
# TODO: Stop using internal API
26482647
destination, query_job = self.session._loader._query_to_destination(
2649-
json_sql, cluster_candidates=[ordering_column_name], api_name="apply"
2648+
json_sql, cluster_candidates=[ordering_column_name]
26502649
)
26512650
if not destination:
26522651
raise ValueError(f"Query job {query_job} did not produce result table")

bigframes/core/log_adapter.py

Lines changed: 77 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import functools
1616
import inspect
1717
import threading
18-
from typing import List
18+
from typing import List, Optional
1919

2020
from google.cloud import bigquery
2121
import pandas
@@ -28,6 +28,7 @@
2828
MAX_LABELS_COUNT = 64 - 8
2929
PANDAS_API_TRACKING_TASK = "pandas_api_tracking"
3030
PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking"
31+
LOG_OVERRIDE_NAME = "__log_override_name__"
3132

3233
_api_methods: List = []
3334
_excluded_methods = ["__setattr__", "__getattr__"]
@@ -37,8 +38,8 @@
3738

3839

3940
def submit_pandas_labels(
40-
bq_client: bigquery.Client,
41-
class_name: str,
41+
bq_client: Optional[bigquery.Client],
42+
base_name: str,
4243
method_name: str,
4344
args=(),
4445
kwargs={},
@@ -54,7 +55,7 @@ def submit_pandas_labels(
5455
5556
Args:
5657
bq_client (bigquery.Client): The client used to interact with BigQuery.
57-
class_name (str): The name of the pandas class being used.
58+
base_name (str): The name of the pandas class/module being used.
5859
method_name (str): The name of the method being invoked.
5960
args (tuple): The positional arguments passed to the method.
6061
kwargs (dict): The keyword arguments passed to the method.
@@ -63,25 +64,29 @@ def submit_pandas_labels(
6364
- 'PANDAS_PARAM_TRACKING_TASK': Indicates that the unimplemented feature is a
6465
parameter of a method.
6566
"""
66-
if method_name.startswith("_") and not method_name.startswith("__"):
67+
if bq_client is None or (
68+
method_name.startswith("_") and not method_name.startswith("__")
69+
):
6770
return
6871

6972
labels_dict = {
7073
"task": task,
71-
"class_name": class_name.lower(),
74+
"class_name": base_name.lower(),
7275
"method_name": method_name.lower(),
7376
"args_count": len(args),
7477
}
7578

76-
if hasattr(pandas, class_name):
77-
cls = getattr(pandas, class_name)
79+
# getattr(pandas, "pandas") returns pandas
80+
# so we can also use this for pandas.function
81+
if hasattr(pandas, base_name):
82+
base = getattr(pandas, base_name)
7883
else:
7984
return
8085

8186
# Omit __call__, because it's not implemented on the actual instances of
8287
# DataFrame/Series, only as the constructor.
83-
if method_name != "__call__" and hasattr(cls, method_name):
84-
method = getattr(cls, method_name)
88+
if method_name != "__call__" and hasattr(base, method_name):
89+
method = getattr(base, method_name)
8590
else:
8691
return
8792

@@ -110,30 +115,29 @@ def submit_pandas_labels(
110115
bq_client.query(query, job_config=job_config)
111116

112117

113-
def class_logger(decorated_cls=None, /, *, include_internal_calls=False):
118+
def class_logger(decorated_cls=None):
114119
"""Decorator that adds logging functionality to each method of the class."""
115120

116121
def wrap(cls):
117122
for attr_name, attr_value in cls.__dict__.items():
118123
if callable(attr_value) and (attr_name not in _excluded_methods):
119124
if isinstance(attr_value, staticmethod):
120-
# TODO(b/390244171) support for staticmethod
121-
pass
125+
setattr(
126+
cls,
127+
attr_name,
128+
staticmethod(method_logger(attr_value)),
129+
)
122130
else:
123131
setattr(
124132
cls,
125133
attr_name,
126-
method_logger(
127-
attr_value,
128-
cls,
129-
include_internal_calls,
130-
),
134+
method_logger(attr_value),
131135
)
132136
elif isinstance(attr_value, property):
133137
setattr(
134138
cls,
135139
attr_name,
136-
property_logger(attr_value, cls, include_internal_calls),
140+
property_logger(attr_value),
137141
)
138142
return cls
139143

@@ -145,33 +149,39 @@ def wrap(cls):
145149
return wrap(decorated_cls)
146150

147151

148-
def method_logger(method, decorated_cls, include_internal_calls: bool):
152+
def method_logger(method, /, *, custom_base_name: Optional[str] = None):
149153
"""Decorator that adds logging functionality to a method."""
150154

151155
@functools.wraps(method)
152-
def wrapper(self, *args, **kwargs):
153-
class_name = decorated_cls.__name__ # Access decorated class name
154-
api_method_name = str(method.__name__)
155-
full_method_name = f"{class_name.lower()}-{api_method_name}"
156-
156+
def wrapper(*args, **kwargs):
157+
api_method_name = getattr(method, LOG_OVERRIDE_NAME, method.__name__)
158+
if custom_base_name is None:
159+
qualname_parts = getattr(method, "__qualname__", method.__name__).split(".")
160+
class_name = qualname_parts[-2] if len(qualname_parts) > 1 else ""
161+
base_name = (
162+
class_name if class_name else "_".join(method.__module__.split(".")[1:])
163+
)
164+
else:
165+
base_name = custom_base_name
166+
167+
full_method_name = f"{base_name.lower()}-{api_method_name}"
157168
# Track directly called methods
158-
if len(_call_stack) == 0 or include_internal_calls:
169+
if len(_call_stack) == 0:
159170
add_api_method(full_method_name)
160171

161172
_call_stack.append(full_method_name)
162173

163174
try:
164-
return method(self, *args, **kwargs)
175+
return method(*args, **kwargs)
165176
except (NotImplementedError, TypeError) as e:
166177
# Log method parameters that are implemented in pandas but either missing (TypeError)
167178
# or not fully supported (NotImplementedError) in BigFrames.
168179
# Logging is currently supported only when we can access the bqclient through
169-
# self._block.expr.session.bqclient. Also, to avoid generating multiple queries
170-
# because of internal calls, we log only when the method is directly invoked.
171-
if hasattr(self, "_block") and len(_call_stack) == 1:
180+
# _block.session.bqclient.
181+
if len(_call_stack) == 1:
172182
submit_pandas_labels(
173-
self._block.expr.session.bqclient,
174-
class_name,
183+
_get_bq_client(*args, **kwargs),
184+
base_name,
175185
api_method_name,
176186
args,
177187
kwargs,
@@ -184,22 +194,23 @@ def wrapper(self, *args, **kwargs):
184194
return wrapper
185195

186196

187-
def property_logger(prop, decorated_cls, include_internal_calls: bool):
197+
def property_logger(prop):
188198
"""Decorator that adds logging functionality to a property."""
189199

190-
def shared_wrapper(f):
191-
@functools.wraps(f)
200+
def shared_wrapper(prop):
201+
@functools.wraps(prop)
192202
def wrapped(*args, **kwargs):
193-
class_name = decorated_cls.__name__
194-
property_name = f.__name__
203+
qualname_parts = getattr(prop, "__qualname__", prop.__name__).split(".")
204+
class_name = qualname_parts[-2] if len(qualname_parts) > 1 else ""
205+
property_name = prop.__name__
195206
full_property_name = f"{class_name.lower()}-{property_name.lower()}"
196207

197-
if len(_call_stack) == 0 or include_internal_calls:
208+
if len(_call_stack) == 0:
198209
add_api_method(full_property_name)
199210

200211
_call_stack.append(full_property_name)
201212
try:
202-
return f(*args, **kwargs)
213+
return prop(*args, **kwargs)
203214
finally:
204215
_call_stack.pop()
205216

@@ -213,12 +224,24 @@ def wrapped(*args, **kwargs):
213224
)
214225

215226

def log_name_override(name: str):
    """Build a decorator that tags a function with a custom log name.

    The decorator stores ``name`` on the function under the attribute named
    by ``LOG_OVERRIDE_NAME`` and returns the very same function object; the
    function is not wrapped and its call behavior is untouched.

    Args:
        name (str): The name to attach to the decorated function.

    Returns:
        A decorator that takes a function, annotates it, and returns it.
    """

    def decorator(target):
        # Annotate in place so the caller gets back the original object.
        setattr(target, LOG_OVERRIDE_NAME, name)
        return target

    return decorator
237+
238+
216239
def add_api_method(api_method_name):
217240
global _lock
218241
global _api_methods
219242
with _lock:
220243
# Push the method to the front of the _api_methods list
221-
_api_methods.insert(0, api_method_name)
244+
_api_methods.insert(0, api_method_name.replace("<", "").replace(">", ""))
222245
# Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed)
223246
_api_methods = _api_methods[:MAX_LABELS_COUNT]
224247

@@ -232,3 +255,17 @@ def get_and_reset_api_methods(dry_run: bool = False):
232255
if not dry_run:
233256
_api_methods.clear()
234257
return previous_api_methods
258+
259+
260+
def _get_bq_client(*args, **kwargs):
261+
# Assumes that on BigFrames API errors (TypeError/NotImplementedError),
262+
# an input arg (likely the first, e.g., 'self') has `_block.session.bqclient`
263+
for argv in args:
264+
if hasattr(argv, "_block"):
265+
return argv._block.session.bqclient
266+
267+
for kwargv in kwargs.values():
268+
if hasattr(kwargv, "_block"):
269+
return kwargv._block.session.bqclient
270+
271+
return None

bigframes/pandas/__init__.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import pandas
2828

2929
import bigframes._config as config
30+
from bigframes.core import log_adapter
3031
import bigframes.core.blocks
3132
import bigframes.core.global_session as global_session
3233
import bigframes.core.indexes
@@ -199,6 +200,7 @@ def get_default_session_id() -> str:
199200
return get_global_session().session_id
200201

201202

203+
@log_adapter.method_logger
202204
def clean_up_by_session_id(
203205
session_id: str,
204206
location: Optional[str] = None,
@@ -245,7 +247,6 @@ def clean_up_by_session_id(
245247
session.bqclient,
246248
location=location,
247249
project=project,
248-
api_name="clean_up_by_session_id",
249250
)
250251

251252
bigframes.session._io.bigquery.delete_tables_matching_session_id(
@@ -322,31 +323,33 @@ def reset_session():
322323
except Exception:
323324
pass
324325

325-
# Use __all__ to let type checkers know what is part of the public API.
326-
__all__ = [
327-
# Functions
328-
"clean_up_by_session_id",
329-
"concat",
330-
"cut",
331-
"get_default_session_id",
332-
"get_dummies",
333-
"merge",
334-
"qcut",
335-
"read_csv",
336-
"read_gbq",
337-
"read_gbq_function",
338-
"read_gbq_model",
339-
"read_gbq_object_table",
340-
"read_gbq_query",
341-
"read_gbq_table",
342-
"read_json",
343-
"read_pandas",
344-
"read_parquet",
345-
"read_pickle",
346-
"remote_function",
347-
"to_datetime",
348-
"to_timedelta",
349-
"from_glob_path",
326+
_functions = [
327+
clean_up_by_session_id,
328+
concat,
329+
cut,
330+
get_default_session_id,
331+
get_dummies,
332+
merge,
333+
qcut,
334+
read_csv,
335+
read_gbq,
336+
read_gbq_function,
337+
read_gbq_model,
338+
read_gbq_object_table,
339+
read_gbq_query,
340+
read_gbq_table,
341+
read_json,
342+
read_pandas,
343+
read_parquet,
344+
read_pickle,
345+
remote_function,
346+
to_datetime,
347+
to_timedelta,
348+
from_glob_path,
349+
]
350+
351+
_function_names = [_function.__name__ for _function in _functions]
352+
_other_names = [
350353
# pandas dtype attributes
351354
"NA",
352355
"BooleanDtype",
@@ -371,3 +374,12 @@ def reset_session():
371374
"reset_session",
372375
"udf",
373376
]
377+
378+
# Use __all__ to let type checkers know what is part of the public API.
379+
__all__ = _function_names + _other_names
380+
381+
_module = sys.modules[__name__]
382+
383+
for _function in _functions:
384+
_decorated_object = log_adapter.method_logger(_function, custom_base_name="pandas")
385+
setattr(_module, _function.__name__, _decorated_object)

bigframes/pandas/io/api.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,8 @@
4444
)
4545

4646
import bigframes._config as config
47-
import bigframes.core.blocks
4847
import bigframes.core.global_session as global_session
4948
import bigframes.core.indexes
50-
import bigframes.core.reshape
51-
import bigframes.core.tools
5249
import bigframes.dataframe
5350
import bigframes.enums
5451
import bigframes.series

bigframes/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1492,7 +1492,7 @@ def __getattr__(self, key: str):
14921492
raise AttributeError(key)
14931493
elif hasattr(pandas.Series, key):
14941494
log_adapter.submit_pandas_labels(
1495-
self._block.expr.session.bqclient, self.__class__.__name__, key
1495+
self._block.session.bqclient, self.__class__.__name__, key
14961496
)
14971497
raise AttributeError(
14981498
textwrap.dedent(

0 commit comments

Comments
 (0)