15
15
import functools
16
16
import inspect
17
17
import threading
18
- from typing import List
18
+ from typing import List , Optional
19
19
20
20
from google .cloud import bigquery
21
21
import pandas
28
28
MAX_LABELS_COUNT = 64 - 8
29
29
PANDAS_API_TRACKING_TASK = "pandas_api_tracking"
30
30
PANDAS_PARAM_TRACKING_TASK = "pandas_param_tracking"
31
+ LOG_OVERRIDE_NAME = "__log_override_name__"
31
32
32
33
_api_methods : List = []
33
34
_excluded_methods = ["__setattr__" , "__getattr__" ]
37
38
38
39
39
40
def submit_pandas_labels (
40
- bq_client : bigquery .Client ,
41
- class_name : str ,
41
+ bq_client : Optional [ bigquery .Client ] ,
42
+ base_name : str ,
42
43
method_name : str ,
43
44
args = (),
44
45
kwargs = {},
@@ -54,7 +55,7 @@ def submit_pandas_labels(
54
55
55
56
Args:
56
57
bq_client (Optional[bigquery.Client]): The client used to interact with BigQuery. If None, nothing is submitted.
57
- class_name (str): The name of the pandas class being used.
58
+ base_name (str): The name of the pandas class/module being used.
58
59
method_name (str): The name of the method being invoked.
59
60
args (tuple): The positional arguments passed to the method.
60
61
kwargs (dict): The keyword arguments passed to the method.
@@ -63,25 +64,29 @@ def submit_pandas_labels(
63
64
- 'PANDAS_PARAM_TRACKING_TASK': Indicates that the unimplemented feature is a
64
65
parameter of a method.
65
66
"""
66
- if method_name .startswith ("_" ) and not method_name .startswith ("__" ):
67
+ if bq_client is None or (
68
+ method_name .startswith ("_" ) and not method_name .startswith ("__" )
69
+ ):
67
70
return
68
71
69
72
labels_dict = {
70
73
"task" : task ,
71
- "class_name" : class_name .lower (),
74
+ "class_name" : base_name .lower (),
72
75
"method_name" : method_name .lower (),
73
76
"args_count" : len (args ),
74
77
}
75
78
76
- if hasattr (pandas , class_name ):
77
- cls = getattr (pandas , class_name )
79
+ # getattr(pandas, "pandas") returns pandas
80
+ # so we can also use this for pandas.function
81
+ if hasattr (pandas , base_name ):
82
+ base = getattr (pandas , base_name )
78
83
else :
79
84
return
80
85
81
86
# Omit __call__, because it's not implemented on the actual instances of
82
87
# DataFrame/Series, only as the constructor.
83
- if method_name != "__call__" and hasattr (cls , method_name ):
84
- method = getattr (cls , method_name )
88
+ if method_name != "__call__" and hasattr (base , method_name ):
89
+ method = getattr (base , method_name )
85
90
else :
86
91
return
87
92
@@ -110,30 +115,29 @@ def submit_pandas_labels(
110
115
bq_client .query (query , job_config = job_config )
111
116
112
117
113
- def class_logger (decorated_cls = None , / , * , include_internal_calls = False ):
118
+ def class_logger (decorated_cls = None ):
114
119
"""Decorator that adds logging functionality to each method of the class."""
115
120
116
121
def wrap (cls ):
117
122
for attr_name , attr_value in cls .__dict__ .items ():
118
123
if callable (attr_value ) and (attr_name not in _excluded_methods ):
119
124
if isinstance (attr_value , staticmethod ):
120
- # TODO(b/390244171) support for staticmethod
121
- pass
125
+ setattr (
126
+ cls ,
127
+ attr_name ,
128
+ staticmethod (method_logger (attr_value )),
129
+ )
122
130
else :
123
131
setattr (
124
132
cls ,
125
133
attr_name ,
126
- method_logger (
127
- attr_value ,
128
- cls ,
129
- include_internal_calls ,
130
- ),
134
+ method_logger (attr_value ),
131
135
)
132
136
elif isinstance (attr_value , property ):
133
137
setattr (
134
138
cls ,
135
139
attr_name ,
136
- property_logger (attr_value , cls , include_internal_calls ),
140
+ property_logger (attr_value ),
137
141
)
138
142
return cls
139
143
@@ -145,33 +149,39 @@ def wrap(cls):
145
149
return wrap (decorated_cls )
146
150
147
151
148
- def method_logger (method , decorated_cls , include_internal_calls : bool ):
152
+ def method_logger (method , / , * , custom_base_name : Optional [ str ] = None ):
149
153
"""Decorator that adds logging functionality to a method."""
150
154
151
155
@functools .wraps (method )
152
- def wrapper (self , * args , ** kwargs ):
153
- class_name = decorated_cls .__name__ # Access decorated class name
154
- api_method_name = str (method .__name__ )
155
- full_method_name = f"{ class_name .lower ()} -{ api_method_name } "
156
-
156
+ def wrapper (* args , ** kwargs ):
157
+ api_method_name = getattr (method , LOG_OVERRIDE_NAME , method .__name__ )
158
+ if custom_base_name is None :
159
+ qualname_parts = getattr (method , "__qualname__" , method .__name__ ).split ("." )
160
+ class_name = qualname_parts [- 2 ] if len (qualname_parts ) > 1 else ""
161
+ base_name = (
162
+ class_name if class_name else "_" .join (method .__module__ .split ("." )[1 :])
163
+ )
164
+ else :
165
+ base_name = custom_base_name
166
+
167
+ full_method_name = f"{ base_name .lower ()} -{ api_method_name } "
157
168
# Track directly called methods
158
- if len (_call_stack ) == 0 or include_internal_calls :
169
+ if len (_call_stack ) == 0 :
159
170
add_api_method (full_method_name )
160
171
161
172
_call_stack .append (full_method_name )
162
173
163
174
try :
164
- return method (self , * args , ** kwargs )
175
+ return method (* args , ** kwargs )
165
176
except (NotImplementedError , TypeError ) as e :
166
177
# Log method parameters that are implemented in pandas but either missing (TypeError)
167
178
# or not fully supported (NotImplementedError) in BigFrames.
168
179
# Logging is currently supported only when we can access the bqclient through
169
- # self._block.expr.session.bqclient. Also, to avoid generating multiple queries
170
- # because of internal calls, we log only when the method is directly invoked.
171
- if hasattr (self , "_block" ) and len (_call_stack ) == 1 :
180
+ # _block.session.bqclient.
181
+ if len (_call_stack ) == 1 :
172
182
submit_pandas_labels (
173
- self . _block . expr . session . bqclient ,
174
- class_name ,
183
+ _get_bq_client ( * args , ** kwargs ) ,
184
+ base_name ,
175
185
api_method_name ,
176
186
args ,
177
187
kwargs ,
@@ -184,22 +194,23 @@ def wrapper(self, *args, **kwargs):
184
194
return wrapper
185
195
186
196
187
- def property_logger (prop , decorated_cls , include_internal_calls : bool ):
197
+ def property_logger (prop ):
188
198
"""Decorator that adds logging functionality to a property."""
189
199
190
- def shared_wrapper (f ):
191
- @functools .wraps (f )
200
+ def shared_wrapper (prop ):
201
+ @functools .wraps (prop )
192
202
def wrapped (* args , ** kwargs ):
193
- class_name = decorated_cls .__name__
194
- property_name = f .__name__
203
+ qualname_parts = getattr (prop , "__qualname__" , prop .__name__ ).split ("." )
204
+ class_name = qualname_parts [- 2 ] if len (qualname_parts ) > 1 else ""
205
+ property_name = prop .__name__
195
206
full_property_name = f"{ class_name .lower ()} -{ property_name .lower ()} "
196
207
197
- if len (_call_stack ) == 0 or include_internal_calls :
208
+ if len (_call_stack ) == 0 :
198
209
add_api_method (full_property_name )
199
210
200
211
_call_stack .append (full_property_name )
201
212
try :
202
- return f (* args , ** kwargs )
213
+ return prop (* args , ** kwargs )
203
214
finally :
204
215
_call_stack .pop ()
205
216
@@ -213,12 +224,24 @@ def wrapped(*args, **kwargs):
213
224
)
214
225
215
226
227
def log_name_override(name: str):
    """Build a decorator that tags a callable with a custom logging name.

    The name is stored on the callable itself, under the attribute whose
    name is given by ``LOG_OVERRIDE_NAME``. The callable is not wrapped.

    Args:
        name (str): The name to record on the decorated callable.

    Returns:
        A decorator that sets the attribute and returns its argument
        unchanged.
    """

    def attach(target):
        # Tag in place so the decorated object keeps its identity.
        setattr(target, LOG_OVERRIDE_NAME, name)
        return target

    return attach
237
+
238
+
216
239
def add_api_method(api_method_name):
    """Record an API method name at the front of the tracked-methods list.

    Angle brackets ("<" and ">") are removed from the name before it is
    stored. After insertion the list is cut down to at most
    ``MAX_LABELS_COUNT`` entries, so the most recent names are kept.

    Args:
        api_method_name (str): The name to record.
    """
    global _api_methods

    # Strip both bracket characters in a single pass.
    sanitized_name = api_method_name.translate(str.maketrans("", "", "<>"))

    with _lock:
        # Newest entry goes first.
        _api_methods.insert(0, sanitized_name)
        # Rebind to a bounded prefix so the list never exceeds the limit.
        _api_methods = _api_methods[:MAX_LABELS_COUNT]
224
247
@@ -232,3 +255,17 @@ def get_and_reset_api_methods(dry_run: bool = False):
232
255
if not dry_run :
233
256
_api_methods .clear ()
234
257
return previous_api_methods
258
+
259
+
260
def _get_bq_client(*args, **kwargs):
    """Return the first BigQuery client reachable from the call arguments.

    Each positional argument, then each keyword argument value, is checked
    for an ``_block.session.bqclient`` attribute chain. The first client
    that is not None is returned.

    This helper is called while another exception is being handled, so it
    must not raise ``AttributeError`` itself. An argument that has
    ``_block`` but lacks ``session`` or ``bqclient`` is skipped instead of
    failing, and the search continues with the remaining arguments.

    Args:
        *args: Positional arguments of the call being logged.
        **kwargs: Keyword arguments of the call being logged.

    Returns:
        The client found on one of the arguments, or None if no argument
        exposes one.
    """
    for candidate in (*args, *kwargs.values()):
        # Walk the chain with defaults: getattr(None, ...) simply yields None,
        # so a missing link anywhere results in "no client on this argument".
        block = getattr(candidate, "_block", None)
        session = getattr(block, "session", None)
        client = getattr(session, "bqclient", None)
        if client is not None:
            return client

    return None
0 commit comments