@@ -312,6 +312,7 @@ class ColumnBase(Serializable, BinaryOperand, Reducible):
312312 plc_column : plc .Column
313313 _dtype : DtypeObj
314314 _distinct_count : dict [bool , int ]
315+ _has_nulls : dict [bool , bool ]
315316 _exposed_buffers : set [Buffer ]
316317 _CACHED_PROPERTY_NAMES : ClassVar [frozenset [str ]] = frozenset ()
317318
@@ -357,16 +358,16 @@ def _PANDAS_NA_VALUE(self) -> ScalarLike:
357358 def dtype (self ) -> DtypeObj :
358359 return self ._dtype
359360
@cached_property
def size(self) -> int:
    """Return the size reported by the underlying pylibcudf column.

    The value is obtained from ``self.plc_column.size()`` on first access
    and then memoized on the instance by ``cached_property``.
    """
    return self.plc_column.size()
363364
@cached_property
def data(self) -> None | Buffer:
    """Get data buffer from pylibcudf column.

    Returns whatever ``self.plc_column.data()`` yields (a buffer or
    ``None``); the result is memoized on the instance after first access.
    """
    # ``cast`` only informs the type checker; the object is returned as-is.
    return cast("Buffer | None", self.plc_column.data())
368369
@cached_property
def nullable(self) -> bool:
    """Return ``True`` when the column has a mask buffer.

    A column is considered nullable exactly when ``self.mask`` is not
    ``None``; the answer is memoized on the instance after first access.
    """
    return self.mask is not None
372373
@@ -375,9 +376,14 @@ def has_nulls(self, include_nan: bool = False) -> bool:
375376
376377 NaN inclusion is supported for specific dtypes only.
377378 """
378- return int (self .null_count ) != 0
379+ try :
380+ return self ._has_nulls [include_nan ]
381+ except KeyError :
382+ result = int (self .null_count ) != 0
383+ self ._has_nulls [include_nan ] = result
384+ return result
379385
380- @property
386+ @cached_property
381387 def is_all_null (self ) -> bool :
382388 """Check if all values in the column are null.
383389
@@ -386,7 +392,7 @@ def is_all_null(self) -> bool:
386392 """
387393 return self .null_count == len (self )
388394
389- @property
395+ @cached_property
390396 def valid_count (self ) -> int :
391397 """Return the number of non-null values in the column.
392398
@@ -395,7 +401,7 @@ def valid_count(self) -> int:
395401 """
396402 return len (self ) - self .null_count
397403
@cached_property
def mask(self) -> None | Buffer:
    """Get mask buffer from pylibcudf column.

    Returns whatever ``self.plc_column.null_mask()`` yields (a buffer or
    ``None``); the result is memoized on the instance after first access.
    """
    # ``cast`` only informs the type checker; the object is returned as-is.
    return cast("Buffer | None", self.plc_column.null_mask())
@@ -425,6 +431,7 @@ def access(self, **kwargs: Any) -> _ColumnAccessContext:
425431
426432 def _clear_cache (self ) -> None :
427433 self ._distinct_count .clear ()
434+ self ._has_nulls .clear ()
428435 for attr_name in self ._CACHED_PROPERTY_NAMES :
429436 try :
430437 delattr (self , attr_name )
@@ -458,11 +465,11 @@ def set_mask(self, mask: Buffer | None, null_count: int) -> Self:
458465 ColumnBase .create (new_plc_column , self .dtype ),
459466 )
460467
@cached_property
def null_count(self) -> int:
    """Return the null count reported by the underlying pylibcudf column.

    The value is obtained from ``self.plc_column.null_count()`` on first
    access and then memoized on the instance by ``cached_property``.
    """
    return self.plc_column.null_count()
464471
@cached_property
def offset(self) -> int:
    """Return the offset reported by the underlying pylibcudf column.

    The value is obtained from ``self.plc_column.offset()`` on first
    access and then memoized on the instance by ``cached_property``.
    """
    return self.plc_column.offset()
468475
@@ -800,6 +807,7 @@ def _from_preprocessed(
800807 self .plc_column = plc_column
801808 self ._dtype = dtype
802809 self ._distinct_count = {}
810+ self ._has_nulls = {}
803811 # The set of exposed buffers associated with this column. These buffers must be
804812 # kept alive for the lifetime of this column since anything that accessed the
805813 # CAI of this column will still be pointing to those buffers. As such objects
@@ -1883,7 +1891,6 @@ def as_mask(self) -> tuple[Buffer, int]:
18831891
@property
def is_unique(self) -> bool:
    """Return ``True`` when every value in the column is distinct.

    Compares ``self.distinct_count(dropna=False)`` against ``len(self)``;
    the two are equal only when no value repeats.
    """
    return self.distinct_count(dropna=False) == len(self)
18881895
18891896 @cached_property
0 commit comments