Skip to content

Commit 4f0367a

Browse files
gab23r, gabriel.g.robin, AndreasAlbertQC, borchero
authored
refactor: Prepare nullable column by default (#31)
Co-authored-by: gabriel.g.robin <[email protected]> Co-authored-by: Andreas Albert <[email protected]> Co-authored-by: Oliver Borchert <[email protected]>
1 parent 90f1833 commit 4f0367a

File tree

12 files changed

+123
-14
lines changed

12 files changed

+123
-14
lines changed

dataframely/_deprecation.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright (c) QuantCo 2025-2025
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
4+
import os
5+
import warnings
6+
from collections.abc import Callable
7+
from functools import wraps
8+
9+
# Lower-cased environment variable values that are interpreted as "enabled".
TRUTHY_VALUES = ["1", "true"]


def skip_if(env: str) -> Callable:
    """Build a decorator that turns a function into a no-op based on an env variable.

    The environment variable is read on every call of the decorated function (not
    once at decoration time), so changing it at runtime takes effect immediately.

    Args:
        env: Name of the environment variable to inspect. If its value, compared
            case-insensitively, is one of ``TRUTHY_VALUES``, the wrapped function
            is not executed.

    Returns:
        A decorator. The function it produces forwards all positional and keyword
        arguments to the wrapped function and returns its result, or returns
        ``None`` without calling the wrapped function if the call is skipped.
    """

    def decorator(fun: Callable) -> Callable:
        @wraps(fun)
        def wrapper(*args: object, **kwargs: object) -> object:
            # An unset variable falls back to "" which is never truthy.
            if os.getenv(env, "").lower() in TRUTHY_VALUES:
                return None
            # Forward arguments and the result so that the decorator is usable on
            # arbitrary callables, not only on zero-argument procedures.
            return fun(*args, **kwargs)

        return wrapper

    return decorator
29+
30+
31+
@skip_if(env="DATAFRAMELY_NO_FUTURE_WARNINGS")
def warn_nullable_default_change() -> None:
    """Emit a ``FutureWarning`` about the upcoming change of the ``nullable`` default.

    The warning is suppressed when the ``DATAFRAMELY_NO_FUTURE_WARNINGS``
    environment variable is set to a truthy value (see ``skip_if``).
    """
    message = (
        "The 'nullable' argument was not explicitly set. In a future release, "
        "'nullable=False' will be the default if 'nullable' is not specified. "
        "Explicitly set 'nullable=True' if you want your column to be nullable."
    )
    # NOTE(review): stacklevel=4 presumably aims to attribute the warning to the
    # code constructing the column rather than to library internals; confirm that
    # this matches the actual call depth of all callers.
    warnings.warn(message, category=FutureWarning, stacklevel=4)

dataframely/columns/_base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import polars as pl
1111

1212
from dataframely._compat import pa, sa, sa_TypeEngine
13+
from dataframely._deprecation import warn_nullable_default_change
1314
from dataframely._polars import PolarsDataType
1415
from dataframely.random import Generator
1516

@@ -28,7 +29,7 @@ class Column(ABC):
2829
def __init__(
2930
self,
3031
*,
31-
nullable: bool = True,
32+
nullable: bool | None = None,
3233
primary_key: bool = False,
3334
check: Callable[[pl.Expr], pl.Expr] | None = None,
3435
alias: str | None = None,
@@ -37,6 +38,9 @@ def __init__(
3738
"""
3839
Args:
3940
nullable: Whether this column may contain null values.
41+
Explicitly set `nullable=True` if you want your column to be nullable.
42+
In a future release, `nullable=False` will be the default if `nullable`
43+
is not specified.
4044
primary_key: Whether this column is part of the primary key of the schema.
4145
If ``True``, ``nullable`` is automatically set to ``False``.
4246
check: A custom check to run for this column. Must return a non-aggregated
@@ -48,6 +52,10 @@ def __init__(
4852
internally sets the alias to the column's name in the parent schema.
4953
metadata: A dictionary of metadata to attach to the column.
5054
"""
55+
if nullable is None:
56+
warn_nullable_default_change()
57+
nullable = True
58+
5159
self.nullable = nullable and not primary_key
5260
self.primary_key = primary_key
5361
self.check = check

dataframely/columns/datetime.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class Date(OrdinalMixin[dt.date], Column):
3232
def __init__(
3333
self,
3434
*,
35-
nullable: bool = True,
35+
nullable: bool | None = None,
3636
primary_key: bool = False,
3737
min: dt.date | None = None,
3838
min_exclusive: dt.date | None = None,
@@ -46,6 +46,9 @@ def __init__(
4646
"""
4747
Args:
4848
nullable: Whether this column may contain null values.
49+
Explicitly set `nullable=True` if you want your column to be nullable.
50+
In a future release, `nullable=False` will be the default if `nullable`
51+
is not specified.
4952
primary_key: Whether this column is part of the primary key of the schema.
5053
If ``True``, ``nullable`` is automatically set to ``False``.
5154
min: The minimum date for dates in this column (inclusive).
@@ -142,7 +145,7 @@ class Time(OrdinalMixin[dt.time], Column):
142145
def __init__(
143146
self,
144147
*,
145-
nullable: bool = True,
148+
nullable: bool | None = None,
146149
primary_key: bool = False,
147150
min: dt.time | None = None,
148151
min_exclusive: dt.time | None = None,
@@ -156,6 +159,9 @@ def __init__(
156159
"""
157160
Args:
158161
nullable: Whether this column may contain null values.
162+
Explicitly set `nullable=True` if you want your column to be nullable.
163+
In a future release, `nullable=False` will be the default if `nullable`
164+
is not specified.
159165
primary_key: Whether this column is part of the primary key of the schema.
160166
If ``True``, ``nullable`` is automatically set to ``False``.
161167
min: The minimum time for times in this column (inclusive).
@@ -258,7 +264,7 @@ class Datetime(OrdinalMixin[dt.datetime], Column):
258264
def __init__(
259265
self,
260266
*,
261-
nullable: bool = True,
267+
nullable: bool | None = None,
262268
primary_key: bool = False,
263269
min: dt.datetime | None = None,
264270
min_exclusive: dt.datetime | None = None,
@@ -272,6 +278,9 @@ def __init__(
272278
"""
273279
Args:
274280
nullable: Whether this column may contain null values.
281+
Explicitly set `nullable=True` if you want your column to be nullable.
282+
In a future release, `nullable=False` will be the default if `nullable`
283+
is not specified.
275284
primary_key: Whether this column is part of the primary key of the schema.
276285
If ``True``, ``nullable`` is automatically set to ``False``.
277286
min: The minimum datetime for datetimes in this column (inclusive).
@@ -364,7 +373,7 @@ class Duration(OrdinalMixin[dt.timedelta], Column):
364373
def __init__(
365374
self,
366375
*,
367-
nullable: bool = True,
376+
nullable: bool | None = None,
368377
primary_key: bool = False,
369378
min: dt.timedelta | None = None,
370379
min_exclusive: dt.timedelta | None = None,
@@ -378,6 +387,9 @@ def __init__(
378387
"""
379388
Args:
380389
nullable: Whether this column may contain null values.
390+
Explicitly set `nullable=True` if you want your column to be nullable.
391+
In a future release, `nullable=False` will be the default if `nullable`
392+
is not specified.
381393
primary_key: Whether this column is part of the primary key of the schema.
382394
If ``True``, ``nullable`` is automatically set to ``False``.
383395
min: The minimum duration for durations in this column (inclusive).

dataframely/columns/decimal.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(
2727
precision: int | None = None,
2828
scale: int = 0,
2929
*,
30-
nullable: bool = True,
30+
nullable: bool | None = None,
3131
primary_key: bool = False,
3232
min: decimal.Decimal | None = None,
3333
min_exclusive: decimal.Decimal | None = None,
@@ -42,6 +42,9 @@ def __init__(
4242
precision: Maximum number of digits in each number.
4343
scale: Number of digits to the right of the decimal point in each number.
4444
nullable: Whether this column may contain null values.
45+
Explicitly set `nullable=True` if you want your column to be nullable.
46+
In a future release, `nullable=False` will be the default if `nullable`
47+
is not specified.
4548
primary_key: Whether this column is part of the primary key of the schema.
4649
If ``True``, ``nullable`` is automatically set to ``False``.
4750
min: The minimum value for decimals in this column (inclusive).

dataframely/columns/enum.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(
2222
self,
2323
categories: Sequence[str],
2424
*,
25-
nullable: bool = True,
25+
nullable: bool | None = None,
2626
primary_key: bool = False,
2727
check: Callable[[pl.Expr], pl.Expr] | None = None,
2828
alias: str | None = None,
@@ -32,6 +32,9 @@ def __init__(
3232
Args:
3333
categories: The list of valid categories for the enum.
3434
nullable: Whether this column may contain null values.
35+
Explicitly set `nullable=True` if you want your column to be nullable.
36+
In a future release, `nullable=False` will be the default if `nullable`
37+
is not specified.
3538
primary_key: Whether this column is part of the primary key of the schema.
3639
If ``True``, ``nullable`` is automatically set to ``False``.
3740
check: A custom check to run for this column. Must return a non-aggregated

dataframely/columns/float.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class _BaseFloat(OrdinalMixin[float], Column):
2626
def __init__(
2727
self,
2828
*,
29-
nullable: bool = True,
29+
nullable: bool | None = None,
3030
primary_key: bool = False,
3131
allow_inf_nan: bool = False,
3232
min: float | None = None,
@@ -40,6 +40,9 @@ def __init__(
4040
"""
4141
Args:
4242
nullable: Whether this column may contain null values.
43+
Explicitly set `nullable=True` if you want your column to be nullable.
44+
In a future release, `nullable=False` will be the default if `nullable`
45+
is not specified.
4346
primary_key: Whether this column is part of the primary key of the schema.
4447
If ``True``, ``nullable`` is automatically set to ``False``.
4548
allow_inf_nan: Whether this column may contain NaN and infinity values.

dataframely/columns/integer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class _BaseInteger(IsInMixin[int], OrdinalMixin[int], Column):
2323
def __init__(
2424
self,
2525
*,
26-
nullable: bool = True,
26+
nullable: bool | None = None,
2727
primary_key: bool = False,
2828
min: int | None = None,
2929
min_exclusive: int | None = None,
@@ -37,6 +37,9 @@ def __init__(
3737
"""
3838
Args:
3939
nullable: Whether this column may contain null values.
40+
Explicitly set `nullable=True` if you want your column to be nullable.
41+
In a future release, `nullable=False` will be the default if `nullable`
42+
is not specified.
4043
primary_key: Whether this column is part of the primary key of the schema.
4144
If ``True``, ``nullable`` is automatically set to ``False``.
4245
min: The minimum value for integers in this column (inclusive).

dataframely/columns/list.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __init__(
2424
self,
2525
inner: Column,
2626
*,
27-
nullable: bool = True,
27+
nullable: bool | None = None,
2828
primary_key: bool = False,
2929
check: Callable[[pl.Expr], pl.Expr] | None = None,
3030
alias: str | None = None,
@@ -40,6 +40,9 @@ def __init__(
4040
must be unique across all list items. Note that if the struct itself has
4141
``primary_key=True`` set, the fields' settings do not take effect.
4242
nullable: Whether this column may contain null values.
43+
Explicitly set `nullable=True` if you want your column to be nullable.
44+
In a future release, `nullable=False` will be the default if `nullable`
45+
is not specified.
4346
primary_key: Whether this column is part of the primary key of the schema.
4447
check: A custom check to run for this column. Must return a non-aggregated
4548
boolean expression.

dataframely/columns/string.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class String(Column):
2323
def __init__(
2424
self,
2525
*,
26-
nullable: bool = True,
26+
nullable: bool | None = None,
2727
primary_key: bool = False,
2828
min_length: int | None = None,
2929
max_length: int | None = None,
@@ -35,6 +35,9 @@ def __init__(
3535
"""
3636
Args:
3737
nullable: Whether this column may contain null values.
38+
Explicitly set `nullable=True` if you want your column to be nullable.
39+
In a future release, `nullable=False` will be the default if `nullable`
40+
is not specified.
3841
primary_key: Whether this column is part of the primary key of the schema.
3942
min_length: The minimum byte-length of string values in this column.
4043
max_length: The maximum byte-length of string values in this column.

dataframely/columns/struct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(
2222
self,
2323
inner: dict[str, Column],
2424
*,
25-
nullable: bool = True,
25+
nullable: bool | None = None,
2626
primary_key: bool = False,
2727
check: Callable[[pl.Expr], pl.Expr] | None = None,
2828
alias: str | None = None,
@@ -35,6 +35,9 @@ def __init__(
3535
struct is nested inside a list. In this case, the list items must be
3636
unique wrt. the struct fields that have ``primary_key=True`` set.
3737
nullable: Whether this column may contain null values.
38+
Explicitly set `nullable=True` if you want your column to be nullable.
39+
In a future release, `nullable=False` will be the default if `nullable`
40+
is not specified.
3841
primary_key: Whether this column is part of the primary key of the schema.
3942
check: A custom check to run for this column. Must return a non-aggregated
4043
boolean expression.

0 commit comments

Comments
 (0)