Skip to content

Commit a356774

Browse files
committed
feat(datatypes): support ibis.dtype(nullable=None)
1 parent 9ab2d63 commit a356774

File tree

12 files changed

+213
-61
lines changed

12 files changed

+213
-61
lines changed

ibis/expr/datatypes/core.py

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -41,52 +41,58 @@
4141

4242

4343
@overload
44-
def dtype(value: type[int] | Literal["int"], nullable: bool = True) -> Int64: ...
44+
def dtype(value: type[int] | Literal["int"], nullable: bool | None = None) -> Int64: ...
4545
@overload
4646
def dtype(
47-
value: type[str] | Literal["str", "string"], nullable: bool = True
47+
value: type[str] | Literal["str", "string"], nullable: bool | None = None
4848
) -> String: ...
4949
@overload
5050
def dtype(
51-
value: type[bool] | Literal["bool", "boolean"], nullable: bool = True
51+
value: type[bool] | Literal["bool", "boolean"], nullable: bool | None = None
5252
) -> Boolean: ...
5353
@overload
54-
def dtype(value: type[bytes] | Literal["bytes"], nullable: bool = True) -> Binary: ...
54+
def dtype(
55+
value: type[bytes] | Literal["bytes"], nullable: bool | None = None
56+
) -> Binary: ...
5557
@overload
56-
def dtype(value: type[Real] | Literal["float"], nullable: bool = True) -> Float64: ...
58+
def dtype(
59+
value: type[Real] | Literal["float"], nullable: bool | None = None
60+
) -> Float64: ...
5761
@overload
5862
def dtype(
59-
value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool = True
63+
value: type[pydecimal.Decimal] | Literal["decimal"], nullable: bool | None = None
6064
) -> Decimal: ...
6165
@overload
6266
def dtype(
63-
value: type[pydatetime.datetime] | Literal["timestamp"], nullable: bool = True
67+
value: type[pydatetime.datetime] | Literal["timestamp"],
68+
nullable: bool | None = None,
6469
) -> Timestamp: ...
6570
@overload
6671
def dtype(
67-
value: type[pydatetime.date] | Literal["date"], nullable: bool = True
72+
value: type[pydatetime.date] | Literal["date"], nullable: bool | None = None
6873
) -> Date: ...
6974
@overload
7075
def dtype(
71-
value: type[pydatetime.time] | Literal["time"], nullable: bool = True
76+
value: type[pydatetime.time] | Literal["time"], nullable: bool | None = None
7277
) -> Time: ...
7378
@overload
7479
def dtype(
75-
value: type[pydatetime.timedelta] | Literal["interval"], nullable: bool = True
80+
value: type[pydatetime.timedelta] | Literal["interval"],
81+
nullable: bool | None = None,
7682
) -> Interval: ...
7783
@overload
7884
def dtype(
79-
value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool = True
85+
value: type[pyuuid.UUID] | Literal["uuid"], nullable: bool | None = None
8086
) -> UUID: ...
8187
@overload
8288
def dtype(
8389
value: DataType | str | np.dtype | ExtensionDtype | pl.DataType | pa.DataType,
84-
nullable: bool = True,
90+
nullable: bool | None = None,
8591
) -> DataType: ...
8692

8793

8894
@lazy_singledispatch
89-
def dtype(value, nullable=True) -> DataType:
95+
def dtype(value, nullable: bool | None = None) -> DataType:
9096
"""Create a DataType object.
9197
9298
Parameters
@@ -96,21 +102,42 @@ def dtype(value, nullable=True) -> DataType:
96102
strings, python type annotations, numpy dtypes, pandas dtypes, and
97103
pyarrow types.
98104
nullable
99-
Whether the type should be nullable. Defaults to True.
100-
If `value` is a string prefixed by "!", the type is always non-nullable.
105+
Whether the resulting type should be nullable.
106+
If `None`, we try to infer nullability from the input value.
107+
For example, if `value` is a string starting with '!', the resulting type
108+
will be non-nullable.
109+
For inputs without an explicit nullability (like the python type `int` or
110+
numpy dtype of `np.int32`), we default to `nullable=True`.
101111
102112
Examples
103113
--------
104114
>>> import ibis
105115
>>> ibis.dtype("int32")
106116
Int32(nullable=True)
117+
118+
Prefixing the type with "!" makes it non-nullable:
119+
107120
>>> ibis.dtype("!int32")
108121
Int32(nullable=False)
109-
>>> ibis.dtype("array<float>")
110-
Array(value_type=Float64(nullable=True), length=None, nullable=True)
122+
123+
We support a rich string syntax for nested and parametric types:
124+
125+
>>> ibis.dtype("array<!float>")
126+
Array(value_type=Float64(nullable=False), length=None, nullable=True)
127+
>>> ibis.dtype("!struct<a: interval('s'), b: !bool>")
128+
Struct([('a', Interval(unit=<IntervalUnit.SECOND: 's'>, nullable=True)), ('b', Boolean(nullable=False))], nullable=False)
129+
>>> ibis.dtype("map<timestamp('America/Anchorage', 6), boolean>")
130+
Map(key_type=Timestamp(timezone='America/Anchorage', scale=6, nullable=True), value_type=Boolean(nullable=True), nullable=True)
131+
132+
The function is idempotent (i.e. it is a no-op when passed a DataType):
133+
>>> t = ibis.dtype("int32")
134+
>>> ibis.dtype(t) is t
135+
True
111136
112137
DataType objects may also be created from Python types:
113138
139+
>>> ibis.dtype(int)
140+
Int64(nullable=True)
114141
>>> ibis.dtype(int, nullable=False)
115142
Int64(nullable=False)
116143
>>> ibis.dtype(list[float])
@@ -121,36 +148,52 @@ def dtype(value, nullable=True) -> DataType:
121148
>>> import pyarrow as pa
122149
>>> ibis.dtype(pa.int32())
123150
Int32(nullable=True)
151+
>>> ibis.dtype(pa.int32(), nullable=False)
152+
Int32(nullable=False)
153+
154+
The `nullable` parameter may be used to override the nullability:
155+
156+
>>> ibis.dtype("!int32", nullable=True)
157+
Int32(nullable=True)
158+
>>> i = ibis.dtype("int32")
159+
>>> i
160+
Int32(nullable=True)
161+
>>> ibis.dtype(i, nullable=False)
162+
Int32(nullable=False)
124163
125164
"""
126165
if isinstance(value, DataType):
127-
return value
166+
if nullable is None:
167+
return value
168+
return value.copy(nullable=nullable)
128169
else:
170+
if nullable is None:
171+
nullable = True
129172
return DataType.from_typehint(value, nullable)
130173

131174

132175
@dtype.register(str)
133-
def from_string(value, nullable: bool = True):
176+
def from_string(value, nullable=None):
134177
return DataType.from_string(value, nullable)
135178

136179

137180
@dtype.register("numpy.dtype")
138-
def from_numpy_dtype(value, nullable=True):
181+
def from_numpy_dtype(value, nullable=None):
139182
return DataType.from_numpy(value, nullable)
140183

141184

142185
@dtype.register("pandas.core.dtypes.base.ExtensionDtype")
143-
def from_pandas_extension_dtype(value, nullable=True):
186+
def from_pandas_extension_dtype(value, nullable=None):
144187
return DataType.from_pandas(value, nullable)
145188

146189

147190
@dtype.register("pyarrow.lib.DataType")
148-
def from_pyarrow(value, nullable=True):
191+
def from_pyarrow(value, nullable=None):
149192
return DataType.from_pyarrow(value, nullable)
150193

151194

152195
@dtype.register("polars.datatypes.classes.DataTypeClass")
153-
def from_polars(value, nullable=True):
196+
def from_polars(value, nullable=None):
154197
return DataType.from_polars(value, nullable)
155198

156199

@@ -228,15 +271,15 @@ def castable(self, to: DataType, **kwargs) -> bool:
228271
return castable(self, to, **kwargs)
229272

230273
@classmethod
231-
def from_string(cls, value: str, nullable: bool = True) -> Self:
274+
def from_string(cls, value: str, nullable: bool | None = None) -> Self:
232275
from ibis.expr.datatypes.parse import parse
233276

234277
try:
235278
typ = parse(value)
236279
except SyntaxError:
237280
raise TypeError(f"{value!r} cannot be parsed as a datatype")
238281

239-
if not nullable:
282+
if nullable is not None:
240283
return typ.copy(nullable=nullable)
241284
return typ
242285

@@ -309,23 +352,25 @@ def from_typehint(cls, typ, nullable=True) -> Self:
309352
raise TypeError(f"Value {typ!r} is not a valid datatype")
310353

311354
@classmethod
312-
def from_numpy(cls, numpy_type: np.dtype, nullable: bool = True) -> Self:
355+
def from_numpy(cls, numpy_type: np.dtype, nullable: bool | None = None) -> Self:
313356
"""Return the equivalent ibis datatype."""
314357
from ibis.formats.numpy import NumpyType
315358

316359
return NumpyType.to_ibis(numpy_type, nullable=nullable)
317360

318361
@classmethod
319362
def from_pandas(
320-
cls, pandas_type: np.dtype | ExtensionDtype, nullable: bool = True
363+
cls, pandas_type: np.dtype | ExtensionDtype, nullable: bool | None = None
321364
) -> Self:
322365
"""Return the equivalent ibis datatype."""
323366
from ibis.formats.pandas import PandasType
324367

325368
return PandasType.to_ibis(pandas_type, nullable=nullable)
326369

327370
@classmethod
328-
def from_pyarrow(cls, arrow_type: pa.DataType, nullable: bool = True) -> Self:
371+
def from_pyarrow(
372+
cls, arrow_type: pa.DataType, nullable: bool | None = None
373+
) -> Self:
329374
"""Return the equivalent ibis datatype."""
330375
from ibis.formats.pyarrow import PyArrowType
331376

@@ -988,7 +1033,7 @@ def __getitem__(self, key: str) -> DataType:
9881033

9891034
def __repr__(self) -> str:
9901035
name = self.__class__.__name__
991-
return f"'{name}({list(self.items())}, nullable={self.nullable})"
1036+
return f"{name}({list(self.items())}, nullable={self.nullable})"
9921037

9931038
@property
9941039
def _pretty_piece(self) -> str:

ibis/expr/datatypes/tests/test_core.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,9 @@ def test_dtype(spec, expected):
7474
marks=pytest.mark.xfail(sys.version_info < (3, 10), reason="python 3.9"),
7575
),
7676
(lambda: ("!int",), dt.Int64(nullable=False)),
77-
(lambda: ("!int", True), dt.Int64(nullable=False)), # "!" overrides `nullable`
77+
(lambda: ("!int", None), dt.Int64(nullable=False)),
78+
(lambda: ("!int", False), dt.Int64(nullable=False)),
79+
(lambda: ("!int", True), dt.Int64(nullable=True)),
7880
],
7981
)
8082
def test_nullable_dtype(args, expected):
@@ -105,8 +107,73 @@ def test_bogus_union():
105107
(dt.Time, dt.time),
106108
],
107109
)
108-
def test_dtype_from_classes(klass, expected):
109-
assert dt.dtype(klass) == expected
110+
@pytest.mark.parametrize(
111+
("nullable", "expected_nullable"),
112+
[
113+
(True, True),
114+
(False, False),
115+
(None, True),
116+
],
117+
)
118+
def test_dtype_from_classes(klass, expected, nullable, expected_nullable):
119+
assert dt.dtype(klass, nullable=nullable) == expected.copy(
120+
nullable=expected_nullable
121+
)
122+
123+
124+
@pytest.mark.parametrize(
125+
("inp", "nullable", "expected"),
126+
[
127+
(dt.Null(nullable=True), True, dt.Null(nullable=True)),
128+
(dt.Null(nullable=True), False, dt.Null(nullable=False)),
129+
(dt.Null(nullable=True), None, dt.Null(nullable=True)),
130+
(dt.Null(nullable=False), True, dt.Null(nullable=True)),
131+
(dt.Null(nullable=False), False, dt.Null(nullable=False)),
132+
(dt.Null(nullable=False), None, dt.Null(nullable=False)),
133+
(dt.Int16(nullable=True), True, dt.Int16(nullable=True)),
134+
(dt.Int16(nullable=True), False, dt.Int16(nullable=False)),
135+
(dt.Int16(nullable=True), None, dt.Int16(nullable=True)),
136+
(dt.Int16(nullable=False), True, dt.Int16(nullable=True)),
137+
(dt.Int16(nullable=False), False, dt.Int16(nullable=False)),
138+
(dt.Int16(nullable=False), None, dt.Int16(nullable=False)),
139+
# The nullability of the element type is NEVER changed,
140+
# only the outer nullability can be changed.
141+
(
142+
dt.Array(dt.Int16(nullable=True), nullable=True),
143+
True,
144+
dt.Array(dt.Int16(nullable=True), nullable=True),
145+
),
146+
(
147+
dt.Array(dt.Int16(nullable=True), nullable=True),
148+
False,
149+
dt.Array(dt.Int16(nullable=True), nullable=False),
150+
),
151+
(
152+
dt.Array(dt.Int16(nullable=True), nullable=True),
153+
None,
154+
dt.Array(dt.Int16(nullable=True), nullable=True),
155+
),
156+
(
157+
dt.Array(dt.Int16(nullable=False), nullable=True),
158+
True,
159+
dt.Array(dt.Int16(nullable=False), nullable=True),
160+
),
161+
(
162+
dt.Array(dt.Int16(nullable=False), nullable=True),
163+
False,
164+
dt.Array(dt.Int16(nullable=False), nullable=False),
165+
),
166+
(
167+
dt.Array(dt.Int16(nullable=False), nullable=True),
168+
None,
169+
dt.Array(dt.Int16(nullable=False), nullable=True),
170+
),
171+
],
172+
)
173+
def test_dtype_from_datatype_instance(
174+
inp: dt.DataType, nullable: bool | None, expected: dt.DataType
175+
):
176+
assert dt.dtype(inp, nullable=nullable) == expected
110177

111178

112179
@pytest.mark.parametrize(

ibis/expr/datatypes/tests/test_parse.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@
4646
)
4747
def test_primitive_from_string(nullable, spec, expected):
4848
assert dt.dtype(spec, nullable=nullable) == expected(nullable=nullable)
49+
assert dt.dtype(spec, nullable=None) == expected(nullable=True)
50+
assert dt.dtype(spec) == expected(nullable=True)
51+
assert dt.dtype("!" + spec, nullable=nullable) == expected(nullable=nullable)
52+
assert dt.dtype("!" + spec, nullable=None) == expected(nullable=False)
53+
assert dt.dtype("!" + spec) == expected(nullable=False)
4954

5055

5156
@pytest.mark.parametrize(

ibis/formats/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def from_ibis(cls, dtype: DataType) -> T:
3838
raise NotImplementedError
3939

4040
@classmethod
41-
def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
41+
def to_ibis(cls, typ: T, nullable: bool | None = None) -> DataType:
4242
"""Convert a format-specific type object to an Ibis DataType.
4343
4444
Parameters
@@ -47,6 +47,8 @@ def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
4747
The format-specific type object to convert.
4848
nullable
4949
Whether the Ibis DataType should be nullable.
50+
If `None`, the nullability will be inferred from `typ` if possible.
51+
If inference is not possible, we assume `nullable=True`.
5052
5153
Returns
5254
-------
@@ -56,7 +58,7 @@ def to_ibis(cls, typ: T, nullable: bool = True) -> DataType:
5658
raise NotImplementedError
5759

5860
@classmethod
59-
def from_string(cls, text: str, nullable: bool = True) -> DataType:
61+
def from_string(cls, text: str, nullable: bool | None = None) -> DataType:
6062
"""Convert a backend-specific string representation into an Ibis DataType.
6163
6264
Parameters
@@ -65,6 +67,8 @@ def from_string(cls, text: str, nullable: bool = True) -> DataType:
6567
The backend-specific string representation to convert.
6668
nullable
6769
Whether the Ibis DataType should be nullable.
70+
If `None`, the nullability will be inferred from `text` if possible,
71+
e.g. if the string starts with '!', it is considered non-nullable.
6872
6973
Returns
7074
-------

ibis/formats/numpy.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,13 @@
3636

3737
class NumpyType(TypeMapper[np.dtype]):
3838
@classmethod
39-
def to_ibis(cls, typ: np.dtype, nullable: bool = True) -> dt.DataType:
39+
def to_ibis(cls, typ: np.dtype, nullable: bool | None = True) -> dt.DataType:
40+
# numpy's type system doesn't keep track of nullability.
41+
# We accept nullable=None to be compatible with the rest of TypeMapper.to_ibis()
42+
# implementations, but we treat None as True, since we can't infer nullability
43+
# from a numpy dtype.
44+
if nullable is None:
45+
nullable = True
4046
if np.issubdtype(typ, np.datetime64):
4147
# TODO(kszucs): the following code provides proper timestamp roundtrips
4248
# between ibis and numpy/pandas but breaks the test suite at several

ibis/formats/pandas.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@
3131

3232
class PandasType(NumpyType):
3333
@classmethod
34-
def to_ibis(cls, typ, nullable=True):
34+
def to_ibis(cls, typ, nullable: bool | None = None):
35+
# pandas's type system doesn't keep track of nullability.
36+
# We accept nullable=None to be compatible with the rest of TypeMapper.to_ibis()
37+
# implementations, but we treat None as True, since we can't infer nullability
38+
# from a pandas dtype.
39+
if nullable is None:
40+
nullable = True
3541
if isinstance(typ, pdt.DatetimeTZDtype):
3642
return dt.Timestamp(timezone=str(typ.tz), nullable=nullable)
3743
elif pdt.is_datetime64_dtype(typ):

0 commit comments

Comments
 (0)