4141
4242
4343@overload
44- def dtype (value : type [int ] | Literal ["int" ], nullable : bool = True ) -> Int64 : ...
44+ def dtype (value : type [int ] | Literal ["int" ], nullable : bool | None = None ) -> Int64 : ...
4545@overload
4646def dtype (
47- value : type [str ] | Literal ["str" , "string" ], nullable : bool = True
47+ value : type [str ] | Literal ["str" , "string" ], nullable : bool | None = None
4848) -> String : ...
4949@overload
5050def dtype (
51- value : type [bool ] | Literal ["bool" , "boolean" ], nullable : bool = True
51+ value : type [bool ] | Literal ["bool" , "boolean" ], nullable : bool | None = None
5252) -> Boolean : ...
5353@overload
54- def dtype (value : type [bytes ] | Literal ["bytes" ], nullable : bool = True ) -> Binary : ...
54+ def dtype (
55+ value : type [bytes ] | Literal ["bytes" ], nullable : bool | None = None
56+ ) -> Binary : ...
5557@overload
56- def dtype (value : type [Real ] | Literal ["float" ], nullable : bool = True ) -> Float64 : ...
58+ def dtype (
59+ value : type [Real ] | Literal ["float" ], nullable : bool | None = None
60+ ) -> Float64 : ...
5761@overload
5862def dtype (
59- value : type [pydecimal .Decimal ] | Literal ["decimal" ], nullable : bool = True
63+ value : type [pydecimal .Decimal ] | Literal ["decimal" ], nullable : bool | None = None
6064) -> Decimal : ...
6165@overload
6266def dtype (
63- value : type [pydatetime .datetime ] | Literal ["timestamp" ], nullable : bool = True
67+ value : type [pydatetime .datetime ] | Literal ["timestamp" ],
68+ nullable : bool | None = None ,
6469) -> Timestamp : ...
6570@overload
6671def dtype (
67- value : type [pydatetime .date ] | Literal ["date" ], nullable : bool = True
72+ value : type [pydatetime .date ] | Literal ["date" ], nullable : bool | None = None
6873) -> Date : ...
6974@overload
7075def dtype (
71- value : type [pydatetime .time ] | Literal ["time" ], nullable : bool = True
76+ value : type [pydatetime .time ] | Literal ["time" ], nullable : bool | None = None
7277) -> Time : ...
7378@overload
7479def dtype (
75- value : type [pydatetime .timedelta ] | Literal ["interval" ], nullable : bool = True
80+ value : type [pydatetime .timedelta ] | Literal ["interval" ],
81+ nullable : bool | None = None ,
7682) -> Interval : ...
7783@overload
7884def dtype (
79- value : type [pyuuid .UUID ] | Literal ["uuid" ], nullable : bool = True
85+ value : type [pyuuid .UUID ] | Literal ["uuid" ], nullable : bool | None = None
8086) -> UUID : ...
8187@overload
8288def dtype (
8389 value : DataType | str | np .dtype | ExtensionDtype | pl .DataType | pa .DataType ,
84- nullable : bool = True ,
90+ nullable : bool | None = None ,
8591) -> DataType : ...
8692
8793
8894@lazy_singledispatch
89- def dtype (value , nullable = True ) -> DataType :
95+ def dtype (value , nullable : bool | None = None ) -> DataType :
9096 """Create a DataType object.
9197
9298 Parameters
@@ -96,21 +102,42 @@ def dtype(value, nullable=True) -> DataType:
96102 strings, python type annotations, numpy dtypes, pandas dtypes, and
97103 pyarrow types.
98104 nullable
99- Whether the type should be nullable. Defaults to True.
100- If `value` is a string prefixed by "!", the type is always non-nullable.
105+ Whether the resulting type should be nullable.
106+ If `None`, we try to infer nullability from the input value.
107+ For example, if `value` is a string starting with '!', the resulting type
108+ will be non-nullable.
109+ For inputs without an explicit nullability (like the python type `int` or
110+ numpy dtype of `np.int32`), we default to `nullable=True`.
101111
102112 Examples
103113 --------
104114 >>> import ibis
105115 >>> ibis.dtype("int32")
106116 Int32(nullable=True)
117+
118+ Prefixing the type with "!" makes it non-nullable:
119+
107120 >>> ibis.dtype("!int32")
108121 Int32(nullable=False)
109- >>> ibis.dtype("array<float>")
110- Array(value_type=Float64(nullable=True), length=None, nullable=True)
122+
123+ We support a rich string syntax for nested and parametric types:
124+
125+ >>> ibis.dtype("array<!float>")
126+ Array(value_type=Float64(nullable=False), length=None, nullable=True)
127+ >>> ibis.dtype("!struct<a: interval('s'), b: !bool>")
128+ Struct([('a', Interval(unit=<IntervalUnit.SECOND: 's'>, nullable=True)), ('b', Boolean(nullable=False))], nullable=False)
129+ >>> ibis.dtype("map<timestamp('America/Anchorage', 6), boolean>")
130+ Map(key_type=Timestamp(timezone='America/Anchorage', scale=6, nullable=True), value_type=Boolean(nullable=True), nullable=True)
131+
132+ The function is idempotent (AKA is a no-op when passed a DataType):
133+ >>> t = ibis.dtype("int32")
134+ >>> ibis.dtype(t) is t
135+ True
111136
112137 DataType objects may also be created from Python types:
113138
139+ >>> ibis.dtype(int)
140+ Int64(nullable=True)
114141 >>> ibis.dtype(int, nullable=False)
115142 Int64(nullable=False)
116143 >>> ibis.dtype(list[float])
@@ -121,36 +148,52 @@ def dtype(value, nullable=True) -> DataType:
121148 >>> import pyarrow as pa
122149 >>> ibis.dtype(pa.int32())
123150 Int32(nullable=True)
151+ >>> ibis.dtype(pa.int32(), nullable=False)
152+ Int32(nullable=False)
153+
154+ The `nullable` parameter may be used to override the nullability:
155+
156+ >>> ibis.dtype("!int32", nullable=True)
157+ Int32(nullable=True)
158+ >>> i = ibis.dtype("int32")
159+ >>> i
160+ Int32(nullable=True)
161+ >>> ibis.dtype(i, nullable=False)
162+ Int32(nullable=False)
124163
125164 """
126165 if isinstance (value , DataType ):
127- return value
166+ if nullable is None :
167+ return value
168+ return value .copy (nullable = nullable )
128169 else :
170+ if nullable is None :
171+ nullable = True
129172 return DataType .from_typehint (value , nullable )
130173
131174
132175@dtype .register (str )
133- def from_string (value , nullable : bool = True ):
176+ def from_string (value , nullable = None ):
134177 return DataType .from_string (value , nullable )
135178
136179
137180@dtype .register ("numpy.dtype" )
138- def from_numpy_dtype (value , nullable = True ):
181+ def from_numpy_dtype (value , nullable = None ):
139182 return DataType .from_numpy (value , nullable )
140183
141184
142185@dtype .register ("pandas.core.dtypes.base.ExtensionDtype" )
143- def from_pandas_extension_dtype (value , nullable = True ):
186+ def from_pandas_extension_dtype (value , nullable = None ):
144187 return DataType .from_pandas (value , nullable )
145188
146189
147190@dtype .register ("pyarrow.lib.DataType" )
148- def from_pyarrow (value , nullable = True ):
191+ def from_pyarrow (value , nullable = None ):
149192 return DataType .from_pyarrow (value , nullable )
150193
151194
152195@dtype .register ("polars.datatypes.classes.DataTypeClass" )
153- def from_polars (value , nullable = True ):
196+ def from_polars (value , nullable = None ):
154197 return DataType .from_polars (value , nullable )
155198
156199
@@ -228,15 +271,15 @@ def castable(self, to: DataType, **kwargs) -> bool:
228271 return castable (self , to , ** kwargs )
229272
230273 @classmethod
231- def from_string (cls , value : str , nullable : bool = True ) -> Self :
274+ def from_string (cls , value : str , nullable : bool | None = None ) -> Self :
232275 from ibis .expr .datatypes .parse import parse
233276
234277 try :
235278 typ = parse (value )
236279 except SyntaxError :
237280 raise TypeError (f"{ value !r} cannot be parsed as a datatype" )
238281
239- if not nullable :
282+ if nullable is not None :
240283 return typ .copy (nullable = nullable )
241284 return typ
242285
@@ -309,23 +352,25 @@ def from_typehint(cls, typ, nullable=True) -> Self:
309352 raise TypeError (f"Value { typ !r} is not a valid datatype" )
310353
311354 @classmethod
312- def from_numpy (cls , numpy_type : np .dtype , nullable : bool = True ) -> Self :
355+ def from_numpy (cls , numpy_type : np .dtype , nullable : bool | None = None ) -> Self :
313356 """Return the equivalent ibis datatype."""
314357 from ibis .formats .numpy import NumpyType
315358
316359 return NumpyType .to_ibis (numpy_type , nullable = nullable )
317360
318361 @classmethod
319362 def from_pandas (
320- cls , pandas_type : np .dtype | ExtensionDtype , nullable : bool = True
363+ cls , pandas_type : np .dtype | ExtensionDtype , nullable : bool | None = None
321364 ) -> Self :
322365 """Return the equivalent ibis datatype."""
323366 from ibis .formats .pandas import PandasType
324367
325368 return PandasType .to_ibis (pandas_type , nullable = nullable )
326369
327370 @classmethod
328- def from_pyarrow (cls , arrow_type : pa .DataType , nullable : bool = True ) -> Self :
371+ def from_pyarrow (
372+ cls , arrow_type : pa .DataType , nullable : bool | None = None
373+ ) -> Self :
329374 """Return the equivalent ibis datatype."""
330375 from ibis .formats .pyarrow import PyArrowType
331376
@@ -988,7 +1033,7 @@ def __getitem__(self, key: str) -> DataType:
9881033
9891034 def __repr__ (self ) -> str :
9901035 name = self .__class__ .__name__
991- return f"' { name } ({ list (self .items ())} , nullable={ self .nullable } )"
1036+ return f"{ name } ({ list (self .items ())} , nullable={ self .nullable } )"
9921037
9931038 @property
9941039 def _pretty_piece (self ) -> str :
0 commit comments