Skip to content

Commit 5b0175c

Browse files
authored
refactor: add sqlglot type conversions (#1599)
1 parent 16a834e commit 5b0175c

File tree

4 files changed

+174
-0
lines changed

4 files changed

+174
-0
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import typing
18+
19+
import bigframes_vendored.constants as constants
20+
import numpy as np
21+
import pandas as pd
22+
import pyarrow as pa
23+
import sqlglot as sg
24+
25+
import bigframes.dtypes
26+
27+
28+
class SQLGlotType:
    """Namespace for converting BigFrames dtypes into SQL type strings."""

    @classmethod
    def from_bigframes_dtype(
        cls,
        bigframes_dtype: typing.Union[
            bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype, np.dtype[typing.Any]
        ],
    ):
        """Return the SQL type string that corresponds to ``bigframes_dtype``.

        Scalar dtypes are matched by equality against the ``bigframes.dtypes``
        constants. Arrow list dtypes become ``ARRAY<...>`` and Arrow struct
        dtypes become ``STRUCT<name TYPE, ...>``; both recurse into their
        element / field types.

        Args:
            bigframes_dtype: The dtype to convert.

        Returns:
            The SQL type name as a string, e.g. ``"INT64"`` or
            ``"ARRAY<STRING>"``.

        Raises:
            ValueError: If no conversion is defined for ``bigframes_dtype``.
        """
        # Ordered (dtype, SQL name) pairs. Compared with ``==`` rather than
        # looked up by hash so that every comparison behaves exactly like an
        # ``if``/``elif`` chain evaluated top to bottom.
        scalar_sql_names = (
            (bigframes.dtypes.INT_DTYPE, "INT64"),
            (bigframes.dtypes.FLOAT_DTYPE, "FLOAT64"),
            (bigframes.dtypes.STRING_DTYPE, "STRING"),
            (bigframes.dtypes.BOOL_DTYPE, "BOOLEAN"),
            (bigframes.dtypes.DATE_DTYPE, "DATE"),
            (bigframes.dtypes.TIME_DTYPE, "TIME"),
            (bigframes.dtypes.DATETIME_DTYPE, "DATETIME"),
            (bigframes.dtypes.TIMESTAMP_DTYPE, "TIMESTAMP"),
            (bigframes.dtypes.BYTES_DTYPE, "BYTES"),
            (bigframes.dtypes.NUMERIC_DTYPE, "NUMERIC"),
            (bigframes.dtypes.BIGNUMERIC_DTYPE, "BIGNUMERIC"),
            (bigframes.dtypes.JSON_DTYPE, "JSON"),
            (bigframes.dtypes.GEO_DTYPE, "GEOGRAPHY"),
        )
        for known_dtype, sql_name in scalar_sql_names:
            if bigframes_dtype == known_dtype:
                return sql_name

        if isinstance(bigframes_dtype, pd.ArrowDtype):
            arrow_type = bigframes_dtype.pyarrow_dtype

            if pa.types.is_list(arrow_type):
                element_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
                    arrow_type.value_type
                )
                element_sql = SQLGlotType.from_bigframes_dtype(element_dtype)
                return f"ARRAY<{element_sql}>"

            if pa.types.is_struct(arrow_type):
                struct_type = typing.cast(pa.StructType, arrow_type)
                members = [
                    struct_type.field(position)
                    for position in range(struct_type.num_fields)
                ]
                rendered_members: list[str] = []
                for member in members:
                    # Field names are rendered as identifiers by sqlglot's
                    # "bigquery" dialect.
                    name_sql = sg.to_identifier(member.name).sql("bigquery")
                    member_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
                        member.type
                    )
                    type_sql = SQLGlotType.from_bigframes_dtype(member_dtype)
                    rendered_members.append(f"{name_sql} {type_sql}")
                return f"STRUCT<{', '.join(rendered_members)}>"

        # Reached for anything not matched above, including Arrow dtypes that
        # are neither lists nor structs.
        raise ValueError(
            f"Unsupported type for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
        )
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pandas as pd
16+
import pyarrow as pa
17+
18+
import bigframes.core.compile.sqlglot.sqlglot_types as sgt
19+
import bigframes.dtypes as dtypes
20+
21+
22+
def test_from_bigframes_simple_dtypes():
    """Each scalar BigFrames dtype converts to its expected SQL type name."""
    convert = sgt.SQLGlotType.from_bigframes_dtype
    # Checked in this order; the first mismatch fails the test.
    expected_sql_names = [
        (dtypes.INT_DTYPE, "INT64"),
        (dtypes.FLOAT_DTYPE, "FLOAT64"),
        (dtypes.STRING_DTYPE, "STRING"),
        (dtypes.BOOL_DTYPE, "BOOLEAN"),
        (dtypes.DATE_DTYPE, "DATE"),
        (dtypes.TIME_DTYPE, "TIME"),
        (dtypes.DATETIME_DTYPE, "DATETIME"),
        (dtypes.TIMESTAMP_DTYPE, "TIMESTAMP"),
        (dtypes.BYTES_DTYPE, "BYTES"),
        (dtypes.NUMERIC_DTYPE, "NUMERIC"),
        (dtypes.BIGNUMERIC_DTYPE, "BIGNUMERIC"),
        (dtypes.JSON_DTYPE, "JSON"),
        (dtypes.GEO_DTYPE, "GEOGRAPHY"),
    ]
    for scalar_dtype, sql_name in expected_sql_names:
        assert convert(scalar_dtype) == sql_name
36+
37+
38+
def test_from_bigframes_struct_dtypes():
    """A two-field Arrow struct dtype converts to a STRUCT<...> type string."""
    arrow_struct = pa.struct(
        [
            pa.field("int_col", pa.int64()),
            pa.field("bool_col", pa.bool_()),
        ]
    )
    actual = sgt.SQLGlotType.from_bigframes_dtype(pd.ArrowDtype(arrow_struct))
    assert actual == "STRUCT<int_col INT64, bool_col BOOLEAN>"
43+
44+
45+
def test_from_bigframes_array_dtypes():
    """Arrow list dtypes convert to ARRAY<element type> strings."""
    cases = (
        (pa.int64(), "ARRAY<INT64>"),
        (pa.string(), "ARRAY<STRING>"),
    )
    for element_type, expected_sql in cases:
        list_dtype = pd.ArrowDtype(pa.list_(element_type))
        assert sgt.SQLGlotType.from_bigframes_dtype(list_dtype) == expected_sql
51+
52+
53+
def test_from_bigframes_multi_nested_dtypes():
    """An array of structs that itself contains an array converts recursively."""
    inner_array = pa.list_(pa.timestamp("us"))
    row_struct = pa.struct(
        [
            pa.field("string_col", pa.string()),
            pa.field("date_col", pa.date32()),
            pa.field("array_col", inner_array),
        ]
    )
    nested_dtype = pd.ArrowDtype(pa.list_(row_struct))

    actual = sgt.SQLGlotType.from_bigframes_dtype(nested_dtype)

    assert actual == (
        "ARRAY<STRUCT<string_col STRING, date_col DATE, array_col ARRAY<DATETIME>>>"
    )

0 commit comments

Comments
 (0)