Skip to content

Commit 4b708d4

Browse files
authored
Merge branch 'main' into add-py-314
2 parents db02208 + 0a889fc commit 4b708d4

File tree

137 files changed

+47973
-1160
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

137 files changed

+47973
-1160
lines changed

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,12 @@ import bigframes
2929
import google.cloud.bigquery
3030
import pandas
3131
import pyarrow
32-
import sqlglot
3332

3433
print(f"Python: {sys.version}")
3534
print(f"bigframes=={bigframes.__version__}")
3635
print(f"google-cloud-bigquery=={google.cloud.bigquery.__version__}")
3736
print(f"pandas=={pandas.__version__}")
3837
print(f"pyarrow=={pyarrow.__version__}")
39-
print(f"sqlglot=={sqlglot.__version__}")
4038
```
4139

4240
#### Steps to reproduce

LICENSE

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,29 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
318318
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
319319
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
320320
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
321+
322+
---
323+
324+
Files: The bigframes_vendored.sqlglot module.
325+
326+
MIT License
327+
328+
Copyright (c) 2025 Toby Mao
329+
330+
Permission is hereby granted, free of charge, to any person obtaining a copy
331+
of this software and associated documentation files (the "Software"), to deal
332+
in the Software without restriction, including without limitation the rights
333+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
334+
copies of the Software, and to permit persons to whom the Software is
335+
furnished to do so, subject to the following conditions:
336+
337+
The above copyright notice and this permission notice shall be included in all
338+
copies or substantial portions of the Software.
339+
340+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
341+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
342+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
343+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
344+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
345+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
346+
SOFTWARE.

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ It also contains code derived from the following third-party packages:
8282
* `Python <https://www.python.org/>`_
8383
* `scikit-learn <https://scikit-learn.org/>`_
8484
* `XGBoost <https://xgboost.readthedocs.io/en/stable/>`_
85+
* `SQLGlot <https://sqlglot.com/sqlglot.html>`_
8586

8687
For details, see the `third_party
8788
<https://github.com/googleapis/python-bigquery-dataframes/tree/main/third_party/bigframes_vendored>`_

bigframes/core/block_transforms.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -624,21 +624,7 @@ def skew(
624624
# counts, moment3 for each column
625625
aggregations = []
626626
for col in original_columns:
627-
delta3_expr = _mean_delta_to_power(3, col)
628-
count_agg = agg_expressions.UnaryAggregation(
629-
agg_ops.count_op,
630-
ex.deref(col),
631-
)
632-
moment3_agg = agg_expressions.UnaryAggregation(
633-
agg_ops.mean_op,
634-
delta3_expr,
635-
)
636-
variance_agg = agg_expressions.UnaryAggregation(
637-
agg_ops.PopVarOp(),
638-
ex.deref(col),
639-
)
640-
skew_expr = _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
641-
aggregations.append(skew_expr)
627+
aggregations.append(skew_expr(ex.deref(col)))
642628

643629
block = block.aggregate(
644630
aggregations, grouping_column_ids, column_labels=column_labels
@@ -662,16 +648,7 @@ def kurt(
662648
# counts, moment4 for each column
663649
kurt_exprs = []
664650
for col in original_columns:
665-
delta_4_expr = _mean_delta_to_power(4, col)
666-
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, ex.deref(col))
667-
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
668-
variance_agg = agg_expressions.UnaryAggregation(
669-
agg_ops.PopVarOp(), ex.deref(col)
670-
)
671-
672-
# Corresponds to order of aggregations in preceding loop
673-
kurt_expr = _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
674-
kurt_exprs.append(kurt_expr)
651+
kurt_exprs.append(kurt_expr(ex.deref(col)))
675652

676653
block = block.aggregate(
677654
kurt_exprs, grouping_column_ids, column_labels=column_labels
@@ -685,13 +662,38 @@ def kurt(
685662
return block
686663

687664

665+
def skew_expr(expr: ex.Expression) -> ex.Expression:
666+
delta3_expr = _mean_delta_to_power(3, expr)
667+
count_agg = agg_expressions.UnaryAggregation(
668+
agg_ops.count_op,
669+
expr,
670+
)
671+
moment3_agg = agg_expressions.UnaryAggregation(
672+
agg_ops.mean_op,
673+
delta3_expr,
674+
)
675+
variance_agg = agg_expressions.UnaryAggregation(
676+
agg_ops.PopVarOp(),
677+
expr,
678+
)
679+
return _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
680+
681+
682+
def kurt_expr(expr: ex.Expression) -> ex.Expression:
683+
delta_4_expr = _mean_delta_to_power(4, expr)
684+
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, expr)
685+
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
686+
variance_agg = agg_expressions.UnaryAggregation(agg_ops.PopVarOp(), expr)
687+
return _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
688+
689+
688690
def _mean_delta_to_power(
689691
n_power: int,
690-
val_id: str,
692+
col_expr: ex.Expression,
691693
) -> ex.Expression:
692694
"""Calculate (x-mean(x))^n. Useful for calculating moment statistics such as skew and kurtosis."""
693-
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, ex.deref(val_id))
694-
delta = ops.sub_op.as_expr(val_id, mean_expr)
695+
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, col_expr)
696+
delta = ops.sub_op.as_expr(col_expr, mean_expr)
695697
return ops.pow_op.as_expr(delta, ex.const(n_power))
696698

697699

bigframes/core/bq_data.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,21 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable:
6464
else tuple(table.clustering_fields),
6565
)
6666

67+
@staticmethod
68+
def from_ref_and_schema(
69+
table_ref: bq.TableReference,
70+
schema: Sequence[bq.SchemaField],
71+
cluster_cols: Optional[Sequence[str]] = None,
72+
) -> GbqTable:
73+
return GbqTable(
74+
project_id=table_ref.project,
75+
dataset_id=table_ref.dataset_id,
76+
table_id=table_ref.table_id,
77+
physical_schema=tuple(schema),
78+
is_physically_stored=True,
79+
cluster_cols=tuple(cluster_cols) if cluster_cols else None,
80+
)
81+
6782
def get_table_ref(self) -> bq.TableReference:
6883
return bq.TableReference(
6984
bq.DatasetReference(self.project_id, self.dataset_id), self.table_id

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16-
import sqlglot.expressions as sge
16+
import bigframes_vendored.sqlglot.expressions as sge
1717

1818
from bigframes.core import agg_expressions, window_spec
1919
from bigframes.core.compile.sqlglot.aggregations import (

bigframes/core/compile/sqlglot/aggregations/binary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
import sqlglot.expressions as sge
19+
import bigframes_vendored.sqlglot.expressions as sge
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg

bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
import sqlglot.expressions as sge
19+
import bigframes_vendored.sqlglot.expressions as sge
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
from sqlglot import expressions as sge
19+
from bigframes_vendored.sqlglot import expressions as sge
2020

2121
from bigframes.operations import aggregations as agg_ops
2222

bigframes/core/compile/sqlglot/aggregations/ordered_unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
import sqlglot.expressions as sge
17+
import bigframes_vendored.sqlglot.expressions as sge
1818

1919
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
2020
import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr

0 commit comments

Comments
 (0)