Skip to content

Commit c291ac3

Browse files
authored
Merge branch 'main' into executor_write_api
2 parents 40323d8 + b321d72 commit c291ac3

File tree

130 files changed

+47562
-819
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+47562
-819
lines changed

.github/ISSUE_TEMPLATE/bug_report.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,12 @@ import bigframes
2929
import google.cloud.bigquery
3030
import pandas
3131
import pyarrow
32-
import sqlglot
3332

3433
print(f"Python: {sys.version}")
3534
print(f"bigframes=={bigframes.__version__}")
3635
print(f"google-cloud-bigquery=={google.cloud.bigquery.__version__}")
3736
print(f"pandas=={pandas.__version__}")
3837
print(f"pyarrow=={pyarrow.__version__}")
39-
print(f"sqlglot=={sqlglot.__version__}")
4038
```
4139

4240
#### Steps to reproduce

LICENSE

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,29 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
318318
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
319319
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
320320
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
321+
322+
---
323+
324+
Files: The bigframes_vendored.sqlglot module.
325+
326+
MIT License
327+
328+
Copyright (c) 2025 Toby Mao
329+
330+
Permission is hereby granted, free of charge, to any person obtaining a copy
331+
of this software and associated documentation files (the "Software"), to deal
332+
in the Software without restriction, including without limitation the rights
333+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
334+
copies of the Software, and to permit persons to whom the Software is
335+
furnished to do so, subject to the following conditions:
336+
337+
The above copyright notice and this permission notice shall be included in all
338+
copies or substantial portions of the Software.
339+
340+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
341+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
342+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
343+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
344+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
345+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
346+
SOFTWARE.

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ It also contains code derived from the following third-party packages:
8282
* `Python <https://www.python.org/>`_
8383
* `scikit-learn <https://scikit-learn.org/>`_
8484
* `XGBoost <https://xgboost.readthedocs.io/en/stable/>`_
85+
* `SQLGlot <https://sqlglot.com/sqlglot.html>`_
8586

8687
For details, see the `third_party
8788
<https://github.com/googleapis/python-bigquery-dataframes/tree/main/third_party/bigframes_vendored>`_

bigframes/core/block_transforms.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -625,21 +625,7 @@ def skew(
625625
# counts, moment3 for each column
626626
aggregations = []
627627
for col in original_columns:
628-
delta3_expr = _mean_delta_to_power(3, col)
629-
count_agg = agg_expressions.UnaryAggregation(
630-
agg_ops.count_op,
631-
ex.deref(col),
632-
)
633-
moment3_agg = agg_expressions.UnaryAggregation(
634-
agg_ops.mean_op,
635-
delta3_expr,
636-
)
637-
variance_agg = agg_expressions.UnaryAggregation(
638-
agg_ops.PopVarOp(),
639-
ex.deref(col),
640-
)
641-
skew_expr = _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
642-
aggregations.append(skew_expr)
628+
aggregations.append(skew_expr(ex.deref(col)))
643629

644630
block = block.aggregate(
645631
aggregations, grouping_column_ids, column_labels=column_labels
@@ -663,16 +649,7 @@ def kurt(
663649
# counts, moment4 for each column
664650
kurt_exprs = []
665651
for col in original_columns:
666-
delta_4_expr = _mean_delta_to_power(4, col)
667-
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, ex.deref(col))
668-
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
669-
variance_agg = agg_expressions.UnaryAggregation(
670-
agg_ops.PopVarOp(), ex.deref(col)
671-
)
672-
673-
# Corresponds to order of aggregations in preceding loop
674-
kurt_expr = _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
675-
kurt_exprs.append(kurt_expr)
652+
kurt_exprs.append(kurt_expr(ex.deref(col)))
676653

677654
block = block.aggregate(
678655
kurt_exprs, grouping_column_ids, column_labels=column_labels
@@ -686,13 +663,38 @@ def kurt(
686663
return block
687664

688665

666+
def skew_expr(expr: ex.Expression) -> ex.Expression:
667+
delta3_expr = _mean_delta_to_power(3, expr)
668+
count_agg = agg_expressions.UnaryAggregation(
669+
agg_ops.count_op,
670+
expr,
671+
)
672+
moment3_agg = agg_expressions.UnaryAggregation(
673+
agg_ops.mean_op,
674+
delta3_expr,
675+
)
676+
variance_agg = agg_expressions.UnaryAggregation(
677+
agg_ops.PopVarOp(),
678+
expr,
679+
)
680+
return _skew_from_moments_and_count(count_agg, moment3_agg, variance_agg)
681+
682+
683+
def kurt_expr(expr: ex.Expression) -> ex.Expression:
684+
delta_4_expr = _mean_delta_to_power(4, expr)
685+
count_agg = agg_expressions.UnaryAggregation(agg_ops.count_op, expr)
686+
moment4_agg = agg_expressions.UnaryAggregation(agg_ops.mean_op, delta_4_expr)
687+
variance_agg = agg_expressions.UnaryAggregation(agg_ops.PopVarOp(), expr)
688+
return _kurt_from_moments_and_count(count_agg, moment4_agg, variance_agg)
689+
690+
689691
def _mean_delta_to_power(
690692
n_power: int,
691-
val_id: str,
693+
col_expr: ex.Expression,
692694
) -> ex.Expression:
693695
"""Calculate (x-mean(x))^n. Useful for calculating moment statistics such as skew and kurtosis."""
694-
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, ex.deref(val_id))
695-
delta = ops.sub_op.as_expr(val_id, mean_expr)
696+
mean_expr = agg_expressions.UnaryAggregation(agg_ops.mean_op, col_expr)
697+
delta = ops.sub_op.as_expr(col_expr, mean_expr)
696698
return ops.pow_op.as_expr(delta, ex.const(n_power))
697699

698700

bigframes/core/compile/sqlglot/aggregate_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16-
import sqlglot.expressions as sge
16+
import bigframes_vendored.sqlglot.expressions as sge
1717

1818
from bigframes.core import agg_expressions, window_spec
1919
from bigframes.core.compile.sqlglot.aggregations import (

bigframes/core/compile/sqlglot/aggregations/binary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
import sqlglot.expressions as sge
19+
import bigframes_vendored.sqlglot.expressions as sge
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg

bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
import sqlglot.expressions as sge
19+
import bigframes_vendored.sqlglot.expressions as sge
2020

2121
from bigframes.core import window_spec
2222
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg

bigframes/core/compile/sqlglot/aggregations/op_registration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import typing
1818

19-
from sqlglot import expressions as sge
19+
from bigframes_vendored.sqlglot import expressions as sge
2020

2121
from bigframes.operations import aggregations as agg_ops
2222

bigframes/core/compile/sqlglot/aggregations/ordered_unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
import sqlglot.expressions as sge
17+
import bigframes_vendored.sqlglot.expressions as sge
1818

1919
import bigframes.core.compile.sqlglot.aggregations.op_registration as reg
2020
import bigframes.core.compile.sqlglot.expressions.typed_expr as typed_expr

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
import typing
1818

19+
import bigframes_vendored.sqlglot.expressions as sge
1920
import pandas as pd
20-
import sqlglot.expressions as sge
2121

2222
from bigframes import dtypes
2323
from bigframes.core import window_spec

0 commit comments

Comments
 (0)