|
10 | 10 | from pointblank.validate import _create_table_type_html, _create_table_dims_html |
11 | 11 | from pointblank._utils import _get_tbl_type, _check_any_df_lib, _select_df_lib |
12 | 12 |
|
13 | | -__all__ = ["preview"] |
| 13 | +__all__ = ["preview", "get_column_count", "get_row_count"] |
14 | 14 |
|
15 | 15 |
|
16 | 16 | def preview( |
@@ -498,3 +498,169 @@ def _select_columns( |
498 | 498 | if tbl_type == "polars": |
499 | 499 | return data.select(resolved_columns) |
500 | 500 | return data[resolved_columns] |
| 501 | + |
| 502 | + |
| 503 | +def get_column_count(data: FrameT | Any) -> int: |
| 504 | + """ |
| 505 | + Get the number of columns in a table. |
| 506 | +
|
| 507 | + The `get_column_count()` function returns the number of columns in a table. The function works |
| 508 | + with any table that is supported by the `pointblank` library, including Pandas, Polars, and Ibis |
| 509 | + backend tables (e.g., DuckDB, MySQL, PostgreSQL, SQLite, Parquet, etc.). |
| 510 | +
|
| 511 | + Parameters |
| 512 | + ---------- |
| 513 | + data |
| 514 | + The table for which to get the column count, which could be a DataFrame object or an Ibis |
| 515 | + table object. Read the *Supported Input Table Types* section for details on the supported |
| 516 | + table types. |
| 517 | +
|
| 518 | + Returns |
| 519 | + ------- |
| 520 | + int |
| 521 | + The number of columns in the table. |
| 522 | +
|
| 523 | + Supported Input Table Types |
| 524 | + --------------------------- |
| 525 | + The `data=` parameter can be given any of the following table types: |
| 526 | +
|
| 527 | + - Polars DataFrame (`"polars"`) |
| 528 | + - Pandas DataFrame (`"pandas"`) |
| 529 | + - DuckDB table (`"duckdb"`)* |
| 530 | + - MySQL table (`"mysql"`)* |
| 531 | + - PostgreSQL table (`"postgresql"`)* |
| 532 | + - SQLite table (`"sqlite"`)* |
| 533 | + - Parquet table (`"parquet"`)* |
| 534 | +
|
| 535 | + The table types marked with an asterisk need to be prepared as Ibis tables (with type of |
| 536 | + `ibis.expr.types.relations.Table`). Furthermore, using `get_column_count()` with these types of |
| 537 | + tables requires the Ibis library (`v9.5.0` or above) to be installed. If the input table is a |
| 538 | + Polars or Pandas DataFrame, the availability of Ibis is not needed. |
| 539 | +
|
| 540 | + Examples |
| 541 | + -------- |
| 542 | + To get the number of columns in a table, we can use the `get_column_count()` function. Here's an |
| 543 | + example using the `small_table` dataset (itself loaded using the `load_dataset()` function): |
| 544 | +
|
| 545 | + ```{python} |
| 546 | + import pointblank as pb |
| 547 | +
|
| 548 | + small_table_polars = pb.load_dataset("small_table") |
| 549 | +
|
| 550 | + pb.get_column_count(small_table_polars) |
| 551 | + ``` |
| 552 | +
|
| 553 | + This table is a Polars DataFrame, but the `get_column_count()` function works with any table |
| 554 | + supported by `pointblank`, including Pandas DataFrames and Ibis backend tables. Here's an |
| 555 | + example using a DuckDB table handled by Ibis: |
| 556 | +
|
| 557 | + ```{python} |
| 558 | + small_table_duckdb = pb.load_dataset("small_table", tbl_type="duckdb") |
| 559 | +
|
| 560 | + pb.get_column_count(small_table_duckdb) |
| 561 | + ``` |
| 562 | +
|
| 563 | + The function always returns the number of columns in the table as an integer value, which is |
| 564 | + `8` for the `small_table` dataset. |
| 565 | + """ |
| 566 | + |
| 567 | + if "ibis.expr.types.relations.Table" in str(type(data)): |
| 568 | + return len(data.columns) |
| 569 | + |
| 570 | + elif "polars" in str(type(data)): |
| 571 | + return len(data.columns) |
| 572 | + |
| 573 | + elif "pandas" in str(type(data)): |
| 574 | + return data.shape[1] |
| 575 | + |
| 576 | + else: |
| 577 | + raise ValueError("The input table type supplied in `data=` is not supported.") |
| 578 | + |
| 579 | + |
| 580 | +def get_row_count(data: FrameT | Any) -> int: |
| 581 | + """ |
| 582 | + Get the number of rows in a table. |
| 583 | +
|
| 584 | + The `get_row_count()` function returns the number of rows in a table. The function works with |
| 585 | + any table that is supported by the `pointblank` library, including Pandas, Polars, and Ibis |
| 586 | + backend tables (e.g., DuckDB, MySQL, PostgreSQL, SQLite, Parquet, etc.). |
| 587 | +
|
| 588 | + Parameters |
| 589 | + ---------- |
| 590 | + data |
| 591 | + The table for which to get the row count, which could be a DataFrame object or an Ibis table |
| 592 | + object. Read the *Supported Input Table Types* section for details on the supported table |
| 593 | + types. |
| 594 | +
|
| 595 | + Returns |
| 596 | + ------- |
| 597 | + int |
| 598 | + The number of rows in the table. |
| 599 | +
|
| 600 | + Supported Input Table Types |
| 601 | + --------------------------- |
| 602 | + The `data=` parameter can be given any of the following table types: |
| 603 | +
|
| 604 | + - Polars DataFrame (`"polars"`) |
| 605 | + - Pandas DataFrame (`"pandas"`) |
| 606 | + - DuckDB table (`"duckdb"`)* |
| 607 | + - MySQL table (`"mysql"`)* |
| 608 | + - PostgreSQL table (`"postgresql"`)* |
| 609 | + - SQLite table (`"sqlite"`)* |
| 610 | + - Parquet table (`"parquet"`)* |
| 611 | +
|
| 612 | + The table types marked with an asterisk need to be prepared as Ibis tables (with type of |
| 613 | + `ibis.expr.types.relations.Table`). Furthermore, using `get_row_count()` with these types of |
| 614 | + tables requires the Ibis library (`v9.5.0` or above) to be installed. If the input table is a |
| 615 | + Polars or Pandas DataFrame, the availability of Ibis is not needed. |
| 616 | +
|
| 617 | + Examples |
| 618 | + -------- |
| 619 | + Getting the number of rows in a table is easily done by using the `get_row_count()` function. |
| 620 | + Here's an example using the `game_revenue` dataset (itself loaded using the `load_dataset()` |
| 621 | + function): |
| 622 | +
|
| 623 | + ```{python} |
| 624 | + import pointblank as pb |
| 625 | +
|
| 626 | + game_revenue_polars = pb.load_dataset("game_revenue") |
| 627 | +
|
| 628 | + pb.get_row_count(game_revenue_polars) |
| 629 | + ``` |
| 630 | +
|
| 631 | + This table is a Polars DataFrame, but the `get_row_count()` function works with any table |
| 632 | + supported by `pointblank`, including Pandas DataFrames and Ibis backend tables. Here's an |
| 633 | + example using a DuckDB table handled by Ibis: |
| 634 | +
|
| 635 | + ```{python} |
| 636 | + game_revenue_duckdb = pb.load_dataset("game_revenue", tbl_type="duckdb") |
| 637 | +
|
| 638 | + pb.get_row_count(game_revenue_duckdb) |
| 639 | + ``` |
| 640 | +
|
| 641 | + The function always returns the number of rows in the table as an integer value, which is `2000` |
| 642 | + for the `game_revenue` dataset. |
| 643 | + """ |
| 644 | + |
| 645 | + if "ibis.expr.types.relations.Table" in str(type(data)): |
| 646 | + |
| 647 | + # Determine whether Pandas or Polars is available to get the row count |
| 648 | + _check_any_df_lib(method_used="get_row_count") |
| 649 | + |
| 650 | + # Select the DataFrame library to use for displaying the Ibis table |
| 651 | + df_lib = _select_df_lib(preference="polars") |
| 652 | + df_lib_name = df_lib.__name__ |
| 653 | + |
| 654 | + if df_lib_name == "pandas": |
| 655 | + return int(data.count().to_pandas()) |
| 656 | + else: |
| 657 | + return int(data.count().to_polars()) |
| 658 | + |
| 659 | + elif "polars" in str(type(data)): |
| 660 | + return int(data.height) |
| 661 | + |
| 662 | + elif "pandas" in str(type(data)): |
| 663 | + return data.shape[0] |
| 664 | + |
| 665 | + else: |
| 666 | + raise ValueError("The input table type supplied in `data=` is not supported.") |
0 commit comments