From e8308de265cbaf0005e8bfd87ebe2b236d1f1030 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Apr 2025 10:12:13 +0800 Subject: [PATCH 1/5] docs: enhance user guide with detailed DataFrame operations and examples --- docs/source/api/dataframe.rst | 162 ++++++++++++++++++++ docs/source/user-guide/basics.rst | 2 + docs/source/user-guide/dataframe.rst | 213 +++++++++++++++++++++++++++ 3 files changed, 377 insertions(+) create mode 100644 docs/source/api/dataframe.rst create mode 100644 docs/source/user-guide/dataframe.rst diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst new file mode 100644 index 000000000..675b14bf9 --- /dev/null +++ b/docs/source/api/dataframe.rst @@ -0,0 +1,162 @@ +DataFrames +========== + +Overview +-------- + +DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. +It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust +and Arrow. + +A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. +The actual execution happens when terminal operations like `collect()` or `show()` are called. + +Basic Usage +---------- + +.. 
code-block:: python + + import datafusion + from datafusion import col, lit + + # Create a context and register a data source + ctx = datafusion.SessionContext() + ctx.register_csv("my_table", "path/to/data.csv") + + # Create and manipulate a DataFrame + df = ctx.sql("SELECT * FROM my_table") + + # Or use the DataFrame API directly + df = (ctx.table("my_table") + .filter(col("age") > lit(25)) + .select([col("name"), col("age")])) + + # Execute and collect results + result = df.collect() + + # Display the first few rows + df.show() + +HTML Rendering +------------- + +When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will +automatically display as formatted HTML tables, making it easier to visualize your data. + +The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method +controls how DataFrames appear in notebook environments, providing a richer visualization than +plain text output. + +Customizing HTML Rendering +------------------------- + +You can customize how DataFrames are rendered in HTML by configuring the formatter: + +.. code-block:: python + + from datafusion.html_formatter import configure_formatter + + # Change the default styling + configure_formatter( + max_rows=50, # Maximum number of rows to display + max_width=None, # Maximum width in pixels (None for auto) + theme="light", # Theme: "light" or "dark" + precision=2, # Floating point precision + thousands_separator=",", # Separator for thousands + date_format="%Y-%m-%d", # Date format + truncate_width=20 # Max width for string columns before truncating + ) + +The formatter settings affect all DataFrames displayed after configuration. + +Custom Style Providers +--------------------- + +For advanced styling needs, you can create a custom style provider: + +.. 
code-block:: python + + from datafusion.html_formatter import StyleProvider, configure_formatter + + class MyStyleProvider(StyleProvider): + def get_table_styles(self): + return { + "table": "border-collapse: collapse; width: 100%;", + "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;", + "td": "border: 1px solid #ddd; padding: 8px;", + "tr:nth-child(even)": "background-color: #f2f2f2;", + } + + def get_value_styles(self, dtype, value): + """Return custom styles for specific values""" + if dtype == "float" and value < 0: + return "color: red;" + return None + + # Apply the custom style provider + configure_formatter(style_provider=MyStyleProvider()) + +Creating a Custom Formatter +-------------------------- + +For complete control over rendering, you can implement a custom formatter: + +.. code-block:: python + + from datafusion.html_formatter import Formatter, get_formatter + + class MyFormatter(Formatter): + def format_html(self, batches, schema, has_more=False, table_uuid=None): + # Create your custom HTML here + html = "
" + # ... formatting logic ... + html += "
" + return html + + # Set as the global formatter + configure_formatter(formatter_class=MyFormatter) + + # Or use the formatter just for specific operations + formatter = get_formatter() + custom_html = formatter.format_html(batches, schema) + +Managing Formatters +------------------ + +Reset to default formatting: + +.. code-block:: python + + from datafusion.html_formatter import reset_formatter + + # Reset to default settings + reset_formatter() + +Get the current formatter settings: + +.. code-block:: python + + from datafusion.html_formatter import get_formatter + + formatter = get_formatter() + print(formatter.max_rows) + print(formatter.theme) + +Contextual Formatting +-------------------- + +You can also use a context manager to temporarily change formatting settings: + +.. code-block:: python + + from datafusion.html_formatter import formatting_context + + # Default formatting + df.show() + + # Temporarily use different formatting + with formatting_context(max_rows=100, theme="dark"): + df.show() # Will use the temporary settings + + # Back to default formatting + df.show() diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index 6636c0c6a..bff240b6b 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -72,6 +72,8 @@ DataFrames are typically created by calling a method on :py:class:`~datafusion.c calling the transformation methods, such as :py:func:`~datafusion.dataframe.DataFrame.filter`, :py:func:`~datafusion.dataframe.DataFrame.select`, :py:func:`~datafusion.dataframe.DataFrame.aggregate`, and :py:func:`~datafusion.dataframe.DataFrame.limit` to build up a query definition. +For more details on working with DataFrames, including visualization options and conversion to other formats, see :doc:`dataframe`. 
+ Expressions ----------- diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst new file mode 100644 index 000000000..3c6428529 --- /dev/null +++ b/docs/source/user-guide/dataframe.rst @@ -0,0 +1,213 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +DataFrame Operations +=================== + +Working with DataFrames +---------------------- + +A DataFrame in DataFusion represents a logical plan that defines a series of operations to be performed on data. +This logical plan is not executed until you call a terminal operation like :py:func:`~datafusion.dataframe.DataFrame.collect` +or :py:func:`~datafusion.dataframe.DataFrame.show`. + +DataFrames provide a familiar API for data manipulation: + +.. 
ipython:: python + + import datafusion + from datafusion import col, lit, functions as f + + ctx = datafusion.SessionContext() + + # Create a DataFrame from a CSV file + df = ctx.read_csv("example.csv") + + # Add transformations + df = df.filter(col("age") > lit(30)) \ + .select([col("name"), col("age"), (col("salary") * lit(1.1)).alias("new_salary")]) \ + .sort("age") + + # Execute the plan + df.show() + +Common DataFrame Operations +-------------------------- + +DataFusion supports a wide range of operations on DataFrames: + +Filtering and Selection +~~~~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + # Filter rows + df = df.filter(col("age") > lit(30)) + + # Select columns + df = df.select([col("name"), col("age")]) + + # Select by column name + df = df.select_columns(["name", "age"]) + + # Select using column indexing + df = df["name", "age"] + +Aggregation +~~~~~~~~~~ + +.. ipython:: python + + # Group by and aggregate + df = df.aggregate( + [col("category")], # Group by columns + [f.sum(col("amount")).alias("total"), + f.avg(col("price")).alias("avg_price")] + ) + +Joins +~~~~~ + +.. ipython:: python + + # Join two DataFrames + df_joined = df1.join( + df2, + how="inner", + left_on=["id"], + right_on=["id"] + ) + + # Join with custom expressions + df_joined = df1.join_on( + df2, + [col("df1.id") == col("df2.id")], + how="left" + ) + +DataFrame Visualization +---------------------- + +Jupyter Notebook Integration +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When working in Jupyter notebooks, DataFrames automatically display as HTML tables. This is +handled by the :code:`_repr_html_` method, which provides a rich, formatted view of your data. + +.. ipython:: python + + # DataFrames render as HTML tables in notebooks + df # Just displaying the DataFrame renders it as HTML + +Customizing DataFrame Display +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can customize how DataFrames are displayed using the HTML formatter: + +.. 
ipython:: python + + from datafusion.html_formatter import configure_formatter + + # Change display settings + configure_formatter( + max_rows=100, # Show more rows + truncate_width=30, # Allow longer strings + theme="light", # Use light theme + precision=2 # Set decimal precision + ) + + # Now display uses the new format + df.show() + +Creating a Custom Style Provider +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For advanced styling needs: + +.. code-block:: python + + from datafusion.html_formatter import StyleProvider, configure_formatter + + class CustomStyleProvider(StyleProvider): + def get_table_styles(self): + return { + "table": "border-collapse: collapse; width: 100%;", + "th": "background-color: #4CAF50; color: white; padding: 10px;", + "td": "border: 1px solid #ddd; padding: 8px;", + "tr:hover": "background-color: #f5f5f5;", + } + + def get_value_styles(self, dtype, value): + if dtype == "float" and value < 0: + return "color: red; font-weight: bold;" + return None + + # Apply custom styling + configure_formatter(style_provider=CustomStyleProvider()) + +Managing Display Settings +~~~~~~~~~~~~~~~~~~~~~~~ + +You can temporarily change formatting settings with context managers: + +.. code-block:: python + + from datafusion.html_formatter import formatting_context + + # Use different formatting temporarily + with formatting_context(max_rows=5, theme="dark"): + df.show() # Will show only 5 rows with dark theme + + # Reset to default formatting + from datafusion.html_formatter import reset_formatter + reset_formatter() + +Converting to Other Formats +-------------------------- + +DataFusion DataFrames can be easily converted to other popular formats: + +.. 
ipython:: python + + # Convert to Arrow Table + arrow_table = df.to_arrow_table() + + # Convert to Pandas DataFrame + pandas_df = df.to_pandas() + + # Convert to Polars DataFrame + polars_df = df.to_polars() + + # Convert to Python data structures + python_dict = df.to_pydict() + python_list = df.to_pylist() + +Saving DataFrames +--------------- + +You can write DataFrames to various file formats: + +.. ipython:: python + + # Write to CSV + df.write_csv("output.csv", with_header=True) + + # Write to Parquet + df.write_parquet("output.parquet", compression="zstd") + + # Write to JSON + df.write_json("output.json") From 09e213911aa8463d12c56ed9357f7efe4c43d3da Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Apr 2025 10:35:25 +0800 Subject: [PATCH 2/5] move /docs/source/api/dataframe.rst into user-guide --- docs/source/user-guide/dataframe.rst | 242 ++++++++++++--------------- 1 file changed, 104 insertions(+), 138 deletions(-) diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst index 3c6428529..a85f88cfb 100644 --- a/docs/source/user-guide/dataframe.rst +++ b/docs/source/user-guide/dataframe.rst @@ -15,199 +15,165 @@ .. specific language governing permissions and limitations .. under the License. -DataFrame Operations -=================== +DataFrames +========== -Working with DataFrames ----------------------- +Overview +-------- -A DataFrame in DataFusion represents a logical plan that defines a series of operations to be performed on data. -This logical plan is not executed until you call a terminal operation like :py:func:`~datafusion.dataframe.DataFrame.collect` -or :py:func:`~datafusion.dataframe.DataFrame.show`. +DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. +It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust +and Arrow. 
-DataFrames provide a familiar API for data manipulation: +A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. +The actual execution happens when terminal operations like `collect()` or `show()` are called. -.. ipython:: python +Basic Usage +---------- - import datafusion - from datafusion import col, lit, functions as f - - ctx = datafusion.SessionContext() - - # Create a DataFrame from a CSV file - df = ctx.read_csv("example.csv") - - # Add transformations - df = df.filter(col("age") > lit(30)) \ - .select([col("name"), col("age"), (col("salary") * lit(1.1)).alias("new_salary")]) \ - .sort("age") - - # Execute the plan - df.show() - -Common DataFrame Operations --------------------------- - -DataFusion supports a wide range of operations on DataFrames: - -Filtering and Selection -~~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: python -.. ipython:: python + import datafusion + from datafusion import col, lit - # Filter rows - df = df.filter(col("age") > lit(30)) + # Create a context and register a data source + ctx = datafusion.SessionContext() + ctx.register_csv("my_table", "path/to/data.csv") - # Select columns - df = df.select([col("name"), col("age")]) + # Create and manipulate a DataFrame + df = ctx.sql("SELECT * FROM my_table") - # Select by column name - df = df.select_columns(["name", "age"]) + # Or use the DataFrame API directly + df = (ctx.table("my_table") + .filter(col("age") > lit(25)) + .select([col("name"), col("age")])) - # Select using column indexing - df = df["name", "age"] - -Aggregation -~~~~~~~~~~ - -.. ipython:: python - - # Group by and aggregate - df = df.aggregate( - [col("category")], # Group by columns - [f.sum(col("amount")).alias("total"), - f.avg(col("price")).alias("avg_price")] - ) - -Joins -~~~~~ - -.. 
ipython:: python - - # Join two DataFrames - df_joined = df1.join( - df2, - how="inner", - left_on=["id"], - right_on=["id"] - ) + # Execute and collect results + result = df.collect() - # Join with custom expressions - df_joined = df1.join_on( - df2, - [col("df1.id") == col("df2.id")], - how="left" - ) - -DataFrame Visualization ----------------------- - -Jupyter Notebook Integration -~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Display the first few rows + df.show() -When working in Jupyter notebooks, DataFrames automatically display as HTML tables. This is -handled by the :code:`_repr_html_` method, which provides a rich, formatted view of your data. +HTML Rendering +------------- -.. ipython:: python +When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will +automatically display as formatted HTML tables, making it easier to visualize your data. - # DataFrames render as HTML tables in notebooks - df # Just displaying the DataFrame renders it as HTML +The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method +controls how DataFrames appear in notebook environments, providing a richer visualization than +plain text output. -Customizing DataFrame Display -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Customizing HTML Rendering +------------------------- -You can customize how DataFrames are displayed using the HTML formatter: +You can customize how DataFrames are rendered in HTML by configuring the formatter: -.. ipython:: python +.. 
code-block:: python from datafusion.html_formatter import configure_formatter - # Change display settings + # Change the default styling configure_formatter( - max_rows=100, # Show more rows - truncate_width=30, # Allow longer strings - theme="light", # Use light theme - precision=2 # Set decimal precision + max_rows=50, # Maximum number of rows to display + max_width=None, # Maximum width in pixels (None for auto) + theme="light", # Theme: "light" or "dark" + precision=2, # Floating point precision + thousands_separator=",", # Separator for thousands + date_format="%Y-%m-%d", # Date format + truncate_width=20 # Max width for string columns before truncating ) - - # Now display uses the new format - df.show() -Creating a Custom Style Provider -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The formatter settings affect all DataFrames displayed after configuration. + +Custom Style Providers +--------------------- -For advanced styling needs: +For advanced styling needs, you can create a custom style provider: .. 
code-block:: python from datafusion.html_formatter import StyleProvider, configure_formatter - class CustomStyleProvider(StyleProvider): + class MyStyleProvider(StyleProvider): def get_table_styles(self): return { "table": "border-collapse: collapse; width: 100%;", - "th": "background-color: #4CAF50; color: white; padding: 10px;", + "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;", "td": "border: 1px solid #ddd; padding: 8px;", - "tr:hover": "background-color: #f5f5f5;", + "tr:nth-child(even)": "background-color: #f2f2f2;", } def get_value_styles(self, dtype, value): + """Return custom styles for specific values""" if dtype == "float" and value < 0: - return "color: red; font-weight: bold;" + return "color: red;" return None - # Apply custom styling - configure_formatter(style_provider=CustomStyleProvider()) + # Apply the custom style provider + configure_formatter(style_provider=MyStyleProvider()) -Managing Display Settings -~~~~~~~~~~~~~~~~~~~~~~~ +Creating a Custom Formatter +-------------------------- -You can temporarily change formatting settings with context managers: +For complete control over rendering, you can implement a custom formatter: .. code-block:: python - from datafusion.html_formatter import formatting_context + from datafusion.html_formatter import Formatter, get_formatter - # Use different formatting temporarily - with formatting_context(max_rows=5, theme="dark"): - df.show() # Will show only 5 rows with dark theme + class MyFormatter(Formatter): + def format_html(self, batches, schema, has_more=False, table_uuid=None): + # Create your custom HTML here + html = "
" + # ... formatting logic ... + html += "
" + return html - # Reset to default formatting - from datafusion.html_formatter import reset_formatter - reset_formatter() + # Set as the global formatter + configure_formatter(formatter_class=MyFormatter) + + # Or use the formatter just for specific operations + formatter = get_formatter() + custom_html = formatter.format_html(batches, schema) -Converting to Other Formats --------------------------- +Managing Formatters +------------------ -DataFusion DataFrames can be easily converted to other popular formats: +Reset to default formatting: -.. ipython:: python +.. code-block:: python - # Convert to Arrow Table - arrow_table = df.to_arrow_table() - - # Convert to Pandas DataFrame - pandas_df = df.to_pandas() + from datafusion.html_formatter import reset_formatter - # Convert to Polars DataFrame - polars_df = df.to_polars() + # Reset to default settings + reset_formatter() + +Get the current formatter settings: + +.. code-block:: python + + from datafusion.html_formatter import get_formatter - # Convert to Python data structures - python_dict = df.to_pydict() - python_list = df.to_pylist() + formatter = get_formatter() + print(formatter.max_rows) + print(formatter.theme) -Saving DataFrames ---------------- +Contextual Formatting +-------------------- -You can write DataFrames to various file formats: +You can also use a context manager to temporarily change formatting settings: -.. ipython:: python +.. 
code-block:: python - # Write to CSV - df.write_csv("output.csv", with_header=True) + from datafusion.html_formatter import formatting_context - # Write to Parquet - df.write_parquet("output.parquet", compression="zstd") + # Default formatting + df.show() + + # Temporarily use different formatting + with formatting_context(max_rows=100, theme="dark"): + df.show() # Will use the temporary settings - # Write to JSON - df.write_json("output.json") + # Back to default formatting + df.show() From 4b0045f28cdef4296e2cce746cf25a5f32262dd7 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 22 Apr 2025 10:38:47 +0800 Subject: [PATCH 3/5] docs: remove DataFrame API documentation --- docs/source/api/dataframe.rst | 162 ---------------------------------- 1 file changed, 162 deletions(-) delete mode 100644 docs/source/api/dataframe.rst diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst deleted file mode 100644 index 675b14bf9..000000000 --- a/docs/source/api/dataframe.rst +++ /dev/null @@ -1,162 +0,0 @@ -DataFrames -========== - -Overview --------- - -DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. -It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust -and Arrow. - -A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. -The actual execution happens when terminal operations like `collect()` or `show()` are called. - -Basic Usage ----------- - -.. 
code-block:: python - - import datafusion - from datafusion import col, lit - - # Create a context and register a data source - ctx = datafusion.SessionContext() - ctx.register_csv("my_table", "path/to/data.csv") - - # Create and manipulate a DataFrame - df = ctx.sql("SELECT * FROM my_table") - - # Or use the DataFrame API directly - df = (ctx.table("my_table") - .filter(col("age") > lit(25)) - .select([col("name"), col("age")])) - - # Execute and collect results - result = df.collect() - - # Display the first few rows - df.show() - -HTML Rendering -------------- - -When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will -automatically display as formatted HTML tables, making it easier to visualize your data. - -The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method -controls how DataFrames appear in notebook environments, providing a richer visualization than -plain text output. - -Customizing HTML Rendering -------------------------- - -You can customize how DataFrames are rendered in HTML by configuring the formatter: - -.. code-block:: python - - from datafusion.html_formatter import configure_formatter - - # Change the default styling - configure_formatter( - max_rows=50, # Maximum number of rows to display - max_width=None, # Maximum width in pixels (None for auto) - theme="light", # Theme: "light" or "dark" - precision=2, # Floating point precision - thousands_separator=",", # Separator for thousands - date_format="%Y-%m-%d", # Date format - truncate_width=20 # Max width for string columns before truncating - ) - -The formatter settings affect all DataFrames displayed after configuration. - -Custom Style Providers ---------------------- - -For advanced styling needs, you can create a custom style provider: - -.. 
code-block:: python - - from datafusion.html_formatter import StyleProvider, configure_formatter - - class MyStyleProvider(StyleProvider): - def get_table_styles(self): - return { - "table": "border-collapse: collapse; width: 100%;", - "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;", - "td": "border: 1px solid #ddd; padding: 8px;", - "tr:nth-child(even)": "background-color: #f2f2f2;", - } - - def get_value_styles(self, dtype, value): - """Return custom styles for specific values""" - if dtype == "float" and value < 0: - return "color: red;" - return None - - # Apply the custom style provider - configure_formatter(style_provider=MyStyleProvider()) - -Creating a Custom Formatter --------------------------- - -For complete control over rendering, you can implement a custom formatter: - -.. code-block:: python - - from datafusion.html_formatter import Formatter, get_formatter - - class MyFormatter(Formatter): - def format_html(self, batches, schema, has_more=False, table_uuid=None): - # Create your custom HTML here - html = "
" - # ... formatting logic ... - html += "
" - return html - - # Set as the global formatter - configure_formatter(formatter_class=MyFormatter) - - # Or use the formatter just for specific operations - formatter = get_formatter() - custom_html = formatter.format_html(batches, schema) - -Managing Formatters ------------------- - -Reset to default formatting: - -.. code-block:: python - - from datafusion.html_formatter import reset_formatter - - # Reset to default settings - reset_formatter() - -Get the current formatter settings: - -.. code-block:: python - - from datafusion.html_formatter import get_formatter - - formatter = get_formatter() - print(formatter.max_rows) - print(formatter.theme) - -Contextual Formatting --------------------- - -You can also use a context manager to temporarily change formatting settings: - -.. code-block:: python - - from datafusion.html_formatter import formatting_context - - # Default formatting - df.show() - - # Temporarily use different formatting - with formatting_context(max_rows=100, theme="dark"): - df.show() # Will use the temporary settings - - # Back to default formatting - df.show() From 9a828702a489ddd7463a04fbbd878633d51130fb Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sun, 27 Apr 2025 18:25:25 +0800 Subject: [PATCH 4/5] docs: fix formatting inconsistencies in DataFrame user guide --- docs/source/user-guide/dataframe.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst index a85f88cfb..a78fd8073 100644 --- a/docs/source/user-guide/dataframe.rst +++ b/docs/source/user-guide/dataframe.rst @@ -26,10 +26,10 @@ It offers a familiar API similar to pandas and other DataFrame libraries, but wi and Arrow. A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation. -The actual execution happens when terminal operations like `collect()` or `show()` are called. 
+The actual execution happens when terminal operations like ``collect()`` or ``show()`` are called. Basic Usage ----------- +----------- .. code-block:: python @@ -55,17 +55,17 @@ Basic Usage df.show() HTML Rendering -------------- +-------------- When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will automatically display as formatted HTML tables, making it easier to visualize your data. -The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method +The ``_repr_html_`` method is called automatically by Jupyter to render a DataFrame. This method controls how DataFrames appear in notebook environments, providing a richer visualization than plain text output. Customizing HTML Rendering -------------------------- +-------------------------- You can customize how DataFrames are rendered in HTML by configuring the formatter: @@ -87,7 +87,7 @@ You can customize how DataFrames are rendered in HTML by configuring the formatt The formatter settings affect all DataFrames displayed after configuration. 
Custom Style Providers ---------------------- +---------------------- For advanced styling needs, you can create a custom style provider: @@ -114,7 +114,7 @@ For advanced styling needs, you can create a custom style provider: configure_formatter(style_provider=MyStyleProvider()) Creating a Custom Formatter --------------------------- +--------------------------- For complete control over rendering, you can implement a custom formatter: @@ -138,7 +138,7 @@ For complete control over rendering, you can implement a custom formatter: custom_html = formatter.format_html(batches, schema) Managing Formatters ------------------- +------------------- Reset to default formatting: @@ -160,7 +160,7 @@ Get the current formatter settings: print(formatter.theme) Contextual Formatting --------------------- +--------------------- You can also use a context manager to temporarily change formatting settings: From ef47425e5c9d6ad21cc142fd1e9d7866f102f2c8 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 27 Apr 2025 09:39:34 -0400 Subject: [PATCH 5/5] Two minor corrections to documentation rendering --- docs/source/index.rst | 1 + docs/source/user-guide/basics.rst | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 558b2d572..c18793822 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,6 +72,7 @@ Example user-guide/introduction user-guide/basics user-guide/data-sources + user-guide/dataframe user-guide/common-operations/index user-guide/io/index user-guide/configuration diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst index bff240b6b..2975d9a6b 100644 --- a/docs/source/user-guide/basics.rst +++ b/docs/source/user-guide/basics.rst @@ -21,7 +21,8 @@ Concepts ======== In this section, we will cover a basic example to introduce a few key concepts. 
We will use the -2021 Yellow Taxi Trip Records ([download](https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet)), from the [TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page). +2021 Yellow Taxi Trip Records (`download <https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet>`_), +from the `TLC Trip Record Data <https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page>`_. .. ipython:: python