From e8308de265cbaf0005e8bfd87ebe2b236d1f1030 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Tue, 22 Apr 2025 10:12:13 +0800
Subject: [PATCH 1/5] docs: enhance user guide with detailed DataFrame
 operations and examples

---
 docs/source/api/dataframe.rst        | 162 ++++++++++++++++++++
 docs/source/user-guide/basics.rst    |   2 +
 docs/source/user-guide/dataframe.rst | 213 +++++++++++++++++++++++++++
 3 files changed, 377 insertions(+)
 create mode 100644 docs/source/api/dataframe.rst
 create mode 100644 docs/source/user-guide/dataframe.rst

diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst
new file mode 100644
index 000000000..675b14bf9
--- /dev/null
+++ b/docs/source/api/dataframe.rst
@@ -0,0 +1,162 @@
+DataFrames
+==========
+
+Overview
+--------
+
+DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. 
+It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust 
+and Arrow.
+
+A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation.
+The actual execution happens when terminal operations like `collect()` or `show()` are called.
+
+Basic Usage
+----------
+
+.. code-block:: python
+
+    import datafusion
+    from datafusion import col, lit
+
+    # Create a context and register a data source
+    ctx = datafusion.SessionContext()
+    ctx.register_csv("my_table", "path/to/data.csv")
+    
+    # Create and manipulate a DataFrame
+    df = ctx.sql("SELECT * FROM my_table")
+    
+    # Or use the DataFrame API directly
+    df = (ctx.table("my_table")
+          .filter(col("age") > lit(25))
+          .select([col("name"), col("age")]))
+    
+    # Execute and collect results
+    result = df.collect()
+    
+    # Display the first few rows
+    df.show()
+
+HTML Rendering
+-------------
+
+When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will
+automatically display as formatted HTML tables, making it easier to visualize your data.
+
+The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method 
+controls how DataFrames appear in notebook environments, providing a richer visualization than
+plain text output.
+
+Customizing HTML Rendering
+-------------------------
+
+You can customize how DataFrames are rendered in HTML by configuring the formatter:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import configure_formatter
+    
+    # Change the default styling
+    configure_formatter(
+        max_rows=50,           # Maximum number of rows to display
+        max_width=None,        # Maximum width in pixels (None for auto)
+        theme="light",         # Theme: "light" or "dark" 
+        precision=2,           # Floating point precision
+        thousands_separator=",", # Separator for thousands
+        date_format="%Y-%m-%d", # Date format
+        truncate_width=20      # Max width for string columns before truncating
+    )
+
+The formatter settings affect all DataFrames displayed after configuration.
+
+Custom Style Providers
+---------------------
+
+For advanced styling needs, you can create a custom style provider:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import StyleProvider, configure_formatter
+    
+    class MyStyleProvider(StyleProvider):
+        def get_table_styles(self):
+            return {
+                "table": "border-collapse: collapse; width: 100%;",
+                "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;",
+                "td": "border: 1px solid #ddd; padding: 8px;",
+                "tr:nth-child(even)": "background-color: #f2f2f2;",
+            }
+            
+        def get_value_styles(self, dtype, value):
+            """Return custom styles for specific values"""
+            if dtype == "float" and value < 0:
+                return "color: red;"
+            return None
+    
+    # Apply the custom style provider
+    configure_formatter(style_provider=MyStyleProvider())
+
+Creating a Custom Formatter
+--------------------------
+
+For complete control over rendering, you can implement a custom formatter:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import Formatter, get_formatter
+    
+    class MyFormatter(Formatter):
+        def format_html(self, batches, schema, has_more=False, table_uuid=None):
+            # Create your custom HTML here
+            html = "<div class='my-custom-table'>"
+            # ... formatting logic ...
+            html += "</div>"
+            return html
+    
+    # Set as the global formatter
+    configure_formatter(formatter_class=MyFormatter)
+    
+    # Or use the formatter just for specific operations
+    formatter = get_formatter()
+    custom_html = formatter.format_html(batches, schema)
+
+Managing Formatters
+------------------
+
+Reset to default formatting:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import reset_formatter
+    
+    # Reset to default settings
+    reset_formatter()
+
+Get the current formatter settings:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import get_formatter
+    
+    formatter = get_formatter()
+    print(formatter.max_rows)
+    print(formatter.theme)
+
+Contextual Formatting
+--------------------
+
+You can also use a context manager to temporarily change formatting settings:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import formatting_context
+    
+    # Default formatting
+    df.show()
+    
+    # Temporarily use different formatting
+    with formatting_context(max_rows=100, theme="dark"):
+        df.show()  # Will use the temporary settings
+    
+    # Back to default formatting
+    df.show()
diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst
index 6636c0c6a..bff240b6b 100644
--- a/docs/source/user-guide/basics.rst
+++ b/docs/source/user-guide/basics.rst
@@ -72,6 +72,8 @@ DataFrames are typically created by calling a method on :py:class:`~datafusion.c
 calling the transformation methods, such as :py:func:`~datafusion.dataframe.DataFrame.filter`, :py:func:`~datafusion.dataframe.DataFrame.select`, :py:func:`~datafusion.dataframe.DataFrame.aggregate`,
 and :py:func:`~datafusion.dataframe.DataFrame.limit` to build up a query definition.
 
+For more details on working with DataFrames, including HTML rendering and how to customize the way they are displayed, see :doc:`dataframe`.
+
 Expressions
 -----------
 
diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst
new file mode 100644
index 000000000..3c6428529
--- /dev/null
+++ b/docs/source/user-guide/dataframe.rst
@@ -0,0 +1,213 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+DataFrame Operations
+===================
+
+Working with DataFrames
+----------------------
+
+A DataFrame in DataFusion represents a logical plan that defines a series of operations to be performed on data. 
+This logical plan is not executed until you call a terminal operation like :py:func:`~datafusion.dataframe.DataFrame.collect` 
+or :py:func:`~datafusion.dataframe.DataFrame.show`.
+
+DataFrames provide a familiar API for data manipulation:
+
+.. ipython:: python
+
+    import datafusion
+    from datafusion import col, lit, functions as f
+    
+    ctx = datafusion.SessionContext()
+    
+    # Create a DataFrame from a CSV file
+    df = ctx.read_csv("example.csv")
+    
+    # Add transformations
+    df = df.filter(col("age") > lit(30)) \
+           .select([col("name"), col("age"), (col("salary") * lit(1.1)).alias("new_salary")]) \
+           .sort("age")
+    
+    # Execute the plan
+    df.show()
+
+Common DataFrame Operations
+--------------------------
+
+DataFusion supports a wide range of operations on DataFrames:
+
+Filtering and Selection
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. ipython:: python
+
+    # Filter rows
+    df = df.filter(col("age") > lit(30))
+    
+    # Select columns
+    df = df.select([col("name"), col("age")])
+    
+    # Select by column name
+    df = df.select_columns(["name", "age"])
+    
+    # Select using column indexing
+    df = df["name", "age"]
+
+Aggregation
+~~~~~~~~~~
+
+.. ipython:: python
+
+    # Group by and aggregate
+    df = df.aggregate(
+        [col("category")],  # Group by columns
+        [f.sum(col("amount")).alias("total"), 
+         f.avg(col("price")).alias("avg_price")]
+    )
+
+Joins
+~~~~~
+
+.. ipython:: python
+
+    # Join two DataFrames
+    df_joined = df1.join(
+        df2,
+        how="inner",
+        left_on=["id"], 
+        right_on=["id"]
+    )
+    
+    # Join with custom expressions
+    df_joined = df1.join_on(
+        df2,
+        [col("df1.id") == col("df2.id")],
+        how="left"
+    )
+
+DataFrame Visualization
+----------------------
+
+Jupyter Notebook Integration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When working in Jupyter notebooks, DataFrames automatically display as HTML tables. This is 
+handled by the :code:`_repr_html_` method, which provides a rich, formatted view of your data.
+
+.. ipython:: python
+
+    # DataFrames render as HTML tables in notebooks
+    df  # Just displaying the DataFrame renders it as HTML
+
+Customizing DataFrame Display
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can customize how DataFrames are displayed using the HTML formatter:
+
+.. ipython:: python
+
+    from datafusion.html_formatter import configure_formatter
+    
+    # Change display settings
+    configure_formatter(
+        max_rows=100,          # Show more rows
+        truncate_width=30,     # Allow longer strings
+        theme="light",         # Use light theme
+        precision=2            # Set decimal precision
+    )
+    
+    # Now display uses the new format
+    df.show()
+
+Creating a Custom Style Provider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For advanced styling needs:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import StyleProvider, configure_formatter
+    
+    class CustomStyleProvider(StyleProvider):
+        def get_table_styles(self):
+            return {
+                "table": "border-collapse: collapse; width: 100%;",
+                "th": "background-color: #4CAF50; color: white; padding: 10px;",
+                "td": "border: 1px solid #ddd; padding: 8px;",
+                "tr:hover": "background-color: #f5f5f5;",
+            }
+            
+        def get_value_styles(self, dtype, value):
+            if dtype == "float" and value < 0:
+                return "color: red; font-weight: bold;"
+            return None
+    
+    # Apply custom styling
+    configure_formatter(style_provider=CustomStyleProvider())
+
+Managing Display Settings
+~~~~~~~~~~~~~~~~~~~~~~~
+
+You can temporarily change formatting settings with context managers:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import formatting_context
+    
+    # Use different formatting temporarily
+    with formatting_context(max_rows=5, theme="dark"):
+        df.show()  # Will show only 5 rows with dark theme
+    
+    # Reset to default formatting
+    from datafusion.html_formatter import reset_formatter
+    reset_formatter()
+
+Converting to Other Formats
+--------------------------
+
+DataFusion DataFrames can be easily converted to other popular formats:
+
+.. ipython:: python
+
+    # Convert to Arrow Table
+    arrow_table = df.to_arrow_table()
+    
+    # Convert to Pandas DataFrame
+    pandas_df = df.to_pandas()
+    
+    # Convert to Polars DataFrame
+    polars_df = df.to_polars()
+    
+    # Convert to Python data structures
+    python_dict = df.to_pydict()
+    python_list = df.to_pylist()
+
+Saving DataFrames
+---------------
+
+You can write DataFrames to various file formats:
+
+.. ipython:: python
+
+    # Write to CSV
+    df.write_csv("output.csv", with_header=True)
+    
+    # Write to Parquet
+    df.write_parquet("output.parquet", compression="zstd")
+    
+    # Write to JSON
+    df.write_json("output.json")

From 09e213911aa8463d12c56ed9357f7efe4c43d3da Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Tue, 22 Apr 2025 10:35:25 +0800
Subject: [PATCH 2/5] move /docs/source/api/dataframe.rst into user-guide

---
 docs/source/user-guide/dataframe.rst | 242 ++++++++++++---------------
 1 file changed, 104 insertions(+), 138 deletions(-)

diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst
index 3c6428529..a85f88cfb 100644
--- a/docs/source/user-guide/dataframe.rst
+++ b/docs/source/user-guide/dataframe.rst
@@ -15,199 +15,165 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-DataFrame Operations
-===================
+DataFrames
+==========
 
-Working with DataFrames
-----------------------
+Overview
+--------
 
-A DataFrame in DataFusion represents a logical plan that defines a series of operations to be performed on data. 
-This logical plan is not executed until you call a terminal operation like :py:func:`~datafusion.dataframe.DataFrame.collect` 
-or :py:func:`~datafusion.dataframe.DataFrame.show`.
+DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. 
+It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust 
+and Arrow.
 
-DataFrames provide a familiar API for data manipulation:
+A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation.
+The actual execution happens when terminal operations like `collect()` or `show()` are called.
 
-.. ipython:: python
+Basic Usage
+----------
 
-    import datafusion
-    from datafusion import col, lit, functions as f
-    
-    ctx = datafusion.SessionContext()
-    
-    # Create a DataFrame from a CSV file
-    df = ctx.read_csv("example.csv")
-    
-    # Add transformations
-    df = df.filter(col("age") > lit(30)) \
-           .select([col("name"), col("age"), (col("salary") * lit(1.1)).alias("new_salary")]) \
-           .sort("age")
-    
-    # Execute the plan
-    df.show()
-
-Common DataFrame Operations
---------------------------
-
-DataFusion supports a wide range of operations on DataFrames:
-
-Filtering and Selection
-~~~~~~~~~~~~~~~~~~~~~~~
+.. code-block:: python
 
-.. ipython:: python
+    import datafusion
+    from datafusion import col, lit
 
-    # Filter rows
-    df = df.filter(col("age") > lit(30))
+    # Create a context and register a data source
+    ctx = datafusion.SessionContext()
+    ctx.register_csv("my_table", "path/to/data.csv")
     
-    # Select columns
-    df = df.select([col("name"), col("age")])
+    # Create and manipulate a DataFrame
+    df = ctx.sql("SELECT * FROM my_table")
     
-    # Select by column name
-    df = df.select_columns(["name", "age"])
+    # Or use the DataFrame API directly
+    df = (ctx.table("my_table")
+          .filter(col("age") > lit(25))
+          .select([col("name"), col("age")]))
     
-    # Select using column indexing
-    df = df["name", "age"]
-
-Aggregation
-~~~~~~~~~~
-
-.. ipython:: python
-
-    # Group by and aggregate
-    df = df.aggregate(
-        [col("category")],  # Group by columns
-        [f.sum(col("amount")).alias("total"), 
-         f.avg(col("price")).alias("avg_price")]
-    )
-
-Joins
-~~~~~
-
-.. ipython:: python
-
-    # Join two DataFrames
-    df_joined = df1.join(
-        df2,
-        how="inner",
-        left_on=["id"], 
-        right_on=["id"]
-    )
+    # Execute and collect results
+    result = df.collect()
     
-    # Join with custom expressions
-    df_joined = df1.join_on(
-        df2,
-        [col("df1.id") == col("df2.id")],
-        how="left"
-    )
-
-DataFrame Visualization
-----------------------
-
-Jupyter Notebook Integration
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Display the first few rows
+    df.show()
 
-When working in Jupyter notebooks, DataFrames automatically display as HTML tables. This is 
-handled by the :code:`_repr_html_` method, which provides a rich, formatted view of your data.
+HTML Rendering
+-------------
 
-.. ipython:: python
+When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will
+automatically display as formatted HTML tables, making it easier to visualize your data.
 
-    # DataFrames render as HTML tables in notebooks
-    df  # Just displaying the DataFrame renders it as HTML
+The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method 
+controls how DataFrames appear in notebook environments, providing a richer visualization than
+plain text output.
 
-Customizing DataFrame Display
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Customizing HTML Rendering
+-------------------------
 
-You can customize how DataFrames are displayed using the HTML formatter:
+You can customize how DataFrames are rendered in HTML by configuring the formatter:
 
-.. ipython:: python
+.. code-block:: python
 
     from datafusion.html_formatter import configure_formatter
     
-    # Change display settings
+    # Change the default styling
     configure_formatter(
-        max_rows=100,          # Show more rows
-        truncate_width=30,     # Allow longer strings
-        theme="light",         # Use light theme
-        precision=2            # Set decimal precision
+        max_rows=50,           # Maximum number of rows to display
+        max_width=None,        # Maximum width in pixels (None for auto)
+        theme="light",         # Theme: "light" or "dark" 
+        precision=2,           # Floating point precision
+        thousands_separator=",", # Separator for thousands
+        date_format="%Y-%m-%d", # Date format
+        truncate_width=20      # Max width for string columns before truncating
     )
-    
-    # Now display uses the new format
-    df.show()
 
-Creating a Custom Style Provider
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The formatter settings affect all DataFrames displayed after configuration.
+
+Custom Style Providers
+---------------------
 
-For advanced styling needs:
+For advanced styling needs, you can create a custom style provider:
 
 .. code-block:: python
 
     from datafusion.html_formatter import StyleProvider, configure_formatter
     
-    class CustomStyleProvider(StyleProvider):
+    class MyStyleProvider(StyleProvider):
         def get_table_styles(self):
             return {
                 "table": "border-collapse: collapse; width: 100%;",
-                "th": "background-color: #4CAF50; color: white; padding: 10px;",
+                "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;",
                 "td": "border: 1px solid #ddd; padding: 8px;",
-                "tr:hover": "background-color: #f5f5f5;",
+                "tr:nth-child(even)": "background-color: #f2f2f2;",
             }
             
         def get_value_styles(self, dtype, value):
+            """Return custom styles for specific values"""
             if dtype == "float" and value < 0:
-                return "color: red; font-weight: bold;"
+                return "color: red;"
             return None
     
-    # Apply custom styling
-    configure_formatter(style_provider=CustomStyleProvider())
+    # Apply the custom style provider
+    configure_formatter(style_provider=MyStyleProvider())
 
-Managing Display Settings
-~~~~~~~~~~~~~~~~~~~~~~~
+Creating a Custom Formatter
+--------------------------
 
-You can temporarily change formatting settings with context managers:
+For complete control over rendering, you can implement a custom formatter:
 
 .. code-block:: python
 
-    from datafusion.html_formatter import formatting_context
+    from datafusion.html_formatter import Formatter, get_formatter
     
-    # Use different formatting temporarily
-    with formatting_context(max_rows=5, theme="dark"):
-        df.show()  # Will show only 5 rows with dark theme
+    class MyFormatter(Formatter):
+        def format_html(self, batches, schema, has_more=False, table_uuid=None):
+            # Create your custom HTML here
+            html = "<div class='my-custom-table'>"
+            # ... formatting logic ...
+            html += "</div>"
+            return html
     
-    # Reset to default formatting
-    from datafusion.html_formatter import reset_formatter
-    reset_formatter()
+    # Set as the global formatter
+    configure_formatter(formatter_class=MyFormatter)
+    
+    # Or use the formatter just for specific operations
+    formatter = get_formatter()
+    custom_html = formatter.format_html(batches, schema)
 
-Converting to Other Formats
---------------------------
+Managing Formatters
+------------------
 
-DataFusion DataFrames can be easily converted to other popular formats:
+Reset to default formatting:
 
-.. ipython:: python
+.. code-block:: python
 
-    # Convert to Arrow Table
-    arrow_table = df.to_arrow_table()
-    
-    # Convert to Pandas DataFrame
-    pandas_df = df.to_pandas()
+    from datafusion.html_formatter import reset_formatter
     
-    # Convert to Polars DataFrame
-    polars_df = df.to_polars()
+    # Reset to default settings
+    reset_formatter()
+
+Get the current formatter settings:
+
+.. code-block:: python
+
+    from datafusion.html_formatter import get_formatter
     
-    # Convert to Python data structures
-    python_dict = df.to_pydict()
-    python_list = df.to_pylist()
+    formatter = get_formatter()
+    print(formatter.max_rows)
+    print(formatter.theme)
 
-Saving DataFrames
----------------
+Contextual Formatting
+--------------------
 
-You can write DataFrames to various file formats:
+You can also use a context manager to temporarily change formatting settings:
 
-.. ipython:: python
+.. code-block:: python
 
-    # Write to CSV
-    df.write_csv("output.csv", with_header=True)
+    from datafusion.html_formatter import formatting_context
     
-    # Write to Parquet
-    df.write_parquet("output.parquet", compression="zstd")
+    # Default formatting
+    df.show()
+    
+    # Temporarily use different formatting
+    with formatting_context(max_rows=100, theme="dark"):
+        df.show()  # Will use the temporary settings
     
-    # Write to JSON
-    df.write_json("output.json")
+    # Back to default formatting
+    df.show()

From 4b0045f28cdef4296e2cce746cf25a5f32262dd7 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Tue, 22 Apr 2025 10:38:47 +0800
Subject: [PATCH 3/5] docs: remove DataFrame API documentation

---
 docs/source/api/dataframe.rst | 162 ----------------------------------
 1 file changed, 162 deletions(-)
 delete mode 100644 docs/source/api/dataframe.rst

diff --git a/docs/source/api/dataframe.rst b/docs/source/api/dataframe.rst
deleted file mode 100644
index 675b14bf9..000000000
--- a/docs/source/api/dataframe.rst
+++ /dev/null
@@ -1,162 +0,0 @@
-DataFrames
-==========
-
-Overview
---------
-
-DataFusion's DataFrame API provides a powerful interface for building and executing queries against data sources. 
-It offers a familiar API similar to pandas and other DataFrame libraries, but with the performance benefits of Rust 
-and Arrow.
-
-A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation.
-The actual execution happens when terminal operations like `collect()` or `show()` are called.
-
-Basic Usage
-----------
-
-.. code-block:: python
-
-    import datafusion
-    from datafusion import col, lit
-
-    # Create a context and register a data source
-    ctx = datafusion.SessionContext()
-    ctx.register_csv("my_table", "path/to/data.csv")
-    
-    # Create and manipulate a DataFrame
-    df = ctx.sql("SELECT * FROM my_table")
-    
-    # Or use the DataFrame API directly
-    df = (ctx.table("my_table")
-          .filter(col("age") > lit(25))
-          .select([col("name"), col("age")]))
-    
-    # Execute and collect results
-    result = df.collect()
-    
-    # Display the first few rows
-    df.show()
-
-HTML Rendering
--------------
-
-When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will
-automatically display as formatted HTML tables, making it easier to visualize your data.
-
-The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method 
-controls how DataFrames appear in notebook environments, providing a richer visualization than
-plain text output.
-
-Customizing HTML Rendering
--------------------------
-
-You can customize how DataFrames are rendered in HTML by configuring the formatter:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import configure_formatter
-    
-    # Change the default styling
-    configure_formatter(
-        max_rows=50,           # Maximum number of rows to display
-        max_width=None,        # Maximum width in pixels (None for auto)
-        theme="light",         # Theme: "light" or "dark" 
-        precision=2,           # Floating point precision
-        thousands_separator=",", # Separator for thousands
-        date_format="%Y-%m-%d", # Date format
-        truncate_width=20      # Max width for string columns before truncating
-    )
-
-The formatter settings affect all DataFrames displayed after configuration.
-
-Custom Style Providers
----------------------
-
-For advanced styling needs, you can create a custom style provider:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import StyleProvider, configure_formatter
-    
-    class MyStyleProvider(StyleProvider):
-        def get_table_styles(self):
-            return {
-                "table": "border-collapse: collapse; width: 100%;",
-                "th": "background-color: #007bff; color: white; padding: 8px; text-align: left;",
-                "td": "border: 1px solid #ddd; padding: 8px;",
-                "tr:nth-child(even)": "background-color: #f2f2f2;",
-            }
-            
-        def get_value_styles(self, dtype, value):
-            """Return custom styles for specific values"""
-            if dtype == "float" and value < 0:
-                return "color: red;"
-            return None
-    
-    # Apply the custom style provider
-    configure_formatter(style_provider=MyStyleProvider())
-
-Creating a Custom Formatter
---------------------------
-
-For complete control over rendering, you can implement a custom formatter:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import Formatter, get_formatter
-    
-    class MyFormatter(Formatter):
-        def format_html(self, batches, schema, has_more=False, table_uuid=None):
-            # Create your custom HTML here
-            html = "<div class='my-custom-table'>"
-            # ... formatting logic ...
-            html += "</div>"
-            return html
-    
-    # Set as the global formatter
-    configure_formatter(formatter_class=MyFormatter)
-    
-    # Or use the formatter just for specific operations
-    formatter = get_formatter()
-    custom_html = formatter.format_html(batches, schema)
-
-Managing Formatters
-------------------
-
-Reset to default formatting:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import reset_formatter
-    
-    # Reset to default settings
-    reset_formatter()
-
-Get the current formatter settings:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import get_formatter
-    
-    formatter = get_formatter()
-    print(formatter.max_rows)
-    print(formatter.theme)
-
-Contextual Formatting
---------------------
-
-You can also use a context manager to temporarily change formatting settings:
-
-.. code-block:: python
-
-    from datafusion.html_formatter import formatting_context
-    
-    # Default formatting
-    df.show()
-    
-    # Temporarily use different formatting
-    with formatting_context(max_rows=100, theme="dark"):
-        df.show()  # Will use the temporary settings
-    
-    # Back to default formatting
-    df.show()

From 9a828702a489ddd7463a04fbbd878633d51130fb Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Sun, 27 Apr 2025 18:25:25 +0800
Subject: [PATCH 4/5] docs: fix formatting inconsistencies in DataFrame user
 guide

---
 docs/source/user-guide/dataframe.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/source/user-guide/dataframe.rst b/docs/source/user-guide/dataframe.rst
index a85f88cfb..a78fd8073 100644
--- a/docs/source/user-guide/dataframe.rst
+++ b/docs/source/user-guide/dataframe.rst
@@ -26,10 +26,10 @@ It offers a familiar API similar to pandas and other DataFrame libraries, but wi
 and Arrow.
 
 A DataFrame represents a logical plan that can be composed through operations like filtering, projection, and aggregation.
-The actual execution happens when terminal operations like `collect()` or `show()` are called.
+The actual execution happens when terminal operations like ``collect()`` or ``show()`` are called.
 
 Basic Usage
-----------
+-----------
 
 .. code-block:: python
 
@@ -55,17 +55,17 @@ Basic Usage
     df.show()
 
 HTML Rendering
--------------
+--------------
 
 When working in Jupyter notebooks or other environments that support HTML rendering, DataFrames will
 automatically display as formatted HTML tables, making it easier to visualize your data.
 
-The `_repr_html_` method is called automatically by Jupyter to render a DataFrame. This method 
+The ``_repr_html_`` method is called automatically by Jupyter to render a DataFrame. This method
 controls how DataFrames appear in notebook environments, providing a richer visualization than
 plain text output.
 
 Customizing HTML Rendering
--------------------------
+--------------------------
 
 You can customize how DataFrames are rendered in HTML by configuring the formatter:
 
@@ -87,7 +87,7 @@ You can customize how DataFrames are rendered in HTML by configuring the formatt
 The formatter settings affect all DataFrames displayed after configuration.
 
 Custom Style Providers
----------------------
+----------------------
 
 For advanced styling needs, you can create a custom style provider:
 
@@ -114,7 +114,7 @@ For advanced styling needs, you can create a custom style provider:
     configure_formatter(style_provider=MyStyleProvider())
 
 Creating a Custom Formatter
---------------------------
+---------------------------
 
 For complete control over rendering, you can implement a custom formatter:
 
@@ -138,7 +138,7 @@ For complete control over rendering, you can implement a custom formatter:
     custom_html = formatter.format_html(batches, schema)
 
 Managing Formatters
-------------------
+-------------------
 
 Reset to default formatting:
 
@@ -160,7 +160,7 @@ Get the current formatter settings:
     print(formatter.theme)
 
 Contextual Formatting
---------------------
+---------------------
 
 You can also use a context manager to temporarily change formatting settings:
 

From ef47425e5c9d6ad21cc142fd1e9d7866f102f2c8 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Sun, 27 Apr 2025 09:39:34 -0400
Subject: [PATCH 5/5] Two minor corrections to documentation rendering

---
 docs/source/index.rst             | 1 +
 docs/source/user-guide/basics.rst | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 558b2d572..c18793822 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -72,6 +72,7 @@ Example
    user-guide/introduction
    user-guide/basics
    user-guide/data-sources
+   user-guide/dataframe
    user-guide/common-operations/index
    user-guide/io/index
    user-guide/configuration
diff --git a/docs/source/user-guide/basics.rst b/docs/source/user-guide/basics.rst
index bff240b6b..2975d9a6b 100644
--- a/docs/source/user-guide/basics.rst
+++ b/docs/source/user-guide/basics.rst
@@ -21,7 +21,8 @@ Concepts
 ========
 
 In this section, we will cover a basic example to introduce a few key concepts. We will use the
-2021 Yellow Taxi Trip Records ([download](https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet)), from the [TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
+2021 Yellow Taxi Trip Records (`download <https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet>`_),
+from the `TLC Trip Record Data <https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page>`_.
 
 .. ipython:: python