Skip to content

Commit eb461ea

Browse files
authored
refactor IPython display formatter usage (#1)
1 parent a2f0e46 commit eb461ea

File tree

6 files changed

+189
-90
lines changed

6 files changed

+189
-90
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ All notable changes will be documented here.
22

33
---
44

5+
## `1.0.3`
6+
_2022-04-26_
7+
### **Fixed**
8+
* `dx.register()` (`dx.enable()`, deprecated) and `dx.deregister()` (`dx.disable()`, deprecated) will now update the default display formatting for pandas `DataFrame` objects as intended
9+
510
## `1.0.2`
611
_2022-04-25_
712
### **Fixed**

dx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
from .dx import *
33
from .formatters import *
44

5-
__version__ = "1.0.2"
5+
__version__ = "1.0.3"

dx/dx.py

Lines changed: 9 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,19 @@
11
import pathlib
2-
from typing import List, Optional, Union
2+
from typing import List, Union
33

44
import pandas as pd
55
from IPython.display import display as ipydisplay
6-
from pandas.io.json import build_table_schema
76

8-
from .config import in_noteable_env
9-
10-
DX_MEDIA_TYPE = "application/vnd.dex.v1+json"
11-
DATARESOURCE_MEDIA_TYPE = "application/vnd.dataresource+json"
12-
13-
14-
class DXDataFrame(pd.DataFrame):
15-
"""Convenience class to provide DEX-focused methods for IPython rendering"""
16-
17-
_display_index = False
18-
media_type = DX_MEDIA_TYPE
19-
20-
def display(self, media_type: Optional[str] = None, index: bool = False) -> None:
21-
"""Render DXDataFrame based on provided media type."""
22-
23-
if not in_noteable_env():
24-
# TODO: should this be treated differently?
25-
ipydisplay(self)
26-
return
27-
28-
media_type = media_type or self.media_type
29-
self._display_index = index
30-
payload = {
31-
"schema": self.table_schema,
32-
"data": self.data_transform(media_type=media_type),
33-
# "summary_statistics": {},
34-
# "dx-seed": {},
35-
}
36-
ipydisplay({media_type: payload}, raw=True)
37-
return
38-
39-
def data_transform(self, media_type: str) -> List:
40-
"""
41-
Transforms the current dataframe into a list of dictionaries
42-
or list of columnar values, depending on the media type provided.
43-
"""
44-
if media_type != self.media_type:
45-
# use default data orient
46-
return self.to_dict(orient="records")
47-
48-
# we can't use `.to_dict(orient='list')` here since that would return a dictionary of {column: [values]} pairs
49-
if self._display_index:
50-
return self.reset_index().transpose().values.tolist()
51-
return self.transpose().values.tolist()
52-
53-
@property
54-
def table_schema(self):
55-
return build_table_schema(self, index=self._display_index)
7+
from .formatters import format_dx
568

579

5810
def display(
5911
data: Union[List[dict], pd.DataFrame, Union[pathlib.Path, str]],
60-
media_type: Optional[str] = None,
61-
index: bool = False,
6212
) -> None:
63-
"""Convenience function to allow calling `dx.display(df)` on a pandas Dataframe, tabular data structure, or filepath."""
13+
"""
14+
Display a single object (pd.DataFrame, .csv/.json filepath, or tabular dataset) with the DX display format.
15+
"""
6416

65-
# TODO: handle this in DXDataFrame init instead?
6617
if isinstance(data, str):
6718
path = pathlib.PurePosixPath(data)
6819
if path.suffix == ".csv":
@@ -72,7 +23,10 @@ def display(
7223
else:
7324
raise ValueError(f"Unsupported file type: `{path.suffix}`")
7425

75-
return DXDataFrame(data).display(media_type=media_type, index=index)
26+
df = pd.DataFrame(data)
27+
payload, _ = format_dx(df)
28+
ipydisplay(payload, raw=True)
29+
return
7630

7731

7832
# backwards-compatibility

dx/formatters.py

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,48 @@
11
import pandas as pd
22
from IPython import get_ipython
3-
from IPython.core.formatters import BaseFormatter
3+
from IPython.core.formatters import DisplayFormatter
4+
from pandas.io.json import build_table_schema
45

5-
from .dx import DATARESOURCE_MEDIA_TYPE, DX_MEDIA_TYPE
6+
DEFAULT_IPYTHON_DISPLAY_FORMATTER = get_ipython().display_formatter
7+
DX_MEDIA_TYPE = "application/vnd.dex.v1+json"
68

79

8-
class DXSchemaFormatter(BaseFormatter):
9-
# FOLLOWUP: does anything need to change here?
10-
print_method = "_repr_data_resource_"
11-
_return_type = (dict,)
10+
class DXDisplayFormatter(DisplayFormatter):
11+
def format(self, obj, **kwargs):
1212

13+
if isinstance(obj, pd.DataFrame):
14+
return format_dx(obj)
1315

14-
class TableSchemaFormatter(BaseFormatter):
15-
print_method = "_repr_data_resource_"
16-
_return_type = (dict,)
16+
return DEFAULT_IPYTHON_DISPLAY_FORMATTER.format(obj, **kwargs)
1717

1818

19-
def deregister_dx_formatting(media_type: str = DX_MEDIA_TYPE) -> None:
20-
"""Reverts IPython.display_formatter.formatters to original states"""
21-
pd.options.display.html.table_schema = False
22-
pd.options.display.max_rows = 60
19+
def format_dx(df) -> tuple:
    """
    Build the DX display payload for a dataframe.

    Returns a ``(payload, metadata)`` tuple. ``payload`` maps ``DX_MEDIA_TYPE``
    to a dictionary with the table schema under ``"schema"`` and the values,
    as one list per column, under ``"data"``. ``metadata`` is an empty dictionary.
    """
    # both the schema and the data include `df.index` (e.g. slicing/sampling)
    schema = build_table_schema(df)
    # transposing the reset frame yields one list of values per column
    column_values = df.reset_index().transpose().values.tolist()
    dx_payload = {"schema": schema, "data": column_values}
    return ({DX_MEDIA_TYPE: dx_payload}, {})
2333

24-
formatters = get_ipython().display_formatter.formatters
25-
if media_type in formatters:
26-
formatters.pop(media_type)
2734

28-
# this should effectively be the same as using
29-
# `pandas.io.formats.printing.enable_data_resource_formatter(True)`,
30-
# except calling that directly doesn't update the IPython formatters
31-
formatters[DATARESOURCE_MEDIA_TYPE] = TableSchemaFormatter()
32-
formatters[DATARESOURCE_MEDIA_TYPE].enabled = True
35+
def deregister() -> None:
    """
    Undo the DX display setup.

    Sets the pandas ``display.max_rows`` option to 60 and puts the formatter
    stored in ``DEFAULT_IPYTHON_DISPLAY_FORMATTER`` back on the current
    IPython shell.
    """
    pd.set_option("display.max_rows", 60)
    shell = get_ipython()
    shell.display_formatter = DEFAULT_IPYTHON_DISPLAY_FORMATTER
3339

3440

35-
def register_dx_formatter(media_type: str = DX_MEDIA_TYPE) -> None:
36-
"""Registers a media_type for IPython display formatting"""
37-
pd.options.display.html.table_schema = True
41+
def register() -> None:
42+
"""Overrides the default IPython display formatter to use DXDisplayFormatter"""
3843
pd.options.display.max_rows = 100_000
39-
40-
formatters = get_ipython().display_formatter.formatters
41-
formatters[media_type] = DXSchemaFormatter()
42-
# the default pandas `Dataframe._repl_html_` will not work correctly
43-
# if enabled=True here
44-
formatters[media_type].enabled = False
44+
get_ipython().display_formatter = DXDisplayFormatter()
4545

4646

47-
disable = deregister_dx_formatting
48-
enable = register_dx_formatter
47+
disable = deregister
48+
enable = register

0 commit comments

Comments
 (0)