Skip to content

Commit 8acb679

Browse files
authored
Use PrettyTables.jl as HTML backend (#3096)
1 parent a45c104 commit 8acb679

File tree

11 files changed

+1563
-367
lines changed

11 files changed

+1563
-367
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ InvertedIndices = "1"
3131
IteratorInterfaceExtensions = "0.1.1, 1"
3232
Missings = "0.4.2, 1"
3333
PooledArrays = "1.4.2"
34-
PrettyTables = "0.12, 1"
34+
PrettyTables = "2.1"
3535
Reexport = "0.1, 0.2, 1"
3636
ShiftedArrays = "1, 2"
3737
SortingAlgorithms = "0.1, 0.2, 0.3, 1"

docs/src/man/getting_started.md

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,27 @@ relevant variables into your current namespace.
1515

1616
!!! note
1717

18-
By default Jupyter Notebook will limit the number of rows and columns when displaying a data frame to roughly
19-
fit the screen size (like in the REPL).
20-
21-
You can override this behavior by changing the values of the `ENV["COLUMNS"]` and `ENV["LINES"]`
22-
variables to hold the maximum width and height of output in characters respectively.
18+
By default DataFrames.jl limits the number of rows and columns when displaying a data frame in a Jupyter
19+
Notebook to 25 and 100, respectively. You can override this behavior by changing the values of the
20+
`ENV["DATAFRAMES_COLUMNS"]` and `ENV["DATAFRAMES_ROWS"]` variables to hold the maximum number of columns
21+
and rows of the output. All columns or rows will be printed if those numbers are equal to or lower than 0.
2322

2423
Alternatively, you may want to set the maximum number of data frame rows to print to `100` and the maximum
25-
output width in characters to `1000` for every Julia session using some Jupyter kernel file (numbers `100`
26-
and `1000` are only examples and can be adjusted). In such case add a `"COLUMNS": "1000", "LINES": "100"`
27-
entry to the `"env"` variable in this Jupyter kernel file.
28-
See [here](https://jupyter-client.readthedocs.io/en/stable/kernels.html) for information about location
24+
number of columns to print to `1000` for every Julia session using some Jupyter kernel file (numbers `100`
25+
and `1000` are only examples and can be adjusted). In such a case add a
26+
`"DATAFRAMES_COLUMNS": "1000", "DATAFRAMES_ROWS": "100"` entry to the `"env"` variable in this Jupyter kernel
27+
file. See [here](https://jupyter-client.readthedocs.io/en/stable/kernels.html) for information about location
2928
and specification of Jupyter kernels.
3029

30+
The package [PrettyTables.jl](https://github.com/ronisbr/PrettyTables.jl) renders the `DataFrame` in the
31+
Jupyter notebook. Users can customize the output by passing keyword arguments `kwargs...` to the
32+
function `show`: `show(stdout, MIME("text/html"), df; kwargs...)`, where `df` is the `DataFrame`. Any
33+
argument supported by PrettyTables.jl in the HTML backend can be used here. Hence, for example, if the user
34+
wants to change the color of all numbers smaller than 0 to red in Jupyter, they can execute:
35+
`show(stdout, MIME("text/html"), df; highlighters = hl_lt(0, HtmlDecoration(color = "red")))` after
36+
`using PrettyTables`. For more information about the available options, check
37+
[PrettyTables.jl documentation](https://ronisbr.github.io/PrettyTables.jl/stable/man/usage/).
38+
3139
## The `DataFrame` Type
3240

3341
Objects of the `DataFrame` type represent a data table as a series of vectors,

src/abstractdataframe/io.jl

Lines changed: 150 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,15 @@ Render a data frame to an I/O stream in MIME type `mime`.
101101
Additionally selected MIME types support passing the following keyword arguments:
102102
- MIME type `"text/plain"` accepts all listed keyword arguments and their behavior
103103
is identical to that of `show(::IO, ::AbstractDataFrame)`
104-
- MIME type `"text/html"` accepts `summary` keyword argument which
105-
allows to choose whether to print a brief string summary of the data frame.
104+
- MIME type `"text/html"` accepts the following keyword arguments:
105+
- `eltypes::Bool = true`: Whether to print the column types under column names.
106+
- `summary::Bool = true`: Whether to print a brief string summary of the data frame.
107+
- `max_column_width::AbstractString = ""`: The maximum column width. It must
108+
be a string containing a valid CSS length. For example, passing
109+
"100px" will limit the width of all columns to 100 pixels. If empty,
110+
the columns will be rendered without limits.
111+
- `kwargs...`: Any keyword argument supported by the function `pretty_table`
112+
of PrettyTables.jl can be passed here to customize the output.
106113
107114
# Examples
108115
```jldoctest
@@ -126,9 +133,14 @@ julia> show(stdout, MIME("text/csv"), DataFrame(A=1:3, B=["x", "y", "z"]))
126133
```
127134
"""
128135
Base.show(io::IO, mime::MIME, df::AbstractDataFrame)
129-
Base.show(io::IO, mime::MIME"text/html", df::AbstractDataFrame;
130-
summary::Bool=true, eltypes::Bool=true) =
131-
_show(io, mime, df, summary=summary, eltypes=eltypes)
136+
function Base.show(io::IO, mime::MIME"text/html", df::AbstractDataFrame;
137+
summary::Bool=true, eltypes::Bool=true,
138+
max_column_width::AbstractString="", kwargs...)
139+
_verify_kwargs_for_html(; kwargs...)
140+
return _show(io, mime, df; summary=summary, eltypes=eltypes,
141+
max_column_width=max_column_width, kwargs...)
142+
end
143+
132144
Base.show(io::IO, mime::MIME"text/latex", df::AbstractDataFrame; eltypes::Bool=true) =
133145
_show(io, mime, df, eltypes=eltypes)
134146
Base.show(io::IO, mime::MIME"text/csv", df::AbstractDataFrame) =
@@ -144,15 +156,6 @@ Base.show(io::IO, mime::MIME"text/plain", df::AbstractDataFrame; kwargs...) =
144156
#
145157
##############################################################################
146158

147-
function digitsep(value::Integer)
148-
# Adapted from https://github.com/IainNZ/Humanize.jl
149-
value = string(abs(value))
150-
group_ends = reverse(collect(length(value):-3:1))
151-
groups = [value[max(end_index - 2, 1):end_index]
152-
for end_index in group_ends]
153-
return join(groups, ',')
154-
end
155-
156159
function html_escape(cell::AbstractString)
157160
cell = replace(cell, "&"=>"&")
158161
cell = replace(cell, "<"=>"&lt;")
@@ -164,128 +167,140 @@ function html_escape(cell::AbstractString)
164167
return cell
165168
end
166169

167-
function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame;
168-
summary::Bool=true, eltypes::Bool=true, rowid::Union{Int, Nothing}=nothing)
170+
function _show(io::IO,
171+
::MIME"text/html",
172+
df::AbstractDataFrame;
173+
summary::Bool=true,
174+
eltypes::Bool=true,
175+
rowid::Union{Int, Nothing}=nothing,
176+
title::AbstractString="",
177+
max_column_width::AbstractString="",
178+
kwargs...)
169179
_check_consistency(df)
170180

171-
# we will pass around this buffer to avoid its reallocation in ourstrwidth
172-
buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true)
181+
names_str = names(df)
182+
types = Any[eltype(c) for c in eachcol(df)]
183+
types_str = batch_compacttype(types, 9)
184+
types_str_complete = batch_compacttype(types, 256)
173185

174-
if rowid !== nothing
175-
if size(df, 2) == 0
176-
rowid = nothing
177-
elseif size(df, 1) != 1
178-
throw(ArgumentError("rowid may be passed only with a single row data frame"))
186+
# For consistency, if `kwargs` has `compact_printing`, we must use it.
187+
compact_printing::Bool = get(kwargs, :compact_printing, get(io, :compact, true))
188+
189+
num_rows, num_cols = size(df)
190+
191+
# By default, we align the columns to the left unless they are numbers,
192+
# which is checked in the following.
193+
alignment = fill(:l, num_cols)
194+
195+
for i = 1:num_cols
196+
type_i = nonmissingtype(types[i])
197+
198+
if type_i <: Number
199+
alignment[i] = :r
179200
end
180201
end
181202

182-
mxrow, mxcol = size(df)
183203
if get(io, :limit, false)
184-
tty_rows, tty_cols = displaysize(io)
185-
mxrow = min(mxrow, tty_rows)
186-
maxwidths = getmaxwidths(df, io, 1:mxrow, 0:-1, :X, nothing, true, buffer, 0) .+ 2
187-
mxcol = min(mxcol, searchsortedfirst(cumsum(maxwidths), tty_cols))
204+
# Obtain the maximum number of rows and columns that we can print from
205+
# environment variables.
206+
mxrow = something(tryparse(Int, get(ENV, "DATAFRAMES_ROWS", "25")), 25)
207+
mxcol = something(tryparse(Int, get(ENV, "DATAFRAMES_COLUMNS", "100")), 100)
208+
else
209+
mxrow = -1
210+
mxcol = -1
188211
end
189212

190-
cnames = _names(df)[1:mxcol]
191-
write(io, "<div class=\"data-frame\">")
213+
# Check if the user wants to display a summary about the DataFrame that is
214+
# being printed. This will be shown using the `title` option of
215+
# `pretty_table`.
192216
if summary
193-
write(io, "<p>$(digitsep(nrow(df))) rows × $(digitsep(ncol(df))) columns")
194-
if mxcol < size(df, 2)
195-
write(io, " (omitted printing of $(size(df, 2)-mxcol) columns)")
196-
end
197-
write(io, "</p>")
198-
end
199-
write(io, "<table class=\"data-frame\">")
200-
write(io, "<thead>")
201-
write(io, "<tr>")
202-
write(io, "<th></th>")
203-
for column_name in cnames
204-
write(io, "<th>$(html_escape(String(column_name)))</th>")
205-
end
206-
write(io, "</tr>")
207-
if eltypes
208-
write(io, "<tr>")
209-
write(io, "<th></th>")
210-
# We put a longer string for the type into the title argument of the <th> element,
211-
# which the users can hover over. The limit of 256 characters is arbitrary, but
212-
# we want some maximum limit, since the types can sometimes get really-really long.
213-
types = Any[eltype(df[!, idx]) for idx in 1:mxcol]
214-
ct, ct_title = batch_compacttype(types, 9), batch_compacttype(types, 256)
215-
for j in 1:mxcol
216-
s = html_escape(ct[j])
217-
title = html_escape(ct_title[j])
218-
write(io, "<th title=\"$title\">$s</th>")
217+
if isempty(title)
218+
title = Base.summary(df)
219219
end
220-
write(io, "</tr>")
220+
else
221+
title = ""
221222
end
222-
write(io, "</thead>")
223-
write(io, "<tbody>")
224-
for row in 1:mxrow
225-
write(io, "<tr>")
226-
if rowid === nothing
227-
write(io, "<th>$row</th>")
223+
224+
# If `rowid` is not `nothing`, then we are printing a data row. In this
225+
# case, we will add this information using the row name column of
226+
# PrettyTables.jl. Otherwise, we can just use the row number column.
227+
if (rowid === nothing) || (ncol(df) == 0)
228+
show_row_number::Bool = get(kwargs, :show_row_number, true)
229+
row_labels = nothing
230+
231+
# If the column with row numbers is not shown, then we should not
232+
# display a vertical line after the first column.
233+
vlines = fill(1, show_row_number)
234+
else
235+
nrow(df) != 1 &&
236+
throw(ArgumentError("rowid may be passed only with a single row data frame"))
237+
238+
# In this case, if the user does not want to show the row number, then
239+
# we must hide the row name column, which is used to display the
240+
# `rowid`.
241+
if !get(kwargs, :show_row_number, true)
242+
row_labels = nothing
243+
vlines = Int[]
228244
else
229-
write(io, "<th>$rowid</th>")
245+
row_labels = [string(rowid)]
246+
vlines = Int[1]
230247
end
231-
for column_name in cnames
232-
if isassigned(df[!, column_name], row)
233-
cell_val = df[row, column_name]
234-
if ismissing(cell_val)
235-
write(io, "<td><em>missing</em></td>")
236-
elseif cell_val isa Markdown.MD
237-
write(io, "<td>")
238-
show(io, "text/html", cell_val)
239-
write(io, "</td>")
240-
elseif cell_val isa SHOW_TABULAR_TYPES
241-
write(io, "<td><em>")
242-
cell = sprint(ourshow, cell_val, 0)
243-
write(io, html_escape(cell))
244-
write(io, "</em></td>")
245-
else
246-
cell = sprint(ourshow, cell_val, 0)
247-
write(io, "<td>$(html_escape(cell))</td>")
248-
end
249-
else
250-
write(io, "<td><em>#undef</em></td>")
251-
end
252-
end
253-
write(io, "</tr>")
254-
end
255-
if size(df, 1) > mxrow
256-
write(io, "<tr>")
257-
write(io, "<th>&vellip;</th>")
258-
for column_name in cnames
259-
write(io, "<td>&vellip;</td>")
260-
end
261-
write(io, "</tr>")
248+
249+
show_row_number = false
262250
end
263-
write(io, "</tbody>")
264-
write(io, "</table>")
265-
write(io, "</div>")
251+
252+
pretty_table(io, df;
253+
alignment = alignment,
254+
backend = Val(:html),
255+
compact_printing = compact_printing,
256+
formatters = (_pretty_tables_general_formatter,),
257+
header = (names_str, types_str),
258+
header_alignment = :l,
259+
header_cell_titles = (nothing, types_str_complete),
260+
highlighters = (_PRETTY_TABLES_HTML_HIGHLIGHTER,),
261+
max_num_of_columns = mxcol,
262+
max_num_of_rows = mxrow,
263+
maximum_columns_width = max_column_width,
264+
minify = true,
265+
row_label_column_title = "Row",
266+
row_labels = row_labels,
267+
row_number_alignment = :r,
268+
row_number_column_title = "Row",
269+
show_omitted_cell_summary = true,
270+
show_row_number = show_row_number,
271+
show_subheader = eltypes,
272+
standalone = false,
273+
table_class = "data-frame",
274+
table_div_class = "data-frame",
275+
table_style = _PRETTY_TABLES_HTML_TABLE_STYLE,
276+
top_left_str = String(title),
277+
top_right_str_decoration = HtmlDecoration(font_style = "italic"),
278+
vcrop_mode = :middle,
279+
wrap_table_in_div = true,
280+
kwargs...)
281+
282+
return nothing
266283
end
267284

268-
function Base.show(io::IO, mime::MIME"text/html", dfr::DataFrameRow;
269-
summary::Bool=true, eltypes::Bool=true)
285+
function Base.show(io::IO, mime::MIME"text/html", dfr::DataFrameRow; kwargs...)
286+
_verify_kwargs_for_html(; kwargs...)
270287
r, c = parentindices(dfr)
271-
summary && write(io, "<p>DataFrameRow ($(length(dfr)) columns)</p>")
272-
_show(io, mime, view(parent(dfr), [r], c), summary=false, eltypes=eltypes, rowid=r)
288+
title = "DataFrameRow ($(length(dfr)) columns)"
289+
_show(io, mime, view(parent(dfr), [r], c); rowid=r, title=title, kwargs...)
273290
end
274291

275-
function Base.show(io::IO, mime::MIME"text/html", dfrs::DataFrameRows;
276-
summary::Bool=true, eltypes::Bool=true)
292+
function Base.show(io::IO, mime::MIME"text/html", dfrs::DataFrameRows; kwargs...)
293+
_verify_kwargs_for_html(; kwargs...)
277294
df = parent(dfrs)
278-
summary && write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameRows</p>")
279-
_show(io, mime, df, summary=false, eltypes=eltypes)
295+
title = "$(nrow(df))×$(ncol(df)) DataFrameRows"
296+
_show(io, mime, df; title=title, kwargs...)
280297
end
281298

282-
function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns;
283-
summary::Bool=true, eltypes::Bool=true)
299+
function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns; kwargs...)
300+
_verify_kwargs_for_html(; kwargs...)
284301
df = parent(dfcs)
285-
if summary
286-
write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns</p>")
287-
end
288-
_show(io, mime, df, summary=false, eltypes=eltypes)
302+
title = "$(nrow(df))×$(ncol(df)) DataFrameColumns"
303+
_show(io, mime, df; title=title, kwargs...)
289304
end
290305

291306
function Base.show(io::IO, mime::MIME"text/html", gd::GroupedDataFrame)
@@ -298,31 +313,42 @@ function Base.show(io::IO, mime::MIME"text/html", gd::GroupedDataFrame)
298313
nrows = size(gd[1], 1)
299314
rows = nrows > 1 ? "rows" : "row"
300315

301-
identified_groups = [html_escape(string(col, " = ",
302-
repr(first(gd[1][!, col]))))
316+
identified_groups = [string(col, " = ", repr(first(gd[1][!, col])))
303317
for col in gd.cols]
304318

305-
write(io, "<p><i>First Group ($nrows $rows): ")
306-
join(io, identified_groups, ", ")
307-
write(io, "</i></p>")
308-
show(io, mime, gd[1], summary=false)
319+
title = "First Group ($nrows $rows): " * join(identified_groups, ", ")
320+
_show(io, mime, gd[1], title=title)
309321
end
310322
if N > 1
311323
nrows = size(gd[N], 1)
312324
rows = nrows > 1 ? "rows" : "row"
313325

314-
identified_groups = [html_escape(string(col, " = ",
315-
repr(first(gd[N][!, col]))))
326+
identified_groups = [string(col, " = ", repr(first(gd[N][!, col])))
316327
for col in gd.cols]
317328

318329
write(io, "<p>&vellip;</p>")
319-
write(io, "<p><i>Last Group ($nrows $rows): ")
320-
join(io, identified_groups, ", ")
321-
write(io, "</i></p>")
322-
show(io, mime, gd[N], summary=false)
330+
title = "Last Group ($nrows $rows): " * join(identified_groups, ", ")
331+
_show(io, mime, gd[N], title=title)
323332
end
324333
end
325334

335+
# Internal function to verify the keywords in show functions using the HTML
336+
# backend.
337+
function _verify_kwargs_for_html(; kwargs...)
338+
haskey(kwargs, :rowid) &&
339+
throw(ArgumentError("Keyword argument `rowid` is reserved and must not be used."))
340+
341+
haskey(kwargs, :title) &&
342+
throw(ArgumentError("Use the `top_left_str` keyword argument instead of `title` " *
343+
"to change the label above the data frame."))
344+
345+
haskey(kwargs, :truncate) &&
346+
throw(ArgumentError("`truncate` is not supported in HTML. " *
347+
"Use `max_column_width` to limit the size of the columns in this case."))
348+
349+
return nothing
350+
end
351+
326352
##############################################################################
327353
#
328354
# LaTeX output

0 commit comments

Comments
 (0)