Skip to content

feat: render READMEs in ORG format, better fallback if not MD #211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ HTMLSanitizer = "9a15a9f4-ddd5-46ee-89fc-c219f813dd6f"
Highlights = "eafb193a-b7ab-5a9e-9068-77385905fa72"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Org = "587fedb0-ad84-11e9-2bd6-d15ea4be1f9e"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
AbstractTrees = "0.2, 0.3"
Documenter = "0.24, 0.25, 0.26, 0.27"
GitHub = "5.1, 5.2, 5.3, 5.4"
GitForge = "0.4"
GitHub = "5.1, 5.2, 5.3, 5.4"
GithubMarkdown = "0.2"
Gumbo = "0.8"
HTMLSanitizer = "0.2"
Expand Down
84 changes: 65 additions & 19 deletions src/builders.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using HTMLSanitizer
using Highlights
using Downloads
using Documenter
import Org
function build_git_docs(packagespec, buildpath, uri; src_prefix="", href_prefix="")
pkgname = packagespec.name
return mktempdir() do dir
Expand Down Expand Up @@ -242,7 +243,11 @@ function build_readme_docs(pkgname, pkgroot, docsdir, mod, src_prefix, href_pref
mkpath(doc_src)
index = joinpath(doc_src, "index.md")

render_html(readme, index, src_prefix, href_prefix; documenter = true)
if isnothing(readme)
@error "Readme missing, skipping generation." pkgroot
else
render_html(readme, index, src_prefix, href_prefix; documenter = true)
end

if !isfile(index)
open(index, "w") do io
Expand Down Expand Up @@ -285,7 +290,7 @@ function find_readme(pkgroot)
allfiles = readdir(pkgroot)
# look for readme.md/readme first
for file in allfiles
if lowercase(file) in ("readme.md", "readme")
if lowercase(file) in ("readme.md", "readme", "readme.org")
readme = joinpath(pkgroot, file)
if isfile(readme)
return readme
Expand Down Expand Up @@ -387,27 +392,66 @@ function copy_package_source(packagespec, buildpath)
end
end

function render_html(input, output, src_prefix="", href_prefix=""; documenter = false)
if input === nothing
@error("Package doesn't seem to have a readme. ")

return
"""
    ReadmeFormat

Supertype of the singleton tags used to pick a `render_html` method for a readme file.
"""
abstract type ReadmeFormat end

"Tag for readme files whose extension is `.md`."
struct GFMFormat <: ReadmeFormat end

"Tag for readme files whose extension is `.org`."
struct OrgFormat <: ReadmeFormat end

"Tag for readme files that have no extension at all."
struct TextFormat <: ReadmeFormat end

"Tag for readme files with any other extension."
struct UnknownFormat <: ReadmeFormat end

"""
    readme_format(readme_path::AbstractString)

Classify `readme_path` by its file extension, compared case-insensitively.

Returns `GFMFormat()` for `.md`, `OrgFormat()` for `.org`, `TextFormat()` when the
path has no extension, and `UnknownFormat()` for everything else.
"""
function readme_format(readme_path::AbstractString)
    suffix = lowercase(last(splitext(readme_path)))
    suffix == ".md" && return GFMFormat()
    suffix == ".org" && return OrgFormat()
    isempty(suffix) && return TextFormat()
    return UnknownFormat()
end

"""
    render_html(::GFMFormat, readme_path::AbstractString)

Render the readme at `readme_path` with `GithubMarkdown.rendergfm` and return
everything it wrote as a `String`.
"""
function render_html(::GFMFormat, readme_path::AbstractString)
    io = IOBuffer()
    # Only `readme_path` is in scope here. A leftover call that passed the
    # undefined name `input` has been removed; it would have raised an
    # `UndefVarError` before this call was ever reached.
    GithubMarkdown.rendergfm(io, readme_path; documenter = false)
    return String(take!(io))
end

"""
    render_html(::OrgFormat, readme_path::AbstractString)

Read the file at `readme_path`, parse it as an `Org.OrgDoc`, and return the
document's `text/html` representation as a `String`.
"""
function render_html(::OrgFormat, readme_path::AbstractString)
    source = read(readme_path, String)
    document = parse(Org.OrgDoc, source)
    return sprint(show, MIME("text/html"), document)
end

"""
    render_html(format::Union{TextFormat,UnknownFormat}, readme_path::AbstractString)

Return the contents of `readme_path`, HTML-escaped and wrapped in a `<pre>` element.

When `format` is an `UnknownFormat`, a warning line naming the file (its escaped
basename) is emitted before the `<pre>` block.
"""
function render_html(format::Union{TextFormat,UnknownFormat}, readme_path::AbstractString)
    return sprint() do buf
        if format isa UnknownFormat
            print(
                buf,
                "<strong><pre>WARNING! Unknown README file format: ",
                escapehtml(basename(readme_path)),
                "</pre></strong>\n",
            )
        end
        # The file is read only after the optional warning has been written.
        print(buf, "<pre>", escapehtml(read(readme_path, String)), "</pre>")
    end
end

function render_html(
input::AbstractString,
output::AbstractString,
src_prefix::AbstractString="",
href_prefix::AbstractString="";
documenter::Bool = false
)
allow_class_on_code = deepcopy(HTMLSanitizer.WHITELIST)
allow_class_on_code[:attributes]["code"] = ["class"]

out = sanitize(String(take!(io)), prettyprint = false, whitelist = allow_class_on_code)

readme_html = render_html(readme_format(input), input)
out = sanitize(readme_html, prettyprint = false, whitelist = allow_class_on_code)
out = postprocess_html_readme(out; src_prefix = src_prefix, href_prefix = href_prefix)
out = out.children[2]

print(io, out)
out = String(take!(io))

out = sprint(print, out)
out = replace(out, r"^<body>" => "")
out = replace(out, r"</body>$" => "")

Expand All @@ -421,15 +465,17 @@ end
function render_readme_html(pkgroot, buildpath, src_prefix="", href_prefix=""; documenter = false)
outpath = joinpath(buildpath, "_readme")
try
readme = find_readme(pkgroot)

mkpath(outpath)
readmehtml = joinpath(outpath, "readme.html")

@info("Rendering readme to HTML.")
render_html(readme, readmehtml, src_prefix, href_prefix; documenter = false)
@info("Done rendering readme to HTML.")

readme = find_readme(pkgroot)
if isnothing(readme)
@error "Readme missing, skipping generation." pkgroot
else
@info("Rendering readme to HTML.")
render_html(readme, readmehtml, src_prefix, href_prefix; documenter = false)
@info("Done rendering readme to HTML.")
end
return readmehtml
catch err
@error("Error trying to render readme to HTML.", exception=err)
Expand Down
32 changes: 32 additions & 0 deletions src/utils/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,35 @@ function find_free_x_servernum()

return i
end

# Borrowed from Documenter.jl (MIT)
#
# https://github.com/JuliaDocs/Documenter.jl/blob/5dafb6488f90d173ca4fcfeead0396332bdc6de6/src/utilities/DOM.jl#L269-L296
"""
    escapehtml(text::AbstractString)

Replace the HTML-special characters in `text` with their entities:

| Character | Replacement |
|:----------|:------------|
| `<`       | `&lt;`      |
| `>`       | `&gt;`      |
| `&`       | `&amp;`     |
| `'`       | `&#39;`     |
| `\"`      | `&quot;`    |

If `text` contains none of these characters, `text` itself is returned (no copy is
made); otherwise a freshly built `String` is returned. Treat the result as
immutable and compare it with `==`, not `===`.
"""
function escapehtml(text::AbstractString)
    # Nothing to escape: hand the caller's object straight back.
    occursin(r"[<>&'\"]", text) || return text

    escaped = IOBuffer()
    for c in text
        if c === '<'
            write(escaped, "&lt;")
        elseif c === '>'
            write(escaped, "&gt;")
        elseif c === '&'
            write(escaped, "&amp;")
        elseif c === '\''
            write(escaped, "&#39;")
        elseif c === '"'
            write(escaped, "&quot;")
        else
            write(escaped, c)
        end
    end
    return String(take!(escaped))
end
3 changes: 3 additions & 0 deletions test/fixtures/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This is a plaintext README

This should render just as plaintext (<pre>).
4 changes: 4 additions & 0 deletions test/fixtures/README.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<pre># This is a plaintext README

This should render just as plaintext (&lt;pre&gt;).
</pre>
100 changes: 100 additions & 0 deletions test/fixtures/readme.ORG
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#+title: Org.jl
#+author: tecosaur

#+html: <img src="org-mode-jl.svg" align="right">

A library for working with Org files. Specifically, this provides utilities for:

+ Parsing Org text to an AST (Abstract Syntax Tree)
+ Regenerating Org text from an AST
+ Basic manipulation and analysis of an Org document AST
+ Generating basic representations Org content in the terminal, and a few other forms.

With the exception of some of the /particularly fancy/ capabilities provided by
=org-mode= (like Babel and Calc-based spreadsheeting), this project aims to
exactly match the interpretation of Org mode markup --- specifically the AST
generated by =org-element.el=. This goal is not yet achieved, however
the bulk of the work is now complete.

*Org.jl* implements the vast majority of the [[https://orgmode.org/worg/dev/org-syntax.html][org-syntax]] document (see the
Progress table). This can be checked by looking at ~Org.compatability~ in
Julia.

* Basic usage

#+begin_src julia
# after installing with ~] add Org~ or ~Pkg.add("Org")~
using Org
text1 = org"Some *Org* markup, written with ease using the ~org\"\"~ macro."
parsetree(text1) # show the generated parse tree

text2 = parse(OrgDoc, "Some *Org* markup, written with ease using the ~parse~ function.")
diff(text1, text2) # show the components of the parse trees that differ

dochead = @doc Org.Heading # the documentation for the Heading component (::OrgDoc)
org(dochead) # generate Org text that produces the Org.Heading object
string(dochead) # as above, but produces a String

parse(OrgDoc, string(dochead)) == dochead # round-trip equality

# get the lang of each source block
[c.lang for c in dochead.components if c isa Org.SourceBlock]
#+end_src

* Progress

| Component | Type | Parse | Org | Term | HTML |
|---------------------+------+-------+-----+------+------|
| Heading | X | X | X | X | X |
| Section | X | X | X | X | X |
|---------------------+------+-------+-----+------+------|
| Affiliated Keywords | X | X | X | X | |
|---------------------+------+-------+-----+------+------|
| GreaterBlock | X | X | X | X | X |
| Drawer | X | X | X | X | X |
| DynamicBlock | X | X | X | X | X |
| FootnoteDefinition | X | X | X | X | |
| InlineTask | X | | | | |
| Item | X | X | X | X | X |
| List | X | X | X | X | X |
| PropertyDrawer | X | X | X | X | X |
| Table | X | X | X | X | X |
|---------------------+------+-------+-----+------+------|
| BabelCall | X | X | X | X | X |
| Block | X | X | X | X | |
| Clock | X | X | X | X | |
| DiarySexp | X | X | X | X | |
| Planning | X | X | X | X | |
| Comment | X | X | X | X | X |
| FixedWidth | X | X | X | X | X |
| HorizontalRule | X | X | X | X | X |
| Keyword | X | X | X | X | |
| LaTeXEnvironment | X | X | X | X | |
| NodeProperty | X | X | X | X | X |
| Paragraph | X | X | X | X | X |
| TableRow | X | X | X | X | X |
| TableHRule | X | X | X | X | X |
| BlankLine | X | X | X | X | X |
|---------------------+------+-------+-----+------+------|
| OrgEntity | X | X | X | X | X |
| LaTeXFragment | X | X | X | X | |
| ExportSnippet | X | X | X | X | X |
| FootnoteReference | X | X | X | X | X |
| InlineBabelCall | X | X | X | X | X |
| InlineSrcBlock | X | X | X | X | X |
| RadioLink | X | X | X | X | X |
| PlainLink | X | X | X | X | X |
| AngleLink | X | X | X | X | X |
| RegularLink | X | X | X | X | X |
| LineBreak | X | X | X | X | X |
| Macro | X | X | X | X | X |
| Citation | X | X | X | X | X |
| RadioTarget | X | X | X | X | X |
| Target | X | X | X | X | X |
| StatisticsCookie | X | X | X | X | X |
| Subscript | X | X | X | X | X |
| Superscript | X | X | X | X | X |
| TableCell | X | X | X | X | X |
| Timestamp | X | X | X | X | X |
| TextPlain | X | X | X | X | X |
| TextMarkup | X | X | X | X | X |
Loading