From 7b89268f5d87a42e9dc01d33e3c21e7ca9f06f76 Mon Sep 17 00:00:00 2001 From: Cuihtlauac ALVARADO Date: Mon, 5 Jun 2023 18:59:41 +0200 Subject: [PATCH] Vendoring Omd 1.3.2 This removes the dependency on omd.1.3.2 and add the corresponding sources, without tests. Requiring to have omd strictly before 2.0.0~alpha1 is a foot gun for newbies. ocaml-lsp-server is among the first package newbies have to install. In that process, old omd 1.3.2 gets installed. Later, when starting to code with Omd, they either don't understand why their installed API doesn't match the publicly documented one or start coding with an obsolete API. --- dune-project | 1 - ocaml-lsp-server.opam | 1 - ocaml-lsp-server/src/omd/ABOUT.md | 241 + ocaml-lsp-server/src/omd/CHANGES.md | 117 + ocaml-lsp-server/src/omd/Makefile | 20 + ocaml-lsp-server/src/omd/README.md | 160 + ocaml-lsp-server/src/omd/bmd/README.md | 20 + ocaml-lsp-server/src/omd/dune-project | 28 + ocaml-lsp-server/src/omd/omd.opam | 44 + ocaml-lsp-server/src/omd/setup.ml | 41 + ocaml-lsp-server/src/omd/src/dune | 16 + .../src/omd/src/html_characters.ml | 1827 +++++++ .../src/omd/src/implementation_notes.md | 113 + ocaml-lsp-server/src/omd/src/omd.ml | 170 + ocaml-lsp-server/src/omd/src/omd.mli | 165 + ocaml-lsp-server/src/omd/src/omd_backend.ml | 1225 +++++ ocaml-lsp-server/src/omd/src/omd_backend.mli | 97 + ocaml-lsp-server/src/omd/src/omd_html.ml | 61 + ocaml-lsp-server/src/omd/src/omd_lexer.ml | 399 ++ ocaml-lsp-server/src/omd/src/omd_lexer.mli | 45 + ocaml-lsp-server/src/omd/src/omd_lexer_fs.ml | 23 + ocaml-lsp-server/src/omd/src/omd_lexer_fs.mli | 10 + ocaml-lsp-server/src/omd/src/omd_main.ml | 448 ++ ocaml-lsp-server/src/omd/src/omd_main.mli | 75 + ocaml-lsp-server/src/omd/src/omd_parser.ml | 4459 +++++++++++++++++ ocaml-lsp-server/src/omd/src/omd_parser.mli | 379 ++ .../src/omd/src/omd_representation.ml | 502 ++ .../src/omd/src/omd_representation.mli | 188 + ocaml-lsp-server/src/omd/src/omd_utils.ml | 310 ++ ocaml-lsp-server/src/omd/src/omd_utils.mli | 118 + ocaml-lsp-server/src/omd/src/omd_xtxt.ml | 28 + ocaml-lsp-server/src/omd/src/omd_xtxt.mli | 9 + 32 files changed, 11338 insertions(+), 2 deletions(-) create mode 100644 ocaml-lsp-server/src/omd/ABOUT.md create mode 100644 ocaml-lsp-server/src/omd/CHANGES.md create mode 100644 ocaml-lsp-server/src/omd/Makefile create mode 100644 ocaml-lsp-server/src/omd/README.md create mode 100644 ocaml-lsp-server/src/omd/bmd/README.md create mode 100644 ocaml-lsp-server/src/omd/dune-project create mode 100644 ocaml-lsp-server/src/omd/omd.opam create mode 100644 ocaml-lsp-server/src/omd/setup.ml create mode 100644 ocaml-lsp-server/src/omd/src/dune create mode 100644 ocaml-lsp-server/src/omd/src/html_characters.ml create mode 100644 ocaml-lsp-server/src/omd/src/implementation_notes.md create mode 100644 ocaml-lsp-server/src/omd/src/omd.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_backend.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_backend.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_html.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_lexer.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_lexer.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_lexer_fs.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_lexer_fs.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_main.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_main.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_parser.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_parser.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_representation.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_representation.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_utils.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_utils.mli create mode 100644 ocaml-lsp-server/src/omd/src/omd_xtxt.ml create mode 100644 ocaml-lsp-server/src/omd/src/omd_xtxt.mli diff --git a/dune-project b/dune-project index 8420d9c41..2ca07e928 100644 --- a/dune-project +++ b/dune-project @@ -59,7 +59,6 @@ possible and does not make any assumptions about IO. (ppx_expect (and (>= v0.15.0) :with-test)) (ocamlformat (and :with-test (= 0.24.1))) (ocamlc-loc (and (>= 3.5.0) (< 3.7.0))) - (omd (and (>= 1.3.2) (< 2.0.0~alpha1))) (octavius (>= 1.2.2)) (uutf (>= 1.0.2)) (pp (>= 1.1.2)) diff --git a/ocaml-lsp-server.opam b/ocaml-lsp-server.opam index 8ef8542f2..c3198dbf1 100644 --- a/ocaml-lsp-server.opam +++ b/ocaml-lsp-server.opam @@ -35,7 +35,6 @@ depends: [ "ppx_expect" {>= "v0.15.0" & with-test} "ocamlformat" {with-test & = "0.24.1"} "ocamlc-loc" {>= "3.5.0" & < "3.7.0"} - "omd" {>= "1.3.2" & < "2.0.0~alpha1"} "octavius" {>= "1.2.2"} "uutf" {>= "1.0.2"} "pp" {>= "1.1.2"} diff --git a/ocaml-lsp-server/src/omd/ABOUT.md b/ocaml-lsp-server/src/omd/ABOUT.md new file mode 100644 index 000000000..5a5e972d6 --- /dev/null +++ b/ocaml-lsp-server/src/omd/ABOUT.md @@ -0,0 +1,241 @@ + + +About [OMD](https://github.com/pw374/omd/) +========================================== + +The implementation of this library and command-line tool +is based on [DFMSD][]. +That description doesn't define a grammar but a sort of guide for +human users who are not trying to implement it. In other words, +it's ambiguous, which is a problem since there are no errors in the +Markdown language, which design is mostly based on some +email-writing experience: the meaning of a phrase is the meaning +a human would give when reading the phrase as some email contents. +For instance, if there are blank lines that have spaces +(lines that read empty but actually contain some characters, from +the computer point of view since spaces are represented by characters), +since they're invisible to the normal human reader, they should be ignored. + + +Specificities +------------- + +There follows a list of specificities of OMD. +This list is probably not exhaustive. + +**Please note that OMD's semantics have changed over time, but they are becoming +more and more stable with time and new releases. The goal is to eventually +have a semantics that's as sane as it can possibly be for a Markdown parser. +Please [browse and open issues](https://github.com/pw374/omd/issues/) +if you find something that seems wrong.** + +- Email addresses encoding: email addresses are not hex entity-encoded. + +- `[foo]` is a short-cut for `[foo][]`, but if `foo` is not a reference + then `[foo]` is printed `[foo]`, not `[foo][]`. + *(Taken from Github Flavour Markdown.)* + +- The Markdown to Markdown conversion may performe + some cleaning (some meaningless characters may disappear) + or spoiling (some meaningless characters may appear), + but both inputs and ouputs should have the same semantics (otherwise + please do report the bug). + +- A list containing at least one item which has at least one paragraph + is a list for which all items have paragraphs and/or blocks. + In HTML words, in practice, if an `li` of a `ul` or `ol` has a `p`, + then all other `li`s of that list have at least a `p` or a `pre`. + +- It's not possible to emphasise a part of a word using underscores. + *(Taken from Github Flavour Markdown.)* + +- A code section declared with at least 3 backquotes (`` ` ``) at the + first element on a line is a code block. The backquotes should be + followed by a language name (made of a-z characters) or by a newline. + +- A code block starting with several backquotes (e.g., ```` ``` ````) + immediately followed by a word W made of a-z characters is a code block + for which the code language is W. (If you use other characters than + a-z, the semantics is currently undefined although it's deterministic + of course, because it may change in the near future.) Also, if you use + the command line tool `omd`, you can define programs to process code + blocks specifically to the languages that are declared for those code + blocks. + +- Each and every tabulation is converted by OMD to 4 spaces at the lexing + step. And the behaviour of the parser is undefined for tabulations. + - Note that it does mean that if you have a document with some code written + using the + [Whitespace](http://en.wikipedia.org/wiki/Whitespace_(programming_language)) + language, it will not work very well. This might be fixed in the future + but unless you have a very good reason for OMD to support tabulations, + it will probably not. + +- Parentheses and square brackets are generally parsed in a way such that + `[a[b]](http://c/(d))` is the URL `http://c/(d)` with the text `a[b]`. + If you want a parenthesis or bracket not to count in the balanced parsing, + escape it with a backslash, such as in `[a\[b](http://c/\(d)`. + *This is typically something that's not defined in [DFMSD].* + - Note about backslashes in URLs: some web browsers (e.g., Safari) + automatically convert `\` to `/`. It's not the case of CURL. + However I assume it's safe to consider that backslashes are not + to be used in URLs. Still it's always possible to + backslashe-escape them anyways. + +- HTML is somewhat a part of Markdown. OMD will partially parse HTML tags + and if you have a tag that isn't a known HTML tag, then it's possible + that OMD will not consider it as HTML. For instance, a document + containing just `` will be converted to + `

<foo></foo>

`. + - It's possible to ask `omd` to relax this constraint. + +- Some additional features are available on the command line. + For more information, used the command `omd -help` + + + +[DFMSD]: http://daringfireball.net/projects/markdown/syntax + "John Gruber's description of the syntax of Markdown" + +"DFMSD" is short for "Daring Fireball: Markdown Syntax Documentation", +which is the HTML title of the page located at +. + +Extension mechanisms +-------------------- + +The parser is implemented using a big (very big) recursive function +(`Omd_parser.Make(Env).main_loop_rev`), with a set of some auxiliary +functions. Some parts are easy to understand, some parts are +not. However, overall, it should be easy enough. + + +The parser has a double extension mechanism. + +1. To use the first mechanism, you may define a set of functions in +the module `Env` given to instanciate the functor `Omd_parser.Make`. + * The value `Env.extensions` is a list of elements of + type `Omd_representation.extension` which is equal to + `r -> p -> l -> (r * p * l) option` where + * `r = Omd_representation.t` + and represents the result of the parsing process, + * `p = Omd_representation.tok list` + and represents the tokens preceding `l`, + * and `l = tok list` and is the list of tokens to parse. + * The result, of type `(r * p * l) option`, is `None` if + the extension has no effect (and the parser will continue + doing its job with its state it had before using the + extension), and is `Some(r,p,l)` when it gives a new set of + data to the parser. + * Each element of the list `Env.extensions` is applied in a fold left + manner. (The first element of that list is applied first.) + * And they are applied when a standard parsing rule fails. + +2. The second extension stands in the representation of the lexemes + (`Tag of string * extension`). + It allows to insert extensions directly into the lexeme list. + +The Markdown representation also provides an extension mechanism, +which is useful if you want to insert “smart objects” (which are as +“smart” as smartphones). Those objects have four methods, 2 of them +are particularly useful: `to_html` and `to_t`, and implementing one +of them is necessary. They both return a `string option`, and a default +dummy such smart object can be defined as follows: + +```ocaml +let dummy = + X (object + method name = "dummy" + method to_html ?(indent=0) _ _ = None + method to_sexpr _ _ = None + method to_t _ = None + end) +``` + + + +History +------- + +OMD has been developed by [Philippe Wang](https://github.com/pw374/) +at [OCaml Labs](http://ocaml.io/) in [Cambridge](http://www.cl.cam.ac.uk), +with precious feedbacks and [pull requests](https://github.com/pw374/omd/pulls) +(cf. next section). + +Its development was motivated by at least these facts: + +- We wanted an OCaml implementation of Markdown; some OCaml parsers of + Markdown existed before but they were incomplete. It's easier for an + OCaml project to depend on an pure-OCaml implementation of Markdown than + to depend some interface to a library implemented using another language, + and this is ever more important since [Opam](https://opam.ocaml.org) exists. + +- We wanted to provide a way to make the contents of + the [OCaml.org](http://ocaml.org/) website be essentially in Markdown + instead of HTML. And we wanted to this website to be implemented in + OCaml. + +- Having an OCaml implementation of Markdown is virtually mandatory for + those who want to use a Markdown parser in + a [Mirage](http://www.openmirage.org) application. + Note that OMD has replaced the previous Markdown parser of + [COW](https://github.com/mirage/ocaml-cow), which has been developed + as part of the Mirage project. + + + +Thanks +------ + +Thank you to +[Christophe Troestler](https://github.com/Chris00), +[Ashish Argawal](https://github.com/agarwal), +[Sebastien Mondet](https://github.com/smondet), +[Thomas Gazagnaire](https://github.com/samoht), +[Daniel Bünzli](https://github.com/dbuenzli), +[Amir Chaudry](https://github.com/amirmc), +[Anil Madhavapeddy](https://github.com/avsm/), +[David Sheets](https://github.com/dsheets/), +[Jeremy Yallop](https://github.com/yallop/), +and \ +for their feedbacks and contributions to this project. + + + +Miscellaneous notes +------------------- + +- There's been absolutely no effort in making OMD fast, but it should be + amongst the fastest parsers of Markdown, just thanks to the fact that + it is implemented in OCaml. That being said, there's quite some room + for performance improvements. One way would be to make a several-pass + parser with different intermediate representations (there're currently + only 2 representations: one for the lexing tokens and one for the parse + tree). + +- The hardest part of implementing a parser of Markdown is the process + of understanding and unravelling the grammar of Markdown to turn it into + a program. + +- OMD 1.0.0 will probably use some external libraries, + e.g., [UUNF](http://erratique.ch/software/uunf) + and perhaps [Xmlm](http://erratique.ch/software/xmlm/doc/Xmlm) + + +- "OMD" is the name of this library and command-line tool. + - It might be written "Omd" or "omd" sometimes, but it should + be written using capital letters because it should be read + `əʊ ɛm diː` rather than `ə'md` or `ˌɒmd`. + +- "`Omd`" is a module. + - It's written using monospace font and it's capitalized. + +- "`omd`" is a command-line tool. + - It's written using monospace font and it's always lowercase letters only + because unless you have a non-sensitive file system, calling `Omd` on the + command line is not just another way of calling `omd`. + +- OMD has been added on the quite long list of Markdown parsers + + on the 29th of January. + diff --git a/ocaml-lsp-server/src/omd/CHANGES.md b/ocaml-lsp-server/src/omd/CHANGES.md new file mode 100644 index 000000000..06221a103 --- /dev/null +++ b/ocaml-lsp-server/src/omd/CHANGES.md @@ -0,0 +1,117 @@ +# Document Title + +1.3.2 +------ + +- port from oasis to dune (#273, @tmattio) + +1.3.x +----- + +- might stop checking validity of HTML tag *names* and accept any XML-parsable + tag name. + +1.2.5 +----- + +- only fixes a single bug (an ordered list could be transformed into an + unordered list) + +1.2.4 +----- + +- only fixes a single bug (some spaces were wrongly handled in the HTML parsing) + +1.2.2/3 +------- + +- fix a few issues with HTML parsing. + +1.2.1 +----- + +- mainly fixes issues with HTML parsing. + +1.2.0 +----- + +- introduces options `-w` and `-W`. Fixes mostly concern subtle uses of `\n`s in + HTML and Markdown outputs. + +1.1.2 +----- + +- fix: some URL-related parsing issues. + +1.1.0/1.1.1 +----------- + +- fix: some HTML-related issues. + +1.0.1 +----- + +- fixes some parsing issues, improves output. (2014-10-02) + +1.0.0 +----- + +- warning: this release is only partially compatible with previous versions. + +- accept HTML blocks which directly follow each other + +- fix: accept all XML-compatible attribute names for HTML + attributes + +- fix backslash-escaping for hash-ending ATX-titles + fix Markdown output for + Html_block + +- fix (HTML parsing) bugs introduced in 1.0.0.b and 1.0.0.c + +- rewrite parser of block HTML to use the updated Omd.t + +- rewrite parser of inline HTML to use the updated Omd.t + +- upgrade Omd.t for HTML representation + +There will not be any newer 0.9.x release although new bugs have been +discovered. Thus it's recommended to upgrade to the latest 1.x.y. + +0.9.7 +----- + +- introduction of media:end + bug fixes. + +If you need to have a version that still has `Tag of extension` instead of `Tag +of name * extension` and don't want to upgrade, you may use 0.9.3 + +0.9.6 +----- + +- fix a bug (concerning extensions) introduced by 0.9.4. + +0.9.5 +----- + +- bug fix + `Tag of extension` changed to `Tag of name * extension` + +0.9.4 +----- + +- fixes a bug for the new feature + +0.9.3 +----- + +- new feature `media:type="text/omd"`. This version is recommended if you do + not use that new feature and want to use 0.9.x + +0.9.2 +----- + +- not released... + +older versions +-------------- + +- cf. [commit log](https://github.com/ocaml/omd/commits/master) diff --git a/ocaml-lsp-server/src/omd/Makefile b/ocaml-lsp-server/src/omd/Makefile new file mode 100644 index 000000000..f8a8c3621 --- /dev/null +++ b/ocaml-lsp-server/src/omd/Makefile @@ -0,0 +1,20 @@ +## +# Omd +# +# @file + +.PHONY: test build fmt deps + +build: deps + dune build + +deps: + opam install . --deps-only --yes + +test: + dune build @gen --auto-promote + dune runtest + +fmt: + dune build @fmt --auto-promote +# end diff --git a/ocaml-lsp-server/src/omd/README.md b/ocaml-lsp-server/src/omd/README.md new file mode 100644 index 000000000..6e04abb8f --- /dev/null +++ b/ocaml-lsp-server/src/omd/README.md @@ -0,0 +1,160 @@ +OMD: extensible Markdown library and tool in OCaml +================================================== + +OMD provides two things: + +1. the command-line tool `omd`, which takes some Markdown and + converts it to HTML or Markdown. + + Use `omd -help` for more information on how to use it. + +2. the library for OCaml contains several modules: + - the module `Omd` contains most functions a user will need for basic + Markdown manipulation. + - the modules `Omd_parser`, `Omd_lexer`, `Omd_backend`, `Omd_representation` and `Omd_utils` basically implement what their names say: + * `Omd_parser` implements the parser (the most complex part). + * `Omd_lexer` implements a (basic) lexer. + * `Omd_backend` implements 3 backends: + 1. HTML: default backend. + 2. Markdown: sometimes it's useful to show that + the fix-point is easily reachable. + 3. S-expression: it's mainly used for debugging. + * `Omd_representation` declares the datatypes used in `Omd`. + It also provides some functions to work on those datatypes. + * `Omd_utils` provides some useful tools that are not very specific + to the OMD-specific datatypes. + + +OMD aims at implementing the ["original Markdown +specs"](http://daringfireball.net/projects/markdown/syntax) with a few +Github Flavour Markdown characteristics. OMD is also meant to be more +"sane" than other Markdown parsers from the semantics point of view: if +something bothers you from the semantics point of view, please [open an +issue on Github](https://github.com/ocaml/omd/issues). + + +Encoding +-------- + +**OMD assumes its input is US-ASCII or UTF-8 encoded.** + +Dependencies +------------ + +OMD is implemented in OCaml, therefore it needs it to be compiled. +OCaml 4.00.1 and then 4.01.0 have been used. OMD should be compatible +with 3.12.0 as well, if it's not then please [open an +issue](https://github.com/ocaml/omd/issues). + + +The opam package for OMD depends on ocamlfind, which is only +used to compile and install OMD. + +The root Makefile uses oasis, ocamlbuild and oasis2opam. +The Makefile in src/ only use the compilers from the standard +distribution of OCaml. + +OMD, compiled as a library and/or a tool, doesn't depend on +anything other than the OCaml standard library and runtime. + +---------------- + +Usage +----- + +- to install `omd` using opam (recommended) + + `opam install omd` + +- to get the development version of omd + + `git clone git://github.com/ocaml/omd.git` + +- to compile `omd` + - without `oasis` nor `ocamlbuild` + + `cd omd/src && make` + + - using `oasis` and `ocamlbuild` + + `cd omd && make` + + +---------------- + +Log +--- + +The recommended version numbers are typefaced in **bold**. +As new releases come out and bugs are discovered, a version can stop +being recommended. + +Version numbers are trying to follow this scheme: +`x.y.z`, `z` is is for minor changes, `y` may include +algorithm, interface or editorial policy changes, +and `x` is for deeper changes. + +- 1.3.x might stop checking validity of HTML tag *names* + and accept any XML-parsable tag name. + +- **1.2.5** only fixes a single bug (an ordered list could be transformed into an unordered list) + +- 1.2.4 only fixes a single bug (some spaces were wrongly handled in the HTML parsing) + +- 1.2.2 and 1.2.3 fix a few issues with HTML parsing. + +- 1.2.1 mainly fixes issues with HTML parsing. + +- 1.2.0 introduces options `-w` and `-W`. Fixes mostly concern subtle + uses of `\n`s in HTML and Markdown outputs. + +- 1.1.2: fix: some URL-related parsing issues. + +- 1.1.0 and 1.1.1: fix: some HTML-related issues. + +- 1.0.1: fixes some parsing issues, improves output. (2014-10-02) + +- 1.0.0: warning: this release is only partially compatible with previous versions. + +- tags 1.0.0.x precede 1.0.0. +Also, tags 1.0.0.x will not be released in OPAM, next release will be 1.0.0. +And 1.0.0.x may not be compatible with each other. + +- tag 1.0.0.g: accept HTML blocks which directly follow each other + +- tag 1.0.0.f: fix: accept all XML-compatible attribute names for HTML attributes + +- tag 1.0.0.e: fix backslash-escaping for hash-ending ATX-titles + fix Markdown output for Html_block + +- tag 1.0.0.d: fix (HTML parsing) bugs introduced in 1.0.0.b and 1.0.0.c + +- tag 1.0.0.c: rewrite parser of block HTML to use the updated Omd.t + +- tag 1.0.0.b: rewrite parser of inline HTML to use the updated Omd.t + +- tag 1.0.0.a: upgrade Omd.t for HTML representation + + +There will not be any newer 0.9.x release although new bugs have been +discovered. Thus it's recommended to upgrade to the latest 1.x.y. + +- **0.9.7**: introduction of media:end + bug fixes + + If you need to have a version that still has + `Tag of extension` instead of `Tag of name * extension` and don't want + to upgrade, you may use 0.9.3 + +- 0.9.6: fix a bug (concerning extensions) introduced by 0.9.4. + +- 0.9.5: bug fix + `Tag of extension` changed to `Tag of name * extension` + +- 0.9.4: fixes a bug for the new feature + +- 0.9.3: new feature `media:type="text/omd"`. + + This version is recommended if you do not use that new feature + and want to use 0.9.x + +- 0.9.2: not released... + +- older versions: cf. [commit log](https://github.com/ocaml/omd/commits/master) diff --git a/ocaml-lsp-server/src/omd/bmd/README.md b/ocaml-lsp-server/src/omd/bmd/README.md new file mode 100644 index 000000000..938ea5c81 --- /dev/null +++ b/ocaml-lsp-server/src/omd/bmd/README.md @@ -0,0 +1,20 @@ +This directory contains expressions that **"break"** Markdown. + +For instance: +``` + * Starting a list with 3 spaces and a star. + * Continuing the list. + * 2 spaces and a star: are we still in the same list? +``` + +On Github it renders like this: + + * Starting a list with 3 spaces and a star. + * Continuing the list. + * 2 spaces and a star: are we still in the same list? + + +So you can see that on Github, the star with preceded by less spaces starts an **inner** list, which is kind of very wrong... +Pandoc considers that the 3rd bullet starts the 3rd element of the unique list, which is not so right either. +I'm not blaming those tools, but rather the language. (On second thoughts, I might endup blaming the tools rather than the language.) + diff --git a/ocaml-lsp-server/src/omd/dune-project b/ocaml-lsp-server/src/omd/dune-project new file mode 100644 index 000000000..a31bd66a4 --- /dev/null +++ b/ocaml-lsp-server/src/omd/dune-project @@ -0,0 +1,28 @@ +(lang dune 2.7) +(name omd) +(version 1.3.1) + +(generate_opam_files) + +(license ISC) +(authors "Philippe Wang " + "Nicolás Ojeda Bär ") +(maintainers "Shon Feder " + "Raphael Sousa Santos <@sonologico>") +(source (github ocaml/omd)) + +(package + (name omd) + (synopsis "A Markdown frontend in pure OCaml") + (description + "This Markdown library is implemented using only pure OCaml (including +I/O operations provided by the standard OCaml compiler distribution). +OMD is meant to be as faithful as possible to the original Markdown. +Additionally, OMD implements a few Github markdown features, an +extension mechanism, and some other features. Note that the opam +package installs both the OMD library and the command line tool `omd`.") + (tags (org:ocamllabs org:mirage)) + (depends + (ocaml (>= 4.04)) + base-bigarray + base-bytes)) diff --git a/ocaml-lsp-server/src/omd/omd.opam b/ocaml-lsp-server/src/omd/omd.opam new file mode 100644 index 000000000..95550069b --- /dev/null +++ b/ocaml-lsp-server/src/omd/omd.opam @@ -0,0 +1,44 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "1.3.1" +synopsis: "A Markdown frontend in pure OCaml" +description: """ +This Markdown library is implemented using only pure OCaml (including +I/O operations provided by the standard OCaml compiler distribution). +OMD is meant to be as faithful as possible to the original Markdown. +Additionally, OMD implements a few Github markdown features, an +extension mechanism, and some other features. Note that the opam +package installs both the OMD library and the command line tool `omd`.""" +maintainer: [ + "Shon Feder " "Raphael Sousa Santos <@sonologico>" +] +authors: [ + "Philippe Wang " + "Nicolás Ojeda Bär " +] +license: "ISC" +tags: ["org:ocamllabs" "org:mirage"] +homepage: "https://github.com/ocaml/omd" +bug-reports: "https://github.com/ocaml/omd/issues" +depends: [ + "dune" {>= "2.7"} + "ocaml" {>= "4.04"} + "base-bigarray" + "base-bytes" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/ocaml/omd.git" diff --git a/ocaml-lsp-server/src/omd/setup.ml b/ocaml-lsp-server/src/omd/setup.ml new file mode 100644 index 000000000..389f42279 --- /dev/null +++ b/ocaml-lsp-server/src/omd/setup.ml @@ -0,0 +1,41 @@ +let () = + try Topdirs.dir_directory (Sys.getenv "OCAML_TOPLEVEL_PATH") + with Not_found -> ();; + +(* OASIS_START *) +(* DO NOT EDIT (digest: a426e2d026defb34183b787d31fbdcff) *) +(******************************************************************************) +(* OASIS: architecture for building OCaml libraries and applications *) +(* *) +(* Copyright (C) 2011-2016, Sylvain Le Gall *) +(* Copyright (C) 2008-2011, OCamlCore SARL *) +(* *) +(* This library is free software; you can redistribute it and/or modify it *) +(* under the terms of the GNU Lesser General Public License as published by *) +(* the Free Software Foundation; either version 2.1 of the License, or (at *) +(* your option) any later version, with the OCaml static compilation *) +(* exception. *) +(* *) +(* This library is distributed in the hope that it will be useful, but *) +(* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *) +(* or FITNESS FOR A PARTICULAR PURPOSE. See the file COPYING for more *) +(* details. *) +(* *) +(* You should have received a copy of the GNU Lesser General Public License *) +(* along with this library; if not, write to the Free Software Foundation, *) +(* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) +(******************************************************************************) + +let () = + try + Topdirs.dir_directory (Sys.getenv "OCAML_TOPLEVEL_PATH") + with Not_found -> () +;; +#use "topfind";; +#require "oasis.dynrun";; +open OASISDynRun;; + +let setup_t = BaseCompat.Compat_0_4.adapt_setup_t setup_t +open BaseCompat.Compat_0_4 +(* OASIS_STOP *) +let () = setup ();; diff --git a/ocaml-lsp-server/src/omd/src/dune b/ocaml-lsp-server/src/omd/src/dune new file mode 100644 index 000000000..ac8ead7f9 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/dune @@ -0,0 +1,16 @@ +(library + (name omd) + (wrapped false) + (modules :standard \ omd_main) + (flags + (:standard -w -50-6-27-32-39-33-35)) + (public_name omd) + (libraries + bigarray + bytes)) + +(executable + (name omd_main) + (public_name omd) + (modules omd_main) + (libraries omd)) diff --git a/ocaml-lsp-server/src/omd/src/html_characters.ml b/ocaml-lsp-server/src/omd/src/html_characters.ml new file mode 100644 index 000000000..937ba5541 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/html_characters.ml @@ -0,0 +1,1827 @@ +(* UTF-8 and HTML entities *) +let characters_htmlentities_descriptions = +(* data extracted from http://www.w3schools.com/ + on December, 18th, 2013 *) +[ +" ", +" ", +"space"; +"!", +"!", +"exclamation mark"; +"\"", +""", +"quotation mark"; +"#", +"#", +"number sign"; +"$", +"$", +"dollar sign"; +"%", +"%", +"percent sign"; +"&", +"&", +"ampersand"; +"'", +"'", +"apostrophe"; +"(", +"(", +"left parenthesis"; +")", +")", +"right parenthesis"; +"*", +"*", +"asterisk"; +"+", +"+", +"plus sign"; +",", +",", +"comma"; +"-", +"-", +"hyphen"; +".", +".", +"period"; +"/", +"/", +"slash"; +"0", +"0", +"digit 0"; +"1", +"1", +"digit 1"; +"2", +"2", +"digit 2"; +"3", +"3", +"digit 3"; +"4", +"4", +"digit 4"; +"5", +"5", +"digit 5"; +"6", +"6", +"digit 6"; +"7", +"7", +"digit 7"; +"8", +"8", +"digit 8"; +"9", +"9", +"digit 9"; +":", +":", +"colon"; +";", +";", +"semicolon"; +"<", +"<", +"less-than"; +"=", +"=", +"equals-to"; +">", +">", +"greater-than"; +"?", +"?", +"question mark"; +"@", +"@", +"at sign"; +"A", +"A", +"uppercase A"; +"B", +"B", +"uppercase B"; +"C", +"C", +"uppercase C"; +"D", +"D", +"uppercase D"; +"E", +"E", +"uppercase E"; +"F", +"F", +"uppercase F"; +"G", +"G", +"uppercase G"; +"H", +"H", +"uppercase H"; +"I", +"I", +"uppercase I"; +"J", +"J", +"uppercase J"; +"K", +"K", +"uppercase K"; +"L", +"L", +"uppercase L"; +"M", +"M", +"uppercase M"; +"N", +"N", +"uppercase N"; +"O", +"O", +"uppercase O"; +"P", +"P", +"uppercase P"; +"Q", +"Q", +"uppercase Q"; +"R", +"R", +"uppercase R"; +"S", +"S", +"uppercase S"; +"T", +"T", +"uppercase T"; +"U", +"U", +"uppercase U"; +"V", +"V", +"uppercase V"; +"W", +"W", +"uppercase W"; +"X", +"X", +"uppercase X"; +"Y", +"Y", +"uppercase Y"; +"Z", +"Z", +"uppercase Z"; +"[", +"[", +"left square bracket"; +"\\", +"\", +"backslash"; +"]", +"]", +"right square bracket"; +"^", +"^", +"caret"; +"_", +"_", +"underscore"; +"`", +"`", +"grave accent"; +"a", +"a", +"lowercase a"; +"b", +"b", +"lowercase b"; +"c", +"c", +"lowercase c"; +"d", +"d", +"lowercase d"; +"e", +"e", +"lowercase e"; +"f", +"f", +"lowercase f"; +"g", +"g", +"lowercase g"; +"h", +"h", +"lowercase h"; +"i", +"i", +"lowercase i"; +"j", +"j", +"lowercase j"; +"k", +"k", +"lowercase k"; +"l", +"l", +"lowercase l"; +"m", +"m", +"lowercase m"; +"n", +"n", +"lowercase n"; +"o", +"o", +"lowercase o"; +"p", +"p", +"lowercase p"; +"q", +"q", +"lowercase q"; +"r", +"r", +"lowercase r"; +"s", +"s", +"lowercase s"; +"t", +"t", +"lowercase t"; +"u", +"u", +"lowercase u"; +"v", +"v", +"lowercase v"; +"w", +"w", +"lowercase w"; +"x", +"x", +"lowercase x"; +"y", +"y", +"lowercase y"; +"z", +"z", +"lowercase z"; +"{", +"{", +"left curly brace"; +"|", +"|", +"vertical bar"; +"}", +"}", +"right curly brace"; +"~", +"~", +"tilde"; +"\000", +"�", +"null character"; +"\001", +"", +"start of header"; +"\002", +"", +"start of text"; +"\003", +"", +"end of text"; +"\004", +"", +"end of transmission"; +"\005", +"", +"enquiry"; +"\006", +"", +"acknowledge"; +"\007", +"", +"bell (ring)"; +"\008", +"", +"backspace"; +"\009", +" ", +"horizontal tab"; +"\010", +" ", +"line feed"; +"\011", +" ", +"vertical tab"; +"\012", +" ", +"form feed"; +"\013", +" ", +"carriage return"; +"\014", +"", +"shift out"; +"\015", +"", +"shift in"; +"\016", +"", +"data link escape"; +"\017", +"", +"device control 1"; +"\018", +"", +"device control 2"; +"\019", +"", +"device control 3"; +"\020", +"", +"device control 4"; +"\021", +"", +"negative acknowledge"; +"\022", +"", +"synchronize"; +"\023", +"", +"end transmission block"; +"\024", +"", +"cancel"; +"\025", +"", +"end of medium"; +"\026", +"", +"substitute"; +"\027", +"", +"escape"; +"\028", +"", +"file separator"; +"\029", +"", +"group separator"; +"\030", +"", +"record separator"; +"\031", +"", +"unit separator"; +"\127", +"", +"delete (rubout)"; +"\"", +""", +"quotation mark"; +"'", +"'", +"apostrophe"; +"&", +"&", +"ampersand"; +"<", +"<", +"less-than"; +">", +">", +"greater-than"; +"\xc2\xa0", +" ", +"non-breaking space"; +"\xc2\xa0", +" ", +"non-breaking space"; +"¡", +"¡", +"inverted exclamation mark"; +"¡", +"¡", +"inverted exclamation mark"; +"¢", +"¢", +"cent"; +"¢", +"¢", +"cent"; +"£", +"£", +"pound"; +"£", +"£", +"pound"; +"¤", +"¤", +"currency"; +"¤", +"¤", +"currency"; +"¥", +"¥", +"yen"; +"¥", +"¥", +"yen"; +"¦", +"¦", +"broken vertical bar"; +"¦", +"¦", +"broken vertical bar"; +"§", +"§", +"section"; +"§", +"§", +"section"; +"¨", +"¨", +"spacing diaeresis"; +"¨", +"¨", +"spacing diaeresis"; +"©", +"©", +"copyright"; +"©", +"©", +"copyright"; +"ª", +"ª", +"feminine ordinal indicator"; +"ª", +"ª", +"feminine ordinal indicator"; +"«", +"«", +"angle quotation mark (left)"; +"«", +"«", +"angle quotation mark (left)"; +"¬", +"¬", +"negation"; +"¬", +"¬", +"negation"; +"�­", +"­", +"soft hyphen"; +"�­", +"­", +"soft hyphen"; +"®", +"®", +"registered trademark"; +"®", +"®", +"registered trademark"; +"¯", +"¯", +"spacing macron"; +"¯", +"¯", +"spacing macron"; +"°", +"°", +"degree"; +"°", +"°", +"degree"; +"±", +"±", +"plus-or-minus"; +"±", +"±", +"plus-or-minus"; +"²", +"²", +"superscript 2"; +"²", +"²", +"superscript 2"; +"³", +"³", +"superscript 3"; +"³", +"³", +"superscript 3"; +"´", +"´", +"spacing acute"; +"´", +"´", +"spacing acute"; +"µ", +"µ", +"micro"; +"µ", +"µ", +"micro"; +"¶", +"¶", +"paragraph"; +"¶", +"¶", +"paragraph"; +"·", +"·", +"middle dot"; +"·", +"·", +"middle dot"; +"¸", +"¸", +"spacing cedilla"; +"¸", +"¸", +"spacing cedilla"; +"¹", +"¹", +"superscript 1"; +"¹", +"¹", +"superscript 1"; +"º", +"º", +"masculine ordinal indicator"; +"º", +"º", +"masculine ordinal indicator"; +"»", +"»", +"angle quotation mark (right)"; +"»", +"»", +"angle quotation mark (right)"; +"¼", +"¼", +"fraction 1/4"; +"¼", +"¼", +"fraction 1/4"; +"½", +"½", +"fraction 1/2"; +"½", +"½", +"fraction 1/2"; +"¾", +"¾", +"fraction 3/4"; +"¾", +"¾", +"fraction 3/4"; +"¿", +"¿", +"inverted question mark"; +"¿", +"¿", +"inverted question mark"; +"×", +"×", +"multiplication"; +"×", +"×", +"multiplication"; +"÷", +"÷", +"division"; +"÷", +"÷", +"division"; +"À", +"À", +"capital a, grave accent"; +"À", +"À", +"capital a, grave accent"; +"Á", +"Á", +"capital a, acute accent"; +"Á", +"Á", +"capital a, acute accent"; +"Â", +"Â", +"capital a, circumflex accent"; +"Â", +"Â", +"capital a, circumflex accent"; +"Ã", +"Ã", +"capital a, tilde"; +"Ã", +"Ã", +"capital a, tilde"; +"Ä", +"Ä", +"capital a, umlaut mark"; +"Ä", +"Ä", +"capital a, umlaut mark"; +"Å", +"Å", +"capital a, ring"; +"Å", +"Å", +"capital a, ring"; +"Æ", +"Æ", +"capital ae"; +"Æ", +"Æ", +"capital ae"; +"Ç", +"Ç", +"capital c, cedilla"; +"Ç", +"Ç", +"capital c, cedilla"; +"È", +"È", +"capital e, grave accent"; +"È", +"È", +"capital e, grave accent"; +"É", +"É", +"capital e, acute accent"; +"É", +"É", +"capital e, acute accent"; +"Ê", +"Ê", +"capital e, circumflex accent"; +"Ê", +"Ê", +"capital e, circumflex accent"; +"Ë", +"Ë", +"capital e, umlaut mark"; +"Ë", +"Ë", +"capital e, umlaut mark"; +"Ì", +"Ì", +"capital i, grave accent"; +"Ì", +"Ì", +"capital i, grave accent"; +"Í", +"Í", +"capital i, acute accent"; +"Í", +"Í", +"capital i, acute accent"; +"Î", +"Î", +"capital i, circumflex accent"; +"Î", +"Î", +"capital i, circumflex accent"; +"Ï", +"Ï", +"capital i, umlaut mark"; +"Ï", +"Ï", +"capital i, umlaut mark"; +"Ð", +"Ð", +"capital eth, Icelandic"; +"Ð", +"Ð", +"capital eth, Icelandic"; +"Ñ", +"Ñ", +"capital n, tilde"; +"Ñ", +"Ñ", +"capital n, tilde"; +"Ò", +"Ò", +"capital o, grave accent"; +"Ò", +"Ò", +"capital o, grave accent"; +"Ó", +"Ó", +"capital o, acute accent"; +"Ó", +"Ó", +"capital o, acute accent"; +"Ô", +"Ô", +"capital o, circumflex accent"; +"Ô", +"Ô", +"capital o, circumflex accent"; +"Õ", +"Õ", +"capital o, tilde"; +"Õ", +"Õ", +"capital o, tilde"; +"Ö", +"Ö", +"capital o, umlaut mark"; +"Ö", +"Ö", +"capital o, umlaut mark"; +"Ø", +"Ø", +"capital o, slash"; +"Ø", +"Ø", +"capital o, slash"; +"Ù", +"Ù", +"capital u, grave accent"; +"Ù", +"Ù", +"capital u, grave accent"; +"Ú", +"Ú", +"capital u, acute accent"; +"Ú", +"Ú", +"capital u, acute accent"; +"Û", +"Û", +"capital u, circumflex accent"; +"Û", +"Û", +"capital u, circumflex accent"; +"Ü", +"Ü", +"capital u, umlaut mark"; +"Ü", +"Ü", +"capital u, umlaut mark"; +"Ý", +"Ý", +"capital y, acute accent"; +"Ý", +"Ý", +"capital y, acute accent"; +"Þ", +"Þ", +"capital THORN, Icelandic"; +"Þ", +"Þ", +"capital THORN, Icelandic"; +"ß", +"ß", +"small sharp s, German"; +"ß", +"ß", +"small sharp s, German"; +"à", +"à", +"small a, grave accent"; +"à", +"à", +"small a, grave accent"; +"á", +"á", +"small a, acute accent"; +"á", +"á", +"small a, acute accent"; +"â", +"â", +"small a, circumflex accent"; +"â", +"â", +"small a, circumflex accent"; +"ã", +"ã", +"small a, tilde"; +"ã", +"ã", +"small a, tilde"; +"ä", +"ä", +"small a, umlaut mark"; +"ä", +"ä", +"small a, umlaut mark"; +"å", +"å", +"small a, ring"; +"å", +"å", +"small a, ring"; +"æ", +"æ", +"small ae"; +"æ", +"æ", +"small ae"; +"ç", +"ç", +"small c, cedilla"; +"ç", +"ç", +"small c, cedilla"; +"è", +"è", +"small e, grave accent"; +"è", +"è", +"small e, grave accent"; +"é", +"é", +"small e, acute accent"; +"é", +"é", +"small e, acute accent"; +"ê", +"ê", +"small e, circumflex accent"; +"ê", +"ê", +"small e, circumflex accent"; +"ë", +"ë", +"small e, umlaut mark"; +"ë", +"ë", +"small e, umlaut mark"; +"ì", +"ì", +"small i, grave accent"; +"ì", +"ì", +"small i, grave accent"; +"í", +"í", +"small i, acute accent"; +"í", +"í", +"small i, acute accent"; +"î", +"î", +"small i, circumflex accent"; +"î", +"î", +"small i, circumflex accent"; +"ï", +"ï", +"small i, umlaut mark"; +"ï", +"ï", +"small i, umlaut mark"; +"ð", +"ð", +"small eth, Icelandic"; +"ð", +"ð", +"small eth, Icelandic"; +"ñ", +"ñ", +"small n, tilde"; +"ñ", +"ñ", +"small n, tilde"; +"ò", +"ò", +"small o, grave accent"; +"ò", +"ò", +"small o, grave accent"; +"ó", +"ó", +"small o, acute accent"; +"ó", +"ó", +"small o, acute accent"; +"ô", +"ô", +"small o, circumflex accent"; +"ô", +"ô", +"small o, circumflex accent"; +"õ", +"õ", +"small o, tilde"; +"õ", +"õ", +"small o, tilde"; +"ö", +"ö", +"small o, umlaut mark"; +"ö", +"ö", +"small o, umlaut mark"; +"ø", +"ø", +"small o, slash"; +"ø", +"ø", +"small o, slash"; +"ù", +"ù", +"small u, grave accent"; +"ù", +"ù", +"small u, grave accent"; +"ú", +"ú", +"small u, acute accent"; +"ú", +"ú", +"small u, acute accent"; +"û", +"û", +"small u, circumflex accent"; +"û", +"û", +"small u, circumflex accent"; +"ü", +"ü", +"small u, umlaut mark"; +"ü", +"ü", +"small u, umlaut mark"; +"ý", +"ý", +"small y, acute accent"; +"ý", +"ý", +"small y, acute accent"; +"þ", +"þ", +"small thorn, Icelandic"; +"þ", +"þ", +"small thorn, Icelandic"; +"ÿ", +"ÿ", +"small y, umlaut mark"; +"ÿ", +"ÿ", +"small y, umlaut mark"; +"∀", +"∀", +"for all"; +"∀", +"∀", +"for all"; +"∂", +"∂", +"part"; +"∂", +"∂", +"part"; +"∃", +"∃", +"exists"; +"∃", +"∃", +"exists"; +"∅", +"∅", +"empty"; +"∅", +"∅", +"empty"; +"∇", +"∇", +"nabla"; +"∇", +"∇", +"nabla"; +"∈", +"∈", +"isin"; +"∈", +"∈", +"isin"; +"∉", +"∉", +"notin"; +"∉", +"∉", +"notin"; +"∋", +"∋", +"ni"; +"∋", +"∋", +"ni"; +"∏", +"∏", +"prod"; +"∏", +"∏", +"prod"; +"∑", +"∑", +"sum"; +"∑", +"∑", +"sum"; +"−", +"−", +"minus"; +"−", +"−", +"minus"; +"∗", +"∗", +"lowast"; +"∗", +"∗", +"lowast"; +"√", +"√", +"square root"; +"√", +"√", +"square root"; +"∝", +"∝", +"proportional to"; +"∝", +"∝", +"proportional to"; +"∞", +"∞", +"infinity"; +"∞", +"∞", +"infinity"; +"∠", +"∠", +"angle"; +"∠", +"∠", +"angle"; +"∧", +"∧", +"and"; +"∧", +"∧", +"and"; +"∨", +"∨", +"or"; +"∨", +"∨", +"or"; +"∩", +"∩", +"cap"; +"∩", +"∩", +"cap"; +"∪", +"∪", +"cup"; +"∪", +"∪", +"cup"; +"∫", +"∫", +"integral"; +"∫", +"∫", +"integral"; +"∴", +"∴", +"therefore"; +"∴", +"∴", +"therefore"; +"∼", +"∼", +"similar to"; +"∼", +"∼", +"similar to"; +"≅", +"≅", +"congruent to"; +"≅", +"≅", +"congruent to"; +"≈", +"≈", +"almost equal"; +"≈", +"≈", +"almost equal"; +"≠", +"≠", +"not equal"; +"≠", +"≠", +"not equal"; +"≡", +"≡", +"equivalent"; +"≡", +"≡", +"equivalent"; +"≤", +"≤", +"less or equal"; +"≤", +"≤", +"less or equal"; +"≥", +"≥", +"greater or equal"; +"≥", +"≥", +"greater or equal"; +"⊂", +"⊂", +"subset of"; +"⊂", +"⊂", +"subset of"; +"⊃", +"⊃", +"superset of"; +"⊃", +"⊃", +"superset of"; +"⊄", +"⊄", +"not subset of"; +"⊄", +"⊄", +"not subset of"; +"⊆", +"⊆", +"subset or equal"; +"⊆", +"⊆", +"subset or equal"; +"⊇", +"⊇", +"superset or equal"; +"⊇", +"⊇", +"superset or equal"; +"⊕", +"⊕", +"circled plus"; +"⊕", +"⊕", +"circled plus"; +"⊗", +"⊗", +"circled times"; +"⊗", +"⊗", +"circled times"; +"⊥", +"⊥", +"perpendicular"; +"⊥", +"⊥", +"perpendicular"; +"⋅", +"⋅", +"dot operator"; +"⋅", +"⋅", +"dot operator"; +"Α", +"Α", +"Alpha"; +"Α", +"Α", +"Alpha"; +"Β", +"Β", +"Beta"; +"Β", +"Β", +"Beta"; +"Γ", +"Γ", +"Gamma"; +"Γ", +"Γ", +"Gamma"; +"Δ", +"Δ", +"Delta"; +"Δ", +"Δ", +"Delta"; +"Ε", +"Ε", +"Epsilon"; +"Ε", +"Ε", +"Epsilon"; +"Ζ", +"Ζ", +"Zeta"; +"Ζ", +"Ζ", +"Zeta"; +"Η", +"Η", +"Eta"; +"Η", +"Η", +"Eta"; +"Θ", +"Θ", +"Theta"; +"Θ", +"Θ", +"Theta"; +"Ι", +"Ι", +"Iota"; +"Ι", +"Ι", +"Iota"; +"Κ", +"Κ", +"Kappa"; +"Κ", +"Κ", +"Kappa"; +"Λ", +"Λ", +"Lambda"; +"Λ", +"Λ", +"Lambda"; +"Μ", +"Μ", +"Mu"; +"Μ", +"Μ", +"Mu"; +"Ν", +"Ν", +"Nu"; +"Ν", +"Ν", +"Nu"; +"Ξ", +"Ξ", +"Xi"; +"Ξ", +"Ξ", +"Xi"; +"Ο", +"Ο", +"Omicron"; +"Ο", +"Ο", +"Omicron"; +"Π", +"Π", +"Pi"; +"Π", +"Π", +"Pi"; +"Ρ", +"Ρ", +"Rho"; +"Ρ", +"Ρ", +"Rho"; +"Σ", +"Σ", +"Sigma"; +"Σ", +"Σ", +"Sigma"; +"Τ", +"Τ", +"Tau"; +"Τ", +"Τ", +"Tau"; +"Υ", +"Υ", +"Upsilon"; +"Υ", +"Υ", +"Upsilon"; +"Φ", +"Φ", +"Phi"; +"Φ", +"Φ", +"Phi"; +"Χ", +"Χ", +"Chi"; +"Χ", +"Χ", +"Chi"; +"Ψ", +"Ψ", +"Psi"; +"Ψ", +"Ψ", +"Psi"; +"Ω", +"Ω", +"Omega"; +"Ω", +"Ω", +"Omega"; +"α", +"α", +"alpha"; +"α", +"α", +"alpha"; +"β", +"β", +"beta"; +"β", +"β", +"beta"; +"γ", +"γ", +"gamma"; +"γ", +"γ", +"gamma"; +"δ", +"δ", +"delta"; +"δ", +"δ", +"delta"; +"ε", +"ε", +"epsilon"; +"ε", +"ε", +"epsilon"; +"ζ", +"ζ", +"zeta"; +"ζ", +"ζ", +"zeta"; +"η", +"η", +"eta"; +"η", +"η", +"eta"; +"θ", +"θ", +"theta"; +"θ", +"θ", +"theta"; +"ι", +"ι", +"iota"; +"ι", +"ι", +"iota"; +"κ", +"κ", +"kappa"; +"κ", +"κ", +"kappa"; +"λ", +"λ", +"lambda"; +"λ", +"λ", +"lambda"; +"μ", +"μ", +"mu"; +"μ", +"μ", +"mu"; +"ν", +"ν", +"nu"; +"ν", +"ν", +"nu"; +"ξ", +"ξ", +"xi"; +"ξ", +"ξ", +"xi"; +"ο", +"ο", +"omicron"; +"ο", +"ο", +"omicron"; +"π", +"π", +"pi"; +"π", +"π", +"pi"; +"ρ", +"ρ", +"rho"; +"ρ", +"ρ", +"rho"; +"ς", +"ς", +"sigmaf"; +"ς", +"ς", +"sigmaf"; +"σ", +"σ", +"sigma"; +"σ", +"σ", +"sigma"; +"τ", +"τ", +"tau"; +"τ", +"τ", +"tau"; +"υ", +"υ", +"upsilon"; +"υ", +"υ", +"upsilon"; +"φ", +"φ", +"phi"; +"φ", +"φ", +"phi"; +"χ", +"χ", +"chi"; +"χ", +"χ", +"chi"; +"ψ", +"ψ", +"psi"; +"ψ", +"ψ", +"psi"; +"ω", +"ω", +"omega"; +"ω", +"ω", +"omega"; +"ϑ", +"ϑ", +"theta symbol"; +"ϑ", +"ϑ", +"theta symbol"; +"ϒ", +"ϒ", +"upsilon symbol"; +"ϒ", +"ϒ", +"upsilon symbol"; +"ϖ", +"ϖ", +"pi symbol"; +"ϖ", +"ϖ", +"pi symbol"; +"Œ", +"Œ", +"capital ligature OE"; +"Œ", +"Œ", +"capital ligature OE"; +"œ", +"œ", +"small ligature oe"; +"œ", +"œ", +"small ligature oe"; +"Š", +"Š", +"capital S with caron"; +"Š", +"Š", +"capital S with caron"; +"š", +"š", +"small S with caron"; +"š", +"š", +"small S with caron"; +"Ÿ", +"Ÿ", +"capital Y with diaeres"; +"Ÿ", +"Ÿ", +"capital Y with diaeres"; +"ƒ", +"ƒ", +"f with hook"; +"ƒ", +"ƒ", +"f with hook"; +"ˆ", +"ˆ", +"modifier letter circumflex accent"; +"ˆ", +"ˆ", +"modifier letter circumflex accent"; +"˜", +"˜", +"small tilde"; +"˜", +"˜", +"small tilde"; +" ", +" ", +"en space"; +" ", +" ", +"en space"; +" ", +" ", +"em space"; +" ", +" ", +"em space"; +" ", +" ", +"thin space"; +" ", +" ", +"thin space"; +"‌", +"‌", +"zero width non-joiner"; +"‌", +"‌", +"zero width non-joiner"; +"‍", +"‍", +"zero width joiner"; +"‍", +"‍", +"zero width joiner"; +"‎", +"‎", +"left-to-right mark"; +"‎", +"‎", +"left-to-right mark"; +"‏", +"‏", +"right-to-left mark"; +"‏", +"‏", +"right-to-left mark"; +"–", +"–", +"en dash"; +"–", +"–", +"en dash"; +"—", +"—", +"em dash"; +"—", +"—", +"em dash"; +"‘", +"‘", +"left single quotation mark"; +"‘", +"‘", +"left single quotation mark"; +"’", +"’", +"right single quotation mark"; +"’", +"’", +"right single quotation mark"; +"‚", +"‚", +"single low-9 quotation mark"; +"‚", +"‚", +"single low-9 quotation mark"; +"“", +"“", +"left double quotation mark"; +"“", +"“", +"left double quotation mark"; +"”", +"”", +"right double quotation mark"; +"”", +"”", +"right double quotation mark"; +"„", +"„", +"double low-9 quotation mark"; +"„", +"„", +"double low-9 quotation mark"; +"†", +"†", +"dagger"; +"†", +"†", +"dagger"; +"‡", +"‡", +"double dagger"; +"‡", +"‡", +"double dagger"; +"•", +"•", +"bullet"; +"•", +"•", +"bullet"; +"…", +"…", +"horizontal ellipsis"; +"…", +"…", +"horizontal ellipsis"; +"‰", +"‰", +"per mille "; +"‰", +"‰", +"per mille "; +"′", +"′", +"minutes"; +"′", +"′", +"minutes"; +"″", +"″", +"seconds"; +"″", +"″", +"seconds"; +"‹", +"‹", +"single left angle quotation"; +"‹", +"‹", +"single left angle quotation"; +"›", +"›", +"single right angle quotation"; +"›", +"›", +"single right angle quotation"; +"‾", +"‾", +"overline"; +"‾", +"‾", +"overline"; +"€", +"€", +"euro"; +"€", +"€", +"euro"; +"™", +"™", +"trademark"; +"™", +"™", +"trademark"; +"™", +"™", +"trademark"; +"™", +"™", +"trademark"; +"←", +"←", +"left arrow"; +"←", +"←", +"left arrow"; +"↑", +"↑", +"up arrow"; +"↑", +"↑", +"up arrow"; +"→", +"→", +"right arrow"; +"→", +"→", +"right arrow"; +"↓", +"↓", +"down arrow"; +"↓", +"↓", +"down arrow"; +"↔", +"↔", +"left right arrow"; +"↔", +"↔", +"left right arrow"; +"↵", +"↵", +"carriage return arrow"; +"↵", +"↵", +"carriage return arrow"; +"⌈", +"⌈", +"left ceiling"; +"⌈", +"⌈", +"left ceiling"; +"⌉", +"⌉", +"right ceiling"; +"⌉", +"⌉", +"right ceiling"; +"⌊", +"⌊", +"left floor"; +"⌊", +"⌊", +"left floor"; +"⌋", +"⌋", +"right floor"; +"⌋", +"⌋", +"right floor"; +"◊", +"◊", +"lozenge"; +"◊", +"◊", +"lozenge"; +"♠", +"♠", +"spade"; +"♠", +"♠", +"spade"; +"♣", +"♣", +"club"; +"♣", +"♣", +"club"; +"♥", +"♥", +"heart"; +"♥", +"♥", +"heart"; +"♦", +"♦", +"diamond"; +"♦", +"♦", +"diamond"; +] diff --git a/ocaml-lsp-server/src/omd/src/implementation_notes.md b/ocaml-lsp-server/src/omd/src/implementation_notes.md new file mode 100644 index 000000000..e8d126f49 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/implementation_notes.md @@ -0,0 +1,113 @@ +# Notes on the Implementation and Semantics of omd + +## In short + +I believe that all features described in + have now been +implemented. Extensive testing should be done. + + +Parsing: it mainly relies on the property that two consecutive tokens +produced by the lexer cannot designate the same "thing". For instance, +there can't be [Word "foo"; Word "bar"] because it should be +[Word "foobar"] instead. There can't be [Newlines 4; Newline; Newlines 3] because it should be +[Newlines(10)] (yes it's 10, not 8). + + +## More details + + +### Checklist + * HTML + * As in "standard" Markdown, it's a "subset" of HTML with **restrictions** on the syntax that is supported. For instance, one cannot write `< a href...` instead of ``. For instance, `"plop\\nhello"` is translated to `"

plop
hello

"` (this has been implemented on 2013.08.15) + * Code: + * verbatim: done (but needs more testing) + * syntax-highlighted code: *todo* + + +### Flaws in Markdown + +Since there are no errors in Markdown, it means taht everything has a +meaning. Sometimes, one has to imagine a meaning that is not too much +nonsense. + + +#### Lists + +##### Problem Description +There are several semantics for a "broken" list such as the following one: +``` + * Indentation 1, Element 1 + * Indentation 1, Element 2 + * Indentation 3, Element 1 + * Indentation 3, Element 2 + * Indentation 2, Element 1 + * Indentation 2, Element 2 + * Indentation 3, Element 1 (not 3) + * Indentation 1, Element 3 +``` + +I have chosen the following semantics, because to me that it's the less nonsense I have ever thought about: + +##### Semantics +Let N be the indentation level of the current element. +- If N is equal to the previous indentation, then it's still the same list as the current one. +- If N is greater than the previous indentation, then it's a new list. +- If N is lesser than the previous indentation, then I check its predecessors: + * if N is the level of a predecessor and no other level inbetween is lesser, then it means that the current item belongs to a list that hasn't been closed yet, so I close the current list and I delegate the rest to the closest parent (which does mean that the current item will be processed _again_). + * else, it means that it's a kind of wrong level (don't forget N is smaller than the previous indentation), so I close the current list and open a new one with the current item. + + +``` + * Indentation 1, Element 1 + * Indentation 1, Element 2 +% here I do not close (I1), and I open for the next one (I3) + * Indentation 3, Element 1 + * Indentation 3, Element 2 +% here I close (I3) and open for the next one (I2) + * Indentation 2, Element 1 + * Indentation 2, Element 2 +% here I close (I2) and open for the next one (I3) + * Indentation 3, Element 1 (not 3) +% here I close (I3) and continue for the next one (I1) + * Indentation 1, Element 3 +``` + + +----- +file implementation_notes.md + diff --git a/ocaml-lsp-server/src/omd/src/omd.ml b/ocaml-lsp-server/src/omd/src/omd.ml new file mode 100644 index 000000000..8d907a827 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd.ml @@ -0,0 +1,170 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +include Omd_representation +include Omd_backend + +let of_input lex ?extensions:e ?default_lang:d s = + let module E = Omd_parser.Default_env(struct end) in + let module Parser = Omd_parser.Make( + struct + include E + let extensions = match e with Some x -> x | None -> E.extensions + let default_lang = match d with Some x -> x | None -> E.default_lang + end + ) in + let md = + Parser.parse (lex s) + in + Parser.make_paragraphs md + +let of_string = of_input Omd_lexer.lex +let of_bigarray = of_input Omd_lexer.lex_bigarray + +let to_html : + ?override:(Omd_representation.element -> string option) -> + ?pindent:bool -> + ?nl2br:bool -> + ?cs:code_stylist -> + t -> + string + = + html_of_md + +let to_text : t -> string = text_of_md + +let to_markdown : t -> string = markdown_of_md + +let html_of_string (html:string) : string = + html_of_md (Omd_parser.default_parse (Omd_lexer.lex html)) + + +let rec set_default_lang lang = function + | Code("", code) :: tl -> Code(lang, code) :: set_default_lang lang tl + | Code_block("", code) :: tl -> Code_block(lang, code) + :: set_default_lang lang tl + (* Recurse on all elements even though code (blocks) are not allowed + everywhere. *) + | H1 t :: tl -> H1(set_default_lang lang t) :: set_default_lang lang tl + | H2 t :: tl -> H2(set_default_lang lang t) :: set_default_lang lang tl + | H3 t :: tl -> H3(set_default_lang lang t) :: set_default_lang lang tl + | H4 t :: tl -> H4(set_default_lang lang t) :: set_default_lang lang tl + | H5 t :: tl -> H5(set_default_lang lang t) :: set_default_lang lang tl + | H6 t :: tl -> H6(set_default_lang lang t) :: set_default_lang lang tl + | Paragraph t :: tl -> Paragraph(set_default_lang lang t) + :: set_default_lang lang tl + | Emph t :: tl -> Emph(set_default_lang lang t) :: set_default_lang lang tl + | Bold t :: tl -> Bold(set_default_lang lang t) :: set_default_lang lang tl + | Ul t :: tl -> Ul(List.map (set_default_lang lang) t) + :: set_default_lang lang tl + | Ol t :: tl -> Ol(List.map (set_default_lang lang) t) + :: set_default_lang lang tl + | Ulp t :: tl -> Ulp(List.map (set_default_lang lang) t) + :: set_default_lang lang tl + | Olp t :: tl -> Olp(List.map (set_default_lang lang) t) + :: set_default_lang lang tl + | Url(href, t, title) :: tl -> Url(href, set_default_lang lang t, title) + :: set_default_lang lang tl + | Blockquote t :: tl -> Blockquote(set_default_lang lang t) + :: set_default_lang lang tl + (* Elements that do not contain Markdown. *) + | (Text _|Code _|Code_block _|Br|Hr|NL|Ref _|Img_ref _|Raw _|Raw_block _ + |Html _|Html_block _|Html_comment _|Img _|X _) as e :: tl -> + e :: set_default_lang lang tl + | [] -> [] + + +(* Table of contents + ***********************************************************************) + +(* Given a list of headers — in the order of the document — go to the + requested subsection. We first seek for the [number]th header at + [level]. *) +let rec find_start headers level number subsections = + match headers with + | [] -> [] + | (H1 _, _, _) :: tl -> deal_with_header 1 headers tl level number subsections + | (H2 _, _, _) :: tl -> deal_with_header 2 headers tl level number subsections + | (H3 _, _, _) :: tl -> deal_with_header 3 headers tl level number subsections + | (H4 _, _, _) :: tl -> deal_with_header 4 headers tl level number subsections + | (H5 _, _, _) :: tl -> deal_with_header 5 headers tl level number subsections + | (H6 _, _, _) :: tl -> deal_with_header 6 headers tl level number subsections + | _ :: _ -> assert false + +and deal_with_header h_level headers tl level number subsections = + if h_level > level then (* Skip, right [level]-header not yet reached. *) + if number = 0 then + (* Assume empty section at [level], do not consume token. *) + (match subsections with + | [] -> headers (* no subsection to find *) + | n :: subsections -> find_start headers (level + 1) n subsections) + else find_start tl level number subsections + else if h_level = level then ( + (* At proper [level]. Have we reached the [number] one? *) + if number <= 1 then ( + match subsections with + | [] -> tl (* no subsection to find *) + | n :: subsections -> find_start tl (level + 1) n subsections + ) + else find_start tl level (number - 1) subsections + ) + else (* h_level < level *) + [] (* Sought [level] has not been found in the current section *) + +(* Assume we are at the start of the headers we are interested in. + Return the list of TOC entries for [min_level] and the [headers] + not used for the TOC entries. *) +let rec make_toc (headers:(element*string*string)list) ~min_level ~max_level = + if min_level > max_level then [], headers + else ( + match headers with + | [] -> [], [] + | (H1 t, id, _) :: tl -> toc_entry headers 1 t id tl ~min_level ~max_level + | (H2 t, id, _) :: tl -> toc_entry headers 2 t id tl ~min_level ~max_level + | (H3 t, id, _) :: tl -> toc_entry headers 3 t id tl ~min_level ~max_level + | (H4 t, id, _) :: tl -> toc_entry headers 4 t id tl ~min_level ~max_level + | (H5 t, id, _) :: tl -> toc_entry headers 5 t id tl ~min_level ~max_level + | (H6 t, id, _) :: tl -> toc_entry headers 6 t id tl ~min_level ~max_level + | _ :: _ -> assert false + ) +and toc_entry headers h_level t id tl ~min_level ~max_level = + if h_level > max_level then (* too deep, skip *) + make_toc tl ~min_level ~max_level + else if h_level < min_level then + (* section we wanted the TOC for is finished, do not comsume the token *) + [], headers + else if h_level = min_level then ( + let sub_toc, tl = make_toc tl ~min_level:(min_level + 1) ~max_level in + let toc_entry = match sub_toc with + | [] -> [Url("#" ^ id, t, ""); NL] + | _ -> [Url("#" ^ id, t, ""); NL; Ul sub_toc; NL] in + let toc, tl = make_toc tl ~min_level ~max_level in + toc_entry :: toc, tl + ) else (* h_level > min_level *) + let sub_toc, tl = make_toc headers ~min_level:(min_level + 1) ~max_level in + let toc, tl = make_toc tl ~min_level ~max_level in + [Ul sub_toc] :: toc, tl + +let toc ?(start=[]) ?(depth=2) md = + if depth < 1 then invalid_arg "Omd.toc: ~depth must be >= 1"; + let headers = Omd_backend.headers_of_md ~remove_header_links:true md in + let headers = match start with + | [] -> headers + | number :: subsections -> + if number < 0 then invalid_arg("Omd.toc: level 1 start must be >= 0"); + find_start headers 1 number subsections in + let len = List.length start in + let toc, _ = make_toc headers + ~min_level:(len + 1) ~max_level:(len + depth) in + match toc with + | [] -> [] + | _ -> [Ul(toc)] + +let add_toc ?start ?depth ?title md = + let toc = toc ?start ?depth md in + (* Replace "*Table of contents*" with the actual TOC. *) + toc diff --git a/ocaml-lsp-server/src/omd/src/omd.mli b/ocaml-lsp-server/src/omd/src/omd.mli new file mode 100644 index 000000000..99274f49c --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd.mli @@ -0,0 +1,165 @@ +(** A markdown parser in OCaml, with no extra dependencies. + + This module represents this entire Markdown library written in + OCaml only. + + Its main purpose is to allow you to use the Markdown library while + keeping you away from the other modules. + + If you want to extend the Markdown parser, you can do it without + accessing any module of this library but this one, and by doing + so, you are free from having to maintain a fork of this library. + + N.B. This module is supposed to be reentrant, + if it's not then please report the bug. *) + + +(************************************************************************) +(** {2 Representation of Markdown documents} *) + +type t = element list +(** Representation of a Markdown document. *) + +and ref_container = + (< add_ref: string -> string -> string -> unit ; + get_ref : string -> (string*string) option; + get_all : (string * (string * string)) list; + >) + +(** A element of a Markdown document. *) +and element = Omd_representation.element = + | H1 of t (** Header of level 1 *) + | H2 of t (** Header of level 2 *) + | H3 of t (** Header of level 3 *) + | H4 of t (** Header of level 4 *) + | H5 of t (** Header of level 5 *) + | H6 of t (** Header of level 6 *) + | Paragraph of t + (** A Markdown paragraph (must be enabled in {!of_string}) *) + | Text of string (** Text. *) + | Emph of t (** Emphasis (italic) *) + | Bold of t (** Bold *) + | Ul of t list (** Unumbered list *) + | Ol of t list (** Ordered (i.e. numbered) list *) + | Ulp of t list + | Olp of t list + | Code of name * string + (** [Code(lang, code)] represent [code] within the text (Markdown: + `code`). The language [lang] cannot be specified from Markdown, + it can be from {!of_string} though or when programatically + generating Markdown documents. Beware that the [code] is taken + verbatim from Markdown and may contain characters that must be + escaped for HTML. *) + | Code_block of name * string + (** [Code_block(lang, code)]: a code clock (e.g. indented by 4 + spaces in the text). The first parameter [lang] is the language + if specified. Beware that the [code] is taken verbatim from + Markdown and may contain characters that must be escaped for + HTML. *) + | Br (** (Forced) line break *) + | Hr (** Horizontal rule *) + | NL (** Newline character. Newline characters that act + like delimiters (e.g. for paragraphs) are removed from the AST. *) + | Url of href * t * title + | Ref of ref_container * name * string * fallback + | Img_ref of ref_container * name * alt * fallback + | Html of name * (string * string option) list * t + | Html_block of name * (string * string option) list * t + | Html_comment of string + (** An HTML comment, including "". *) + | Raw of string + (** Raw: something that shall never be converted *) + | Raw_block of string + (** Raw_block: a block with contents that shall never be converted *) + | Blockquote of t (** Quoted block *) + | Img of alt * src * title + | X of (< (* extension of [element]. *) + name: string; + (* N.B. [to_html] means that htmlentities will not + be applied to its output. *) + to_html: ?indent:int -> (t -> string) -> t -> string option; + to_sexpr: (t -> string) -> t -> string option; + to_t: t -> t option >) + +and fallback = < to_string : string ; to_t : t > +(** Fallback for references in case they refer to non-existant references *) + +and name = string +(** Markdown reference name. *) + +and alt = string +(** HTML img tag attribute. *) + +and src = string +(** HTML attribute. *) + +and href = string +(** HTML attribute. *) + +and title = string +(** HTML attribute. *) + +type code_stylist = lang:string -> string -> string +(** Function that takes a language name and some code and returns + that code with style. *) + + +(************************************************************************) +(** {2 Input and Output} *) + +val of_string : ?extensions:Omd_representation.extensions -> + ?default_lang: name -> + string -> t +(** [of_string s] returns the Markdown representation of the string + [s]. + + @param lang language for blocks of code where it was not + specified. Default: [""]. + + If you want to use a custom lexer or parser, use {!Omd_lexer.lex} + and {!Omd_parser.parse}. *) + +val of_bigarray : ?extensions:Omd_representation.extensions -> + ?default_lang: name -> + Omd_lexer.bigstring -> t +(** As {!of_string}, but read input from a bigarray rather than from a + string. *) + +val set_default_lang : name -> t -> t +(** [set_default_lang lang md] return a copy of [md] where the + language of all [Code] or [Code_block] with an empty language is + set to [lang]. *) + +val to_html : + ?override:(Omd_representation.element -> string option) -> + ?pindent:bool -> ?nl2br:bool -> ?cs:code_stylist -> t -> string +(** Translate markdown representation into raw HTML. If you need a + full HTML representation, you mainly have to figure out how to + convert [Html of string] and [Html_block of string] + into your HTML representation. *) + +val to_markdown : t -> string +(** Translate markdown representation into textual markdown. *) + +val to_text : t -> string +(** Translate markdown representation into raw text. *) + + +(************************************************************************) +(** {2 Tansforming Markdown documents} *) + +val toc : ?start:int list -> ?depth:int -> t -> t +(** [toc md] returns [toc] a table of contents for [md]. + + @param start gives the section for which the TOC must be built. + For example [~start:[2;3]] will build the TOC for subsections of + the second [H1] header, and within that section, the third [h2] + header. If a number is [0], it means to look for the first + section at that level but stop if one encounters any other + subsection. If no subsection exists, an empty TOC [[]] will be + returned. Default: [[]] i.e. list all sections, starting with the + first [H1]. + + @param depth the table of contents. Default: [2]. *) + +;; diff --git a/ocaml-lsp-server/src/omd/src/omd_backend.ml b/ocaml-lsp-server/src/omd/src/omd_backend.ml new file mode 100644 index 000000000..deea4b5bf --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_backend.ml @@ -0,0 +1,1225 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +type code_stylist = lang:string -> string -> string + +open Printf +open Omd_representation +open Omd_utils + +let default_language = ref "" + + + +let text_of_md md = + let b = Buffer.create 128 in + let rec loop = function + | X _ :: tl -> + loop tl + | Blockquote q :: tl -> + loop q; + loop tl + | Ref(rc, name, text, fallback) :: tl -> + Buffer.add_string b (htmlentities ~md:true name); + loop tl + | Img_ref(rc, name, alt, fallback) :: tl -> + Buffer.add_string b (htmlentities ~md:true name); + loop tl + | Paragraph md :: tl -> + loop md; + Buffer.add_char b '\n'; + Buffer.add_char b '\n'; + loop tl + | Img(alt, src, title) :: tl -> + Buffer.add_string b (htmlentities ~md:true alt); + loop tl + | Text t :: tl -> + Buffer.add_string b (htmlentities ~md:true t); + loop tl + | Raw t :: tl -> + Buffer.add_string b t; + loop tl + | Raw_block t :: tl -> + Buffer.add_char b '\n'; + Buffer.add_string b t; + Buffer.add_char b '\n'; + loop tl + | Emph md :: tl -> + loop md; + loop tl + | Bold md :: tl -> + loop md; + loop tl + | (Ul l | Ol l) :: tl -> + List.iter (fun item -> loop item; Buffer.add_char b '\n') l; + loop tl + | (Ulp l | Olp l) :: tl -> + List.iter loop l; + loop tl + | Code_block(lang, c) :: tl -> + Buffer.add_string b (htmlentities ~md:false c); + loop tl + | Code(lang, c) :: tl -> + Buffer.add_string b (htmlentities ~md:false c); + loop tl + | Br :: tl -> + loop tl + | Hr :: tl -> + loop tl + | Html(tagname, attrs, body) :: tl -> + loop body; + loop tl + | Html_block(tagname, attrs, body) :: tl -> + loop body; + loop tl + | Html_comment s :: tl -> + loop tl + | Url (href,s,title) :: tl -> + loop s; + loop tl + | H1 md :: tl + | H2 md :: tl + | H3 md :: tl + | H4 md :: tl + | H5 md :: tl + | H6 md :: tl -> + loop md; + loop tl + | NL :: tl -> + Buffer.add_string b "\n"; + loop tl + | [] -> () + in + loop md; + Buffer.contents b + +let default_code_stylist ~lang code = code + +let filter_text_omd_rev l = + let rec loop b r = function + | [] -> if b then r else l + | ("media:type", Some "text/omd")::tl -> + loop true r tl + | e::tl -> + loop b (e::r) tl + in + loop false [] l + +let remove_links : t -> t = + Omd_representation.visit + (fun e -> + match e with + | Url(_, t, _) -> Some t + | _ -> None + ) + +let rec html_and_headers_of_md + ?(remove_header_links=false) + ?(override=(fun (e:element) -> (None:string option))) + ?(pindent=false) + ?(nl2br=false) + ?cs:(code_style=default_code_stylist) + md + = + let ids = object(this) + val mutable ids = StringSet.add "" StringSet.empty + method mangle id = + let rec m i = + if StringSet.mem id ids then + let idx = if i > 0 then id^"_"^string_of_int i else id in + if StringSet.mem idx ids then + m (i+1) + else + (ids <- StringSet.add idx ids; + idx) + else + (ids <- StringSet.add id ids; + id) + in m 0 + end in + let empty s = + let rec loop i = + if i < String.length s then + match s.[i] with + | ' ' | '\n' -> loop (i+1) + | _ -> false + else + true + in + loop 0 + in + let remove_trailing_blanks s = + let rec loop i = + if i < 0 then "" + else + match s.[i] with + | ' '|'\t'|'\n' -> + loop (pred i) + | _ -> + if i = String.length s - 1 then + s + else + String.sub s 0 (i+1) + in loop (String.length s - 1) + in + let b = Buffer.create 64 in + let headers = ref [] in + let rec loop indent = function + | X x as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + (match x#to_t md with + | Some t -> loop indent t + | None -> + match x#to_html ~indent:indent + (html_of_md ~override ~pindent ~nl2br ~cs:code_style) md + with + | Some s -> Buffer.add_string b s + | None -> ()); + loop indent tl + end + | Blockquote q as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b "
"; + loop indent q; + Buffer.add_string b "
"; + loop indent tl + end + | Ref(rc, name, text, fallback) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + begin match rc#get_ref name with + | Some(href, title) -> + loop indent + (Url(htmlentities ~md:true href, + [Text(text)], + htmlentities ~md:true title) + ::tl) + | None -> + loop indent (fallback#to_t); + loop indent tl + end + end + | Img_ref(rc, name, alt, fallback) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + begin match rc#get_ref name with + | Some(src, title) -> + loop indent + (Img(htmlentities ~md:true alt, + htmlentities ~md:true src, + htmlentities ~md:true title)::tl) + | None -> + loop indent (fallback#to_t); + loop indent tl + end + end + | Paragraph [] :: tl -> loop indent tl + | Paragraph md as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + (let s = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + if empty s then + () + else + begin + Buffer.add_string b "

"; + Buffer.add_string b (remove_trailing_blanks s); + Buffer.add_string b "

\n"; + end); + loop indent tl + end + | Img(alt, src, title) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b "";
+          Buffer.add_string b (htmlentities ~md:true alt);
+          Buffer.add_string b " "" then + (Buffer.add_string b " title='"; + Buffer.add_string b (htmlentities ~md:true title); + Buffer.add_string b "' "); + Buffer.add_string b "/>"; + loop indent tl + end + | Text t as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + (* Buffer.add_string b t; *) + Buffer.add_string b (htmlentities ~md:true t); + loop indent tl + end + | Emph md as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b ""; + loop indent md; + Buffer.add_string b ""; + loop indent tl + end + | Bold md as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b ""; + loop indent md; + Buffer.add_string b ""; + loop indent tl + end + | (Ul l|Ol l|Ulp l|Olp l as e) :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b (match e with + | Ol _|Olp _ -> "
    " + | _ -> "
      "); + List.iter + ( + fun li -> + Buffer.add_string b "
    • "; + loop (indent+2) li; + Buffer.add_string b "
    • " + ) + l; + Buffer.add_string b (match e with + | Ol _|Olp _ -> "
" + | _ -> ""); + loop indent tl + end + | Code_block(lang, c) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + if lang = "" && !default_language = "" then + Buffer.add_string b "
"
+          else if lang = "" then
+            bprintf b "
"
+              !default_language !default_language
+          else
+            bprintf b "
" lang lang;
+          let new_c = code_style ~lang:lang c in
+          if c = new_c then
+            Buffer.add_string b (htmlentities ~md:false c)
+          else
+            Buffer.add_string b new_c;
+          Buffer.add_string b "
"; + loop indent tl + end + | Code(lang, c) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + if lang = "" && !default_language = "" then + Buffer.add_string b "" + else if lang = "" then + bprintf b "" !default_language + else + bprintf b "" lang; + let new_c = code_style ~lang:lang c in + if c = new_c then + Buffer.add_string b (htmlentities ~md:false c) + else + Buffer.add_string b new_c; + Buffer.add_string b ""; + loop indent tl + end + | Br as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b "
"; + loop indent tl + end + | Hr as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b "
"; + loop indent tl + end + | Raw s as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b s; + loop indent tl + end + | Raw_block s as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b s; + loop indent tl + end + | Html(tagname, attrs, []) as e :: tl + when StringSet.mem tagname html_void_elements -> + let attrs = filter_text_omd_rev attrs in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Printf.bprintf b " />"; + loop indent tl + end + | Html(tagname, attrs, body) as e :: tl -> + let attrs = filter_text_omd_rev attrs in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Buffer.add_string b ">"; + loop indent body; + Printf.bprintf b "" tagname; + loop indent tl + end + | Html_block(tagname, attrs, body) as e :: tl -> + let attrs = filter_text_omd_rev attrs in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + if body = [] && StringSet.mem tagname html_void_elements then + ( + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Buffer.add_string b " />"; + loop indent tl + ) + else + ( + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Buffer.add_string b ">"; + loop indent body; + Printf.bprintf b "" tagname; + loop indent tl + ) + end + | Html_comment s as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + Buffer.add_string b s; + loop indent tl + end + | Url (href,s,title) as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let s = html_of_md ~override ~pindent ~nl2br ~cs:code_style s in + Buffer.add_string b "
"" then + begin + Buffer.add_string b " title='"; + Buffer.add_string b (htmlentities ~md:true title); + Buffer.add_string b "'"; + end; + Buffer.add_string b ">"; + Buffer.add_string b s; + Buffer.add_string b ""; + loop indent tl + end + | (H1 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H1 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "

"; + Buffer.add_string b ih; + Buffer.add_string b "

"; + loop indent tl + end + | (H2 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H2 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "

"; + Buffer.add_string b ih; + Buffer.add_string b "

"; + loop indent tl + end + | (H3 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H3 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "

"; + Buffer.add_string b ih; + Buffer.add_string b "

"; + loop indent tl + end + | (H4 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H4 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "

"; + Buffer.add_string b ih; + Buffer.add_string b "

"; + loop indent tl + end + | (H5 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H5 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "
"; + Buffer.add_string b ih; + Buffer.add_string b "
"; + loop indent tl + end + | (H6 md as e) :: tl -> + let e, md = + if not remove_header_links then + e, md + else + let md = remove_links md in + H6 md, md in + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + let ih = html_of_md ~override ~pindent ~nl2br ~cs:code_style md in + let id = id_of_string ids (text_of_md md) in + headers := (e, id, ih) :: !headers; + Buffer.add_string b "
"; + Buffer.add_string b ih; + Buffer.add_string b "
"; + loop indent tl + end + | NL as e :: tl -> + begin match override e with + | Some s -> + Buffer.add_string b s; + loop indent tl + | None -> + if nl2br then + Buffer.add_string b "
" + else + Buffer.add_string b "\n"; + loop indent tl + end + | [] -> + () + in + loop 0 md; + Buffer.contents b, List.rev !headers + +and string_of_attrs attrs = + let b = Buffer.create 1024 in + List.iter + (function + | (a, Some v) -> + if not(String.contains v '\'') then + Printf.bprintf b " %s='%s'" a v + else if not(String.contains v '"') then + Printf.bprintf b " %s=\"%s\"" a v + else + Printf.bprintf b " %s=\"%s\"" a v + | a, None -> + (* if html4 then *) + (* Printf.bprintf b " %s='%s'" a a *) + (* else *) + Printf.bprintf b " %s=''" a (* HTML5 *) + ) + attrs; + Buffer.contents b + +and html_of_md + ?(override=(fun (e:element) -> (None:string option))) + ?(pindent=false) + ?(nl2br=false) + ?cs + md + = + fst (html_and_headers_of_md ~override ~pindent ~nl2br ?cs md) +and headers_of_md ?remove_header_links md = + snd (html_and_headers_of_md ?remove_header_links md) + + +let rec sexpr_of_md md = + let b = Buffer.create 64 in + let rec loop = function + | X x :: tl -> + (match x#to_t md with + | Some t -> + Buffer.add_string b "(X"; + loop t; + Buffer.add_string b ")" + | None -> + match x#to_sexpr sexpr_of_md md with + | Some s -> + Buffer.add_string b "(X"; + Buffer.add_string b s; + Buffer.add_string b ")" + | None -> + match x#to_html ~indent:0 html_of_md md with + | Some s -> + Buffer.add_string b "(X"; + Buffer.add_string b s; + Buffer.add_string b ")" + | None -> ()); + loop tl + | Blockquote q :: tl -> + Buffer.add_string b "(Blockquote"; + loop q; + Buffer.add_string b ")"; + loop tl + | Ref(rc, name, text, _) :: tl -> + bprintf b "(Ref %S %S)" name text; + loop tl + | Img_ref(rc, name, alt, _) :: tl -> + bprintf b "(Img_ref %S %S)" name alt; + loop tl + | Paragraph md :: tl -> + Buffer.add_string b "(Paragraph"; + loop md; + Buffer.add_string b ")"; + loop tl + | Img(alt, src, title) :: tl -> + bprintf b "(Img %S %S %S)" alt src title; + loop tl + | Text t :: tl -> + bprintf b "(Text %S" t; + let rec f = function + | Text t :: tl -> + bprintf b " %S" t; + f tl + | x -> x + in + let tl = f tl in + bprintf b ")"; + loop tl + | Emph md :: tl -> + Buffer.add_string b "(Emph"; + loop md; + Buffer.add_string b ")"; + loop tl + | Bold md :: tl -> + Buffer.add_string b "(Bold"; + loop md; + Buffer.add_string b ")"; + loop tl + | Ol l :: tl -> + bprintf b "(Ol"; + List.iter(fun li -> bprintf b "(Li "; loop li; bprintf b ")") l; + bprintf b ")"; + loop tl + | Ul l :: tl -> + bprintf b "(Ul"; + List.iter(fun li -> bprintf b "(Li "; loop li;bprintf b ")") l; + bprintf b ")"; + loop tl + | Olp l :: tl -> + bprintf b "(Olp"; + List.iter(fun li -> bprintf b "(Li "; loop li; bprintf b ")") l; + bprintf b ")"; + loop tl + | Ulp l :: tl -> + bprintf b "(Ulp"; + List.iter(fun li -> bprintf b "(Li "; loop li;bprintf b ")") l; + bprintf b ")"; + loop tl + | Code(lang, c) :: tl -> + bprintf b "(Code %S)" c; + loop tl + | Code_block(lang, c) :: tl -> + bprintf b "(Code_block %s)" c; + loop tl + | Br :: tl -> + Buffer.add_string b "(Br)"; + loop tl + | Hr :: tl -> + Buffer.add_string b "(Hr)"; + loop tl + | Raw s :: tl -> + bprintf b "(Raw %S)" s; + loop tl + | Raw_block s :: tl -> + bprintf b "(Raw_block %S)" s; + loop tl + | Html(tagname, attrs, body) :: tl -> + bprintf b "(Html %s %s " tagname (string_of_attrs attrs); + loop body; + bprintf b ")"; + loop tl + | Html_block(tagname, attrs, body) :: tl -> + bprintf b "(Html_block %s %s " tagname (string_of_attrs attrs); + loop body; + bprintf b ")"; + loop tl + | Html_comment s :: tl -> + bprintf b "(Html_comment %S)" s; + loop tl + | Url (href,s,title) :: tl -> + bprintf b "(Url %S %S %S)" href (html_of_md s) title; + loop tl + | H1 md :: tl -> + Buffer.add_string b "(H1"; + loop md; + Buffer.add_string b ")"; + loop tl + | H2 md :: tl -> + Buffer.add_string b "(H2"; + loop md; + Buffer.add_string b ")"; + loop tl + | H3 md :: tl -> + Buffer.add_string b "(H3"; + loop md; + Buffer.add_string b ")"; + loop tl + | H4 md :: tl -> + Buffer.add_string b "(H4"; + loop md; + Buffer.add_string b ")"; + loop tl + | H5 md :: tl -> + Buffer.add_string b "(H5"; + loop md; + Buffer.add_string b ")"; + loop tl + | H6 md :: tl -> + Buffer.add_string b "(H6"; + loop md; + Buffer.add_string b ")"; + loop tl + | NL :: tl -> + Buffer.add_string b "(NL)"; + loop tl + | [] -> () + in + loop md; + Buffer.contents b + + +let escape_markdown_characters s = + let b = Buffer.create (String.length s * 2) in + for i = 0 to String.length s - 1 do + match s.[i] with + | '.' as c -> + if i > 0 && + match s.[i-1] with + | '0' .. '9' -> i+1 < String.length s && s.[i+1] = ' ' + | _ -> false + then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '-' as c -> + if (i = 0 || match s.[i-1] with ' '| '\n' -> true | _ -> false) + && (i+1 < String.length s && (s.[i+1] = ' '||s.[i+1] = '-')) + then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '+' as c -> + if (i = 0 || match s.[i-1] with ' '| '\n' -> true | _ -> false) + && (i+1 < String.length s && s.[i+1] = ' ') + then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '!' as c -> + if i+1 < String.length s && s.[i+1] = '[' then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '<' as c -> + if i <> String.length s - 1 && + (match s.[i+1] with 'a' .. 'z' | 'A' .. 'Z' -> false | _ -> true) + then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '>' as c -> + if i = 0 || + (match s.[i-1] with ' ' | '\n' -> false | _ -> true) + then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '#' as c -> + if i = 0 || s.[i-1] = '\n' then + Buffer.add_char b '\\'; + Buffer.add_char b c + | '\\' | '[' | ']' | '(' | ')' | '`' | '*' as c -> + Buffer.add_char b '\\'; + Buffer.add_char b c + | c -> + Buffer.add_char b c + done; + Buffer.contents b + +let rec markdown_of_md md = + if debug then eprintf "(OMD) markdown_of_md(%S)\n%!" (sexpr_of_md md); + let quote ?(indent=0) s = + let b = Buffer.create (String.length s) in + let l = String.length s in + let rec loop nl i = + if i < l then + begin + if nl && i < l - 1 then + (for i = 1 to indent do + Buffer.add_char b ' ' + done; + Buffer.add_string b "> "); + match s.[i] with + | '\n' -> + Buffer.add_char b '\n'; + loop true (succ i) + | c -> + Buffer.add_char b c; + loop false (succ i) + end + else + Buffer.contents b + in loop true 0 + in + let b = Buffer.create 64 in + let add_spaces n = for i = 1 to n do Buffer.add_char b ' ' done in + let references = ref None in + let rec loop ?(fst_p_in_li=true) ?(is_in_list=false) list_indent l = + (* [list_indent: int] is the indentation level in number of spaces. *) + (* [is_in_list: bool] is necessary to know if we are inside a paragraph + which is inside a list item because those need to be indented! *) + let loop ?(fst_p_in_li=fst_p_in_li) ?(is_in_list=is_in_list) list_indent l = + loop ~fst_p_in_li:fst_p_in_li ~is_in_list:is_in_list list_indent l + in + match l with + | X x :: tl -> + (match x#to_t md with + | Some t -> loop list_indent t + | None -> + match x#to_html ~indent:0 html_of_md md with + | Some s -> Buffer.add_string b s + | None -> ()); + loop list_indent tl + | Blockquote q :: tl -> + Buffer.add_string b (quote ~indent:list_indent (markdown_of_md q)); + if tl <> [] then Buffer.add_string b "\n"; + loop list_indent tl + | Ref(rc, name, text, fallback) :: tl -> + if !references = None then references := Some rc; + loop list_indent (Raw(fallback#to_string)::tl) + | Img_ref(rc, name, alt, fallback) :: tl -> + if !references = None then references := Some rc; + loop list_indent (Raw(fallback#to_string)::tl) + | Paragraph [] :: tl -> loop list_indent tl + | Paragraph md :: tl -> + if is_in_list then + if fst_p_in_li then + add_spaces (list_indent-2) + else + add_spaces list_indent; + loop ~fst_p_in_li:false list_indent md; + Printf.bprintf b "\n\n"; + loop ~fst_p_in_li:false list_indent tl + | Img(alt, src, title) :: tl -> + Printf.bprintf b "![%s](%s \"%s\")" alt src title; + loop list_indent tl + | Text t :: tl -> + Printf.bprintf b "%s" (escape_markdown_characters t); + loop list_indent tl + | Emph md :: tl -> + Buffer.add_string b "*"; + loop list_indent md; + Buffer.add_string b "*"; + loop list_indent tl + | Bold md :: tl -> + Buffer.add_string b "**"; + loop list_indent md; + Buffer.add_string b "**"; + loop list_indent tl + | Ol l :: tl -> + if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then + Buffer.add_char b '\n'; + let c = ref 0 in (* don't use List.iteri because it's not in 3.12 *) + List.iter(fun li -> + incr c; + add_spaces list_indent; + Printf.bprintf b "%d. " !c; + loop ~is_in_list:true (list_indent+4) li; + Buffer.add_char b '\n'; + ) l; + if list_indent = 0 then Buffer.add_char b '\n'; + loop list_indent tl + | Ul l :: tl -> + if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' then + Buffer.add_char b '\n'; + List.iter(fun li -> + add_spaces list_indent; + Printf.bprintf b "- "; + loop ~is_in_list:true (list_indent+4) li; + Buffer.add_char b '\n'; + ) l; + if list_indent = 0 then Buffer.add_char b '\n'; + loop list_indent tl + | Olp l :: tl -> + let c = ref 0 in (* don't use List.iteri because it's not in 3.12 *) + List.iter(fun li -> + if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' + then Buffer.add_char b '\n'; + add_spaces list_indent; + incr c; + bprintf b "%d. " !c; + loop ~is_in_list:true (list_indent+4) li; + (* Paragraphs => No need of '\n' *) + ) l; + loop list_indent tl + | Ulp l :: tl -> + List.iter(fun li -> + if Buffer.length b > 0 && Buffer.nth b (Buffer.length b - 1) <> '\n' + then Buffer.add_char b '\n'; + add_spaces list_indent; + bprintf b "+ "; + loop ~is_in_list:true (list_indent+4) li; + (* Paragraphs => No need of '\n' *) + ) l; + begin match tl with + | (H1 _ | H2 _ | H3 _ | H4 _ | H5 _ | H6 _)::_ + | NL::(H1 _ | H2 _ | H3 _ | H4 _ | H5 _ | H6 _)::_ -> + Buffer.add_char b '\n' + | _ -> () + end; + loop list_indent tl + | Code(_lang, c) :: tl -> (* FIXME *) + let n = (* compute how many backquotes we need to use *) + let filter (n:int) (s:int list) = + if n > 0 && n < 10 then + List.filter (fun e -> e <> n) s + else + s + in + let l = String.length c in + let rec loop s x b i = + if i = l then + match filter b s with + | hd::_ -> hd + | [] -> x+1 + else + match c.[i] with + | '`' -> loop s x (succ b) (succ i) + | _ -> loop (filter b s) (max b x) 0 (succ i) + in + loop [1;2;3;4;5;6;7;8;9;10] 0 0 0 + in + begin + Printf.bprintf b "%s" (String.make n '`'); + if c.[0] = '`' then Buffer.add_char b ' '; + Printf.bprintf b "%s" c; + if c.[String.length c - 1] = '`' then Buffer.add_char b ' '; + Printf.bprintf b "%s" (String.make n '`'); + end; + loop list_indent tl + | Code_block(lang, c) :: tl -> + let n = (* compute how many backquotes we need to use *) + let filter n s = + if n > 0 && n < 10 then + List.filter (fun e -> e <> n) s + else + s + in + let l = String.length c in + let rec loop s b i = + if i = l then + match filter b s with + | hd::_ -> hd + | [] -> 0 + else + match c.[i] with + | '`' -> loop s (succ b) (succ i) + | _ -> loop (filter b s) 0 (succ i) + in + loop [3;4;5;6;7;8;9;10] 0 0 + in + let output_indented_block n s = + let rec loop p i = + if i = String.length s then + () + else + match p with + | '\n' -> + Printf.bprintf b "%s" (String.make n ' '); + Buffer.add_char b s.[i]; + loop s.[i] (succ i) + | _ -> + Buffer.add_char b s.[i]; + loop s.[i] (succ i) + in loop '\n' 0 + in + if n = 0 then (* FIXME *) + begin + (* case where we can't use backquotes *) + Buffer.add_char b '\n'; + output_indented_block (4+list_indent) c; + if tl <> [] then Buffer.add_string b "\n\n" + end + else + begin + Buffer.add_string b (String.make (list_indent) ' '); + Printf.bprintf b "%s%s\n" (String.make n '`') + (if lang = "" then !default_language else lang); + output_indented_block (list_indent) c; + if Buffer.nth b (Buffer.length b - 1) <> '\n' then + Buffer.add_char b '\n'; + Buffer.add_string b (String.make (list_indent) ' '); + Printf.bprintf b "%s\n" (String.make n '`'); + end; + loop list_indent tl + | Br :: tl -> + Buffer.add_string b "
"; + loop list_indent tl + | Hr :: tl -> + Buffer.add_string b "* * *\n"; + loop list_indent tl + | Raw s :: tl -> + Buffer.add_string b s; + loop list_indent tl + | Raw_block s :: tl -> + Buffer.add_char b '\n'; + Buffer.add_string b s; + Buffer.add_char b '\n'; + loop list_indent tl + | Html(tagname, attrs, []) :: tl + when StringSet.mem tagname html_void_elements -> + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Buffer.add_string b " />"; + loop list_indent tl + | Html(tagname, attrs, body) :: tl -> + let a = filter_text_omd_rev attrs in + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs a); + Buffer.add_string b ">"; + if a == attrs then + loop list_indent body + else + Buffer.add_string b (html_of_md body); + Printf.bprintf b "" tagname; + loop list_indent tl + | (Html_block(tagname, attrs, body))::tl -> + let needs_newlines = + match tl with + | NL :: Paragraph p :: _ + | Paragraph p :: _ -> p <> [] + | (H1 _ | H2 _ | H3 _ | H4 _ | H5 _ | H6 _ + | Ul _ | Ol _ | Ulp _ | Olp _ | Code (_, _) | Code_block (_, _) + | Text _ | Emph _ | Bold _ | Br |Hr | Url (_, _, _) + | Ref (_, _, _, _) | Img_ref (_, _, _, _) + | Html (_, _, _) + | Blockquote _ | Img (_, _, _)) :: _ -> true + | ( Html_block (_, _, _) | Html_comment _ + | Raw _|Raw_block _) :: _-> false + | X _ :: _ -> false + | NL :: _ -> false + | [] -> false + in + if body = [] && StringSet.mem tagname html_void_elements then + ( + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs attrs); + Buffer.add_string b " />"; + if needs_newlines then Buffer.add_string b "\n\n"; + loop list_indent tl + ) + else + ( + let a = filter_text_omd_rev attrs in + Printf.bprintf b "<%s" tagname; + Buffer.add_string b (string_of_attrs a); + Buffer.add_string b ">"; + if a == attrs then + loop list_indent body + else + Buffer.add_string b (html_of_md body); + Printf.bprintf b "" tagname; + if needs_newlines then Buffer.add_string b "\n\n"; + loop list_indent tl + ) + | Html_comment s :: tl -> + Buffer.add_string b s; + loop list_indent tl + | Url (href,s,title) :: tl -> + if title = "" then + bprintf b "[%s](%s)" (markdown_of_md s) href + else + bprintf b "[%s](%s \"%s\")" (markdown_of_md s) href title; + loop list_indent tl + | H1 md :: tl -> + Buffer.add_string b "# "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | H2 md :: tl -> + Buffer.add_string b "## "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | H3 md :: tl -> + Buffer.add_string b "### "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | H4 md :: tl -> + Buffer.add_string b "#### "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | H5 md :: tl -> + Buffer.add_string b "##### "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | H6 md :: tl -> + Buffer.add_string b "###### "; + loop list_indent md; + Buffer.add_string b "\n"; + loop list_indent tl + | NL :: tl -> + if Buffer.length b = 1 + || (Buffer.length b > 1 && + not(Buffer.nth b (Buffer.length b - 1) = '\n' + && Buffer.nth b (Buffer.length b - 2) = '\n')) + then + Buffer.add_string b "\n"; + loop list_indent tl + | [] -> () + in + loop 0 md; + begin match !references with + | None -> () + | Some r -> + Buffer.add_char b '\n'; + List.iter + (fun (name, (url, title)) -> + if title = "" then + bprintf b "[%s]: %s \n" name url + else + bprintf b "[%s]: %s \"%s\"\n" name url title + ) + r#get_all + end; + let res = Buffer.contents b in + if debug then + eprintf "(OMD) markdown_of_md(%S) => %S\n%!" + (sexpr_of_md md) res; + res diff --git a/ocaml-lsp-server/src/omd/src/omd_backend.mli b/ocaml-lsp-server/src/omd/src/omd_backend.mli new file mode 100644 index 000000000..cbdad2e56 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_backend.mli @@ -0,0 +1,97 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +type code_stylist = lang:string -> string -> string +(** Function that takes a language name and some code and returns + that code with style. *) + +val default_language : string ref +(** default language for code blocks can be set to any name, + by default it is the empty string *) + +val html_of_md : + ?override:(Omd_representation.element -> string option) -> + ?pindent:bool -> + ?nl2br:bool -> + ?cs:code_stylist -> + Omd_representation.t -> string +(** [html_of_md md] returns a string containing the HTML version of + [md]. Note that [md] uses the internal representation of + Markdown. + + The optional parameter [override] allows to override an precise + behaviour for a constructor of Omd_representation.element, + as in the following example: + +let customized_to_html = + Omd.html_of_md + ~override:(function + | Url (href,s,title) -> + Some(" "" then + " title='" ^ (Omd_utils.htmlentities ~md:true title) ^ "'" + else "") + ^ ">" + ^ Omd_backend.html_of_md s ^ " target='_blank'") + | _ -> None) + *) + +val headers_of_md : + ?remove_header_links:bool -> + Omd_representation.t -> + (Omd_representation.element * string * string) list +(** [headers_of_md md] returns a list of 3-tuples; in each of them the + first element is the header (e.g., [H1(foo)]), the second is the + HTML id (as produced by [html_of_md]), and the third element is + the HTML version of [foo]. The third elements of those 3-tuples + exist because if you use [html_and_headers_of_md], then you have + the guarantee that the HTML version of [foo] is the same for + both the headers and the HTML version of [md]. + If [remove_header_links], then remove links inside headers (h1, h2, ...). + Default value of [remove_header_links]: cf. [html_and_headers_of_md]. + *) + +val html_and_headers_of_md : + ?remove_header_links:bool -> + ?override:(Omd_representation.element -> string option) -> + ?pindent:bool -> + ?nl2br:bool -> + ?cs:code_stylist -> + Omd_representation.t -> + string * + (Omd_representation.element * Omd_utils.StringSet.elt * string) list +(** [html_and_headers_of_md md] is the same as [(html_of_md md, + headers_of_md md)] except that it's two times faster. + If you need both headers and html, don't use [html_of_md] + and [headers_of_md] but this function instead. + If [remove_header_links], then remove links inside headers (h1, h2, ...). + Default value of [remove_header_links]: false. +*) + +val escape_markdown_characters : string -> string +(** [escape_markdown_characters s] returns a string where + markdown-significant characters in [s] have been + backslash-escaped. Note that [escape_markdown_characters] takes a + "raw" string, therefore it doesn't have the whole context in which + the string appears, thus the escaping cannot really be + minimal. However the implementation tries to minimalise the extra + escaping. *) + +val text_of_md : Omd_representation.t -> string +(** [text_of_md md] is basically the same as [html_of_md md] but without + the HTML tags in the output. *) + +val markdown_of_md : Omd_representation.t -> string +(** [markdown_of_md md] is basically the same as [html_of_md md] but + with the output in Markdown syntax rather than HTML. *) + +val sexpr_of_md : Omd_representation.t -> string +(** [sexpr_of_md md] is basically the same as [html_of_md md] but with + the output in s-expressions rather than HTML. This is mainly used + for debugging. *) + diff --git a/ocaml-lsp-server/src/omd/src/omd_html.ml b/ocaml-lsp-server/src/omd/src/omd_html.ml new file mode 100644 index 000000000..a4bc7406f --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_html.ml @@ -0,0 +1,61 @@ +(***********************************************************************) +(* OMD: Markdown tool in OCaml *) +(* (c) 2014 by Philippe Wang *) +(* Licence: ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +type html = html_node list + +and html_node = + | Node of nodename * attributes * html + | Data of string + | Rawdata of string + | Comment of string + +and nodename = string + +and attributes = attribute list + +and attribute = string * string option + +let to_string html = + let b = Buffer.create 1024 in + let pp f = Printf.bprintf b f in + let rec loop = function + | Node(nodename, attributes, html) -> + pp "<%s" nodename; + ppa attributes; + pp ">"; + List.iter loop html; + pp "" nodename + | Data s -> pp "%s" s + | Rawdata s -> pp "%s" s + | Comment c -> pp "" c + and ppa attrs = + List.iter + (function + | (a, Some v) -> + if not (String.contains v '\'') then + pp " %s='%s'" a v + else if not (String.contains v '"') then + pp " %s=\"%s\"" a v + else + ( + pp " %s=\"" a; + for i = 0 to String.length v - 1 do + match v.[i] with + | '"' -> pp """ + | c -> pp "%c" c + done; + pp "\"" + ) + | a, None -> + Printf.bprintf b " %s=''" a (* HTML5 *) + ) + attrs + in + List.iter loop html; + Buffer.contents b + + diff --git a/ocaml-lsp-server/src/omd/src/omd_lexer.ml b/ocaml-lsp-server/src/omd/src/omd_lexer.ml new file mode 100644 index 000000000..36badeef2 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_lexer.ml @@ -0,0 +1,399 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(* Implementation notes ********************************************* + + * - This module should depend on OCaml's standard library only and + * should be as 'pure OCaml' (i.e. depend as least as possible on + * external tools) as possible. + + * - `while' loops are sometimes preferred to recursion because this + * may be used on systems where tail recursion is not well + * supported. (I tried to write "while" as often as possible, but it + * turned out that it was pretty inconvenient, so I do use + * recursion. When I have time, I'll do some tests and see if I + * need to convert recursive loops into iterative loops. Sorry if it + * makes it harder to read.) + +*) + +(* class type tag = object method is_me : 'a. 'a -> bool end *) + +open Omd_representation + +type token = Omd_representation.tok +type t = Omd_representation.tok list + +let string_of_token = function + | Tag (name, o) -> + if Omd_utils.debug then "TAG("^name^")" ^ o#to_string else o#to_string + | Ampersand -> "&" + | Ampersands n -> assert (n >= 0); String.make (2+n) '&' + | At -> "@" + | Ats n -> assert (n >= 0); String.make (2+n) '@' + | Backquote -> "`" + | Backquotes n -> assert (n >= 0); String.make (2+n) '`' + | Backslash -> "\\" + | Backslashs n -> assert (n >= 0); String.make (2+n) '\\' + | Bar -> "|" + | Bars n -> assert (n >= 0); String.make (2+n) '|' + | Caret -> "^" + | Carets n -> assert (n >= 0); String.make (2+n) '^' + | Cbrace -> "}" + | Cbraces n -> assert (n >= 0); String.make (2+n) '}' + | Colon -> ":" + | Colons n -> assert (n >= 0); String.make (2+n) ':' + | Comma -> "," + | Commas n -> assert (n >= 0); String.make (2+n) ',' + | Cparenthesis -> ")" + | Cparenthesiss n -> assert (n >= 0); String.make (2+n) ')' + | Cbracket -> "]" + | Cbrackets n -> assert (n >= 0); String.make (2+n) ']' + | Dollar -> "$" + | Dollars n -> assert (n >= 0); String.make (2+n) '$' + | Dot -> "." + | Dots n -> assert (n >= 0); String.make (2+n) '.' + | Doublequote -> "\"" + | Doublequotes n -> assert (n >= 0); String.make (2+n) '"' + | Exclamation -> "!" + | Exclamations n -> assert (n >= 0); String.make (2+n) '!' + | Equal -> "=" + | Equals n -> assert (n >= 0); String.make (2+n) '=' + | Greaterthan -> ">" + | Greaterthans n -> assert (n >= 0); String.make (2+n) '>' + | Hash -> "#" + | Hashs n -> assert (n >= 0); String.make (2+n) '#' + | Lessthan -> "<" + | Lessthans n -> assert (n >= 0); String.make (2+n) '<' + | Minus -> "-" + | Minuss n -> assert (n >= 0); String.make (2+n) '-' + | Newline -> "\n" + | Newlines n -> assert (n >= 0); String.make (2+n) '\n' + | Number s -> s + | Obrace -> "{" + | Obraces n -> assert (n >= 0); String.make (2+n) '{' + | Oparenthesis -> "(" + | Oparenthesiss n -> assert (n >= 0); String.make (2+n) '(' + | Obracket -> "[" + | Obrackets n -> assert (n >= 0); String.make (2+n) '[' + | Percent -> "%" + | Percents n -> assert (n >= 0); String.make (2+n) '%' + | Plus -> "+" + | Pluss n -> assert (n >= 0); String.make (2+n) '+' + | Question -> "?" + | Questions n -> assert (n >= 0); String.make (2+n) '?' + | Quote -> "'" + | Quotes n -> assert (n >= 0); String.make (2+n) '\'' + | Semicolon -> ";" + | Semicolons n -> assert (n >= 0); String.make (2+n) ';' + | Slash -> "/" + | Slashs n -> assert (n >= 0); String.make (2+n) '/' + | Space -> " " + | Spaces n -> assert (n >= 0); String.make (2+n) ' ' + | Star -> "*" + | Stars n -> assert (n >= 0); String.make (2+n) '*' + | Tab -> " " + | Tabs n -> assert (n >= 0); String.make ((2+n)*4) ' ' + | Tilde -> "~" + | Tildes n -> assert (n >= 0); String.make (2+n) '~' + | Underscore -> "_" + | Underscores n -> assert (n >= 0); String.make (2+n) '_' + | Word s -> s + + +let size_and_newlines = function + | Tag _ -> (0, 0) + | Ampersand | At | Backquote | Backslash | Bar | Caret | Cbrace + | Colon | Comma | Cparenthesis | Cbracket | Dollar | Dot + | Doublequote | Exclamation | Equal | Greaterthan | Hash | Lessthan + | Minus | Obrace | Oparenthesis | Obracket | Percent | Plus + | Question | Quote | Semicolon | Slash | Space | Star | Tab + | Tilde | Underscore -> (1, 0) + | Ampersands x | Ats x | Backquotes x | Backslashs x | Bars x | Carets x + | Cbraces x | Colons x | Commas x | Cparenthesiss x | Cbrackets x + | Dollars x | Dots x + | Doublequotes x | Exclamations x | Equals x | Greaterthans x | Hashs x + | Lessthans x + | Minuss x | Obraces x | Oparenthesiss x | Obrackets x | Percents x | Pluss x + | Questions x | Quotes x | Semicolons x | Slashs x | Spaces x | Stars x + | Tabs x + | Tildes x | Underscores x -> (2+x, 0) + | Newline -> (0, 1) + | Newlines x -> (0, 2+x) + | Number s | Word s -> (String.length s, 0) + +let length t = + let c, nl = size_and_newlines t in + c + nl + +let split_first = function + | Ampersands n -> Ampersand, (if n > 0 then Ampersands(n-1) else Ampersand) + | Ats n -> At, (if n > 0 then Ats(n-1) else At) + | Backquotes n -> Backquote, (if n > 0 then Backquotes(n-1) else Backquote) + | Backslashs n -> Backslash, (if n > 0 then Backslashs(n-1) else Backslash) + | Bars n -> Bar, (if n > 0 then Bars(n-1) else Bar) + | Carets n -> Caret, (if n > 0 then Carets(n-1) else Caret) + | Cbraces n -> Cbrace, (if n > 0 then Cbraces(n-1) else Cbrace) + | Colons n -> Colon, (if n > 0 then Colons(n-1) else Colon) + | Commas n -> Comma, (if n > 0 then Commas(n-1) else Comma) + | Cparenthesiss n -> Cparenthesis, (if n > 0 then Cparenthesiss(n-1) + else Cparenthesis) + | Cbrackets n -> Cbracket, (if n > 0 then Cbrackets(n-1) else Cbracket) + | Dollars n -> Dollar, (if n > 0 then Dollars(n-1) else Dollar) + | Dots n -> Dot, (if n > 0 then Dots(n-1) else Dot) + | Doublequotes n -> Doublequote, (if n > 0 then Doublequotes(n-1) + else Doublequote) + | Exclamations n -> Exclamation, (if n > 0 then Exclamations(n-1) + else Exclamation) + | Equals n -> Equal, (if n > 0 then Equals(n-1) else Equal) + | Greaterthans n -> Greaterthan, (if n > 0 then Greaterthans(n-1) + else Greaterthan) + | Hashs n -> Hash, (if n > 0 then Hashs(n-1) else Hash) + | Lessthans n -> Lessthan, (if n > 0 then Lessthans(n-1) else Lessthan) + | Minuss n -> Minus, (if n > 0 then Minuss(n-1) else Minus) + | Newlines n -> Newline, (if n > 0 then Newlines(n-1) else Newline) + | Obraces n -> Obrace, (if n > 0 then Obraces(n-1) else Obrace) + | Oparenthesiss n -> Oparenthesis, (if n > 0 then Oparenthesiss(n-1) + else Oparenthesis) + | Obrackets n -> Obracket, (if n > 0 then Obrackets(n-1) else Obracket) + | Percents n -> Percent, (if n > 0 then Percents(n-1) else Percent) + | Pluss n -> Plus, (if n > 0 then Pluss(n-1) else Plus) + | Questions n -> Question, (if n > 0 then Questions(n-1) else Question) + | Quotes n -> Quote, (if n > 0 then Quotes(n-1) else Quote) + | Semicolons n -> Semicolon, (if n > 0 then Semicolons(n-1) else Semicolon) + | Slashs n -> Slash, (if n > 0 then Slashs(n-1) else Slash) + | Spaces n -> Space, (if n > 0 then Spaces(n-1) else Space) + | Stars n -> Star, (if n > 0 then Stars(n-1) else Star) + | Tabs n -> Tab, (if n > 0 then Tabs(n-1) else Tab) + | Tildes n -> Tilde, (if n > 0 then Tildes(n-1) else Tilde) + | Underscores n -> Underscore, (if n > 0 then Underscores(n-1) + else Underscore) + | Ampersand | At | Backquote | Backslash | Bar | Caret | Cbrace | Colon + | Comma | Cparenthesis | Cbracket | Dollar | Dot | Doublequote + | Exclamation | Equal | Greaterthan | Hash | Lessthan | Minus + | Newline | Number _ | Obrace | Oparenthesis | Obracket | Percent + | Plus | Question | Quote | Semicolon | Slash | Space | Star | Tab + | Tilde | Underscore | Tag _ | Word _ -> + invalid_arg "Omd_lexer.split_first" + +module type Input = +sig + type t + val length : t -> int + val get : t -> int -> char + val sub : t -> pos:int -> len:int -> string +end + +module Lex(I : Input) : +sig + val lex : I.t -> t +end = +struct + let lex (s : I.t) = + let result = ref [] in + let i = ref 0 in + let l = I.length s in + let rcount c = + (* [rcount c] returns the number of immediate consecutive + occurrences of [c]. By side-effect, it increases the reference + counter [i]. *) + let rec loop r = + if !i = l then r + else if I.get s !i = c then (incr i; loop (r+1)) + else r + in + loop 1 + in + let word () = + let start = !i in + let rec loop () = + begin + if !i = l then + Word(I.sub s ~pos:start ~len:(!i-start)) + else + match I.get s !i with + | ' ' | '\t' | '\n' | '\r' | '#' | '*' | '-' | '+' | '`' | '\'' + | '"' | '\\' | '_' | '[' | ']' | '{' | '}' | '(' | ')' | ':' + | ';' | '>' | '~' | '<' | '@' | '&' | '|' | '^' | '.' | '/' + | '$' | '%' | '!' | '?' | '=' -> + Word(I.sub s ~pos:start ~len:(!i-start)) + | c -> incr i; loop() + end + in + loop() + in + let maybe_number () = + let start = !i in + while + !i < l && + match I.get s !i with + | '0' .. '9' -> true + | _ -> false + do + incr i + done; + if !i = l then + Number(I.sub s ~pos:start ~len:(!i-start)) + else + begin match I.get s !i with + | ' ' | '\t' | '\n' | '\r' | '#' | '*' | '-' | '+' | '`' | '\'' | '"' + | '\\' | '_' | '[' | ']' | '{' | '}' | '(' | ')' | ':' | ';' | '>' + | '~' | '<' | '@' | '&' | '|' | '^' | '.' | '/' | '$' | '%' | '!' + | '?' | '=' -> + Number(I.sub s ~pos:start ~len:(!i-start)) + | _ -> + i := start; + word() + end + in + + let n_occ c = incr i; rcount c in + + while !i < l do + let c = I.get s !i in + let w = match c with + | ' ' -> let n = n_occ c in if n = 1 then Space else Spaces (n-2) + | '\t' -> let n = n_occ c in if n = 1 then Spaces(2) else Spaces(4*n-2) + | '\n' -> let n = n_occ c in if n = 1 then Newline else Newlines (n-2) + | '\r' -> (* eliminating \r by converting all styles to unix style *) + incr i; + let rec count_rn x = + if !i < l && I.get s (!i) = '\n' then + if !i + 1 < l && I.get s (!i+1) = '\r' then + (i := !i + 2; count_rn (x+1)) + else + x + else + x + in + let rn = 1 + count_rn 0 in + if rn = 1 then + match n_occ c with + | 1 -> Newline + | x -> assert(x>=2); Newlines(x-2) + else + (assert(rn>=2);Newlines(rn-2)) + | '#' -> let n = n_occ c in if n = 1 then Hash else Hashs (n-2) + | '*' -> let n = n_occ c in if n = 1 then Star else Stars (n-2) + | '-' -> let n = n_occ c in if n = 1 then Minus else Minuss (n-2) + | '+' -> let n = n_occ c in if n = 1 then Plus else Pluss (n-2) + | '`' -> let n = n_occ c in if n = 1 then Backquote else Backquotes (n-2) + | '\'' -> let n = n_occ c in if n = 1 then Quote else Quotes (n-2) + | '"' -> let n = n_occ c in if n = 1 then Doublequote + else Doublequotes (n-2) + | '\\' -> let n = n_occ c in if n = 1 then Backslash + else Backslashs (n-2) + | '_' -> let n = n_occ c in if n = 1 then Underscore + else Underscores (n-2) + | '[' -> let n = n_occ c in if n = 1 then Obracket + else Obrackets (n-2) + | ']' -> let n = n_occ c in if n = 1 then Cbracket else Cbrackets (n-2) + | '{' -> let n = n_occ c in if n = 1 then Obrace else Obraces (n-2) + | '}' -> let n = n_occ c in if n = 1 then Cbrace else Cbraces (n-2) + | '(' -> let n = n_occ c in if n = 1 then Oparenthesis + else Oparenthesiss (n-2) + | ')' -> let n = n_occ c in if n = 1 then Cparenthesis + else Cparenthesiss (n-2) + | ':' -> let n = n_occ c in if n = 1 then Colon else Colons (n-2) + | ';' -> let n = n_occ c in if n = 1 then Semicolon else Semicolons (n-2) + | '>' -> let n = n_occ c in if n = 1 then Greaterthan + else Greaterthans (n-2) + | '~' -> let n = n_occ c in if n = 1 then Tilde else Tildes (n-2) + | '<' -> let n = n_occ c in if n = 1 then Lessthan else Lessthans (n-2) + | '@' -> let n = n_occ c in if n = 1 then At else Ats (n-2) + | '&' -> let n = n_occ c in if n = 1 then Ampersand else Ampersands (n-2) + | '|' -> let n = n_occ c in if n = 1 then Bar else Bars (n-2) + | '^' -> let n = n_occ c in if n = 1 then Caret else Carets (n-2) + | ',' -> let n = n_occ c in if n = 1 then Comma else Commas (n-2) + | '.' -> let n = n_occ c in if n = 1 then Dot else Dots (n-2) + | '/' -> let n = n_occ c in if n = 1 then Slash else Slashs (n-2) + | '$' -> let n = n_occ c in if n = 1 then Dollar else Dollars (n-2) + | '%' -> let n = n_occ c in if n = 1 then Percent else Percents (n-2) + | '=' -> let n = n_occ c in if n = 1 then Equal else Equals (n-2) + | '!' -> let n = n_occ c in if n = 1 then Exclamation + else Exclamations (n-2) + | '?' -> let n = n_occ c in if n = 1 then Question else Questions (n-2) + | '0' .. '9' -> maybe_number() + | c -> word() in + result := w :: !result + done; + List.rev !result +end + +module Lex_string = Lex(StringLabels) +let lex = Lex_string.lex + +type bigstring = (char, + Bigarray.int8_unsigned_elt, + Bigarray.c_layout) Bigarray.Array1.t + +module Bigarray_input : Input with type t = bigstring = +struct + module BA = Bigarray + + type t = bigstring + let get = BA.Array1.get + let length = BA.Array1.dim + let sub arr ~pos ~len = + if len < 0 || pos < 0 || pos + len > BA.Array1.dim arr + then invalid_arg "Bigarray_input.sub"; + let s = Bytes.create len in + for i = 0 to len - 1 do + Bytes.unsafe_set s i (BA.Array1.unsafe_get arr (i + pos)) + done; + Bytes.unsafe_to_string s +end +module Lex_bigarray = Lex(Bigarray_input) +let lex_bigarray = Lex_bigarray.lex + +let make_space = function + | 0 -> invalid_arg "Omd_lexer.make_space" + | 1 -> Space + | n -> if n < 0 then invalid_arg "Omd_lexer.make_space" else Spaces (n-2) + + +(* +(** [string_of_tl l] returns the string representation of l. + [estring_of_tl l] returns the escaped string representation of l + (same semantics as [String.escaped (string_of_tl l)]). *) +let string_of_tl, estring_of_tl = + let g escaped tl = + let b = Buffer.create 42 in + let rec loop : 'a t list -> unit = function + | e::tl -> + Buffer.add_string b (if escaped then String.escaped (string_of_t e) + else string_of_t e); + loop tl + | [] -> + () + in + Buffer.contents (loop tl; b) + in g false, g true +*) + +let string_of_tokens tl = + let b = Buffer.create 128 in + List.iter (fun e -> Buffer.add_string b (string_of_token e)) tl; + Buffer.contents b + + +let destring_of_tokens ?(limit=max_int) tl = + let b = Buffer.create 1024 in + let rec loop (i:int) (tlist:tok list) : unit = match tlist with + | e::tl -> + if limit = i then + loop i [] + else + begin + Buffer.add_string b (String.escaped (string_of_token e)); + Buffer.add_string b "::"; + loop (succ i) tl + end + | [] -> + Buffer.add_string b "[]" + in + Buffer.contents (loop 0 tl; b) diff --git a/ocaml-lsp-server/src/omd/src/omd_lexer.mli b/ocaml-lsp-server/src/omd/src/omd_lexer.mli new file mode 100644 index 000000000..18beed6cd --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_lexer.mli @@ -0,0 +1,45 @@ +type token = Omd_representation.tok +type t = token list + +val lex : string -> t +(** Translate a raw string into tokens for the parser. To implement + an extension to the lexer, one may process its result before + giving it to the parser. To implement an extension to the + parser, one may extend it using the constructor [Tag] + from type [tok] and/or using the extensions mechanism + of the parser (cf. the optional argument [extensions]). + The main difference is that [Tag] is processed by the parser + in highest priority whereas functions in [extensions] are applied + with lowest priority. *) + +type bigstring = (char, + Bigarray.int8_unsigned_elt, + Bigarray.c_layout) Bigarray.Array1.t + +val lex_bigarray : bigstring -> t +(** As {!lex}, but read input from a bigarray rather than from a string. *) + +val string_of_tokens : t -> string +(** [string_of_tokens t] return the string corresponding to the token + list [t]. *) + +val length : token -> int +(** [length t] number of characters of the string represented as [t] + (i.e. [String.length(string_of_token t)]). *) + +val string_of_token : token -> string +(** [string_of_token tk] return the string corresponding to the token + [tk]. *) + +val make_space : int -> token + +val split_first : token -> token * token +(** [split_first(Xs n)] returns [(X, X(n-1))] where [X] is a token + carrying an int count. + + @raise Invalid_argument is passed a single token. *) + + +val destring_of_tokens : ?limit:int -> t -> string +(** Converts the tokens to a simple string representation useful for + debugging. *) diff --git a/ocaml-lsp-server/src/omd/src/omd_lexer_fs.ml b/ocaml-lsp-server/src/omd/src/omd_lexer_fs.ml new file mode 100644 index 000000000..37cd7e795 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_lexer_fs.ml @@ -0,0 +1,23 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(** You should either use this module or Omd_lexer, not both. + This module includes Omd_lexer. +*) + +include Omd_lexer + +let lex_from_inchannel ic = + (* Maintenance-easiness-driven implementation. *) + let ic_content = + let b = Buffer.create 64 in + try while true do + Buffer.add_char b (input_char ic) + done; + assert false + with End_of_file -> Buffer.contents b in + lex ic_content diff --git a/ocaml-lsp-server/src/omd/src/omd_lexer_fs.mli b/ocaml-lsp-server/src/omd/src/omd_lexer_fs.mli new file mode 100644 index 000000000..36852f1dc --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_lexer_fs.mli @@ -0,0 +1,10 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +include module type of Omd_lexer + +val lex_from_inchannel : in_channel -> Omd_representation.tok list diff --git a/ocaml-lsp-server/src/omd/src/omd_main.ml b/ocaml-lsp-server/src/omd/src/omd_main.ml new file mode 100644 index 000000000..45cf48a8d --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_main.ml @@ -0,0 +1,448 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(** This module implements an end-user interface for OMD. + + Treatments that are not specific to Markdown (such as table of + contents generation) are done here. If you want to build an + alternative end-user Markdown tool using OMD, you might want to + fork this file or get inspiration from it. + + Happy coding! +*) + +open Omd + +let remove_comments l = + let open Omd_representation in + let rec loop = function + | true, Exclamations n :: tl when n > 0 -> + loop (true, + Omd_utils.eat (function Newline|Newlines _ -> false|_-> true) tl) + | _, (Newline|Newlines _ as e)::tl -> + e::loop (true, tl) + | _, e::tl -> + e::loop (false, tl) + | _, [] -> [] + in loop (true, l) + +let remove_endline_comments l = + let open Omd_representation in + let rec loop = function + | Backslash :: (Exclamations n as e) :: tl when n > 0 -> + e :: loop tl + | Backslashs b :: (Exclamations n as e) :: tl when n > 0 && b mod 2 = 1 -> + Backslashs(b-1) :: e :: loop tl + | Exclamations n :: tl when n > 0 -> + loop (Omd_utils.eat (function Newline|Newlines _ -> false|_-> true) tl) + | e::tl -> + e::loop tl + | [] -> [] + in loop l + + +let preprocess_functions = ref [] + +(** [a += b] is a shortcut for [a := b :: !a] // NON-EXPORTED *) +let (+=) a b = a := b :: !a + +let preprocess l = + List.fold_left (fun r e -> e r) + l + !preprocess_functions + +let otoc = ref false + +let toc = ref false + +let omarkdown = ref false + +let notags = ref false + +let toc_depth = ref 2 + +let toc_start = ref([]: int list) + +let nl2br = ref false + +let protect_html_comments = ref false + +let code_stylist = + let module M = Map.Make(String) in +object + val mutable stylists = + M.empty + method style ~lang code = + try (M.find lang stylists) code + with Not_found -> + try (M.find "_" stylists) code + with Not_found -> code + method register ~lang stylist = + stylists <- M.add lang stylist stylists +end + +let code_stylist_of_program p = + fun code -> + let tmp1 = Filename.temp_file "code" "bef" in + let tmp2 = Filename.temp_file "code" "aft" in + let () = at_exit (fun () -> Sys.remove tmp1; Sys.remove tmp2) in + let otmp1 = open_out_bin tmp1 in + Printf.fprintf otmp1 "%s%!" code; + close_out otmp1; + match Sys.command (Printf.sprintf "( cat %s | %s ) > %s" tmp1 p tmp2) with + | 0 -> + let cat f = + let ic = open_in f in + let b = Buffer.create 64 in + try + while true do + Buffer.add_char b (input_char ic) + done; + assert false + with End_of_file -> Buffer.contents b + in + cat tmp2 + | _ -> code + +let register_code_stylist_of_program x = + try + let i = String.index x '=' in + code_stylist#register + ~lang:(String.sub x 0 i) + (code_stylist_of_program + (String.sub x (i+1) (String.length x - (i+1)))) + with Not_found | Invalid_argument _ -> + Printf.eprintf "Error: Something wrong with [-r %s]\n" x; + exit 1 + +let register_default_language l = + Omd_backend.default_language := l + + +(* HTML comments might contain some double-dash (--) that are not well + treated by HTML parsers. For instance "" should be + translated to "" when we want to ensure that + the generated HTML is correct! *) +let patch_html_comments l = + let htmlcomments s = + let b = Buffer.create (String.length s) in + for i = 0 to 3 do + Buffer.add_char b s.[i] + done; + for i = 4 to String.length s - 4 do + match s.[i] with + | '-' as c -> + if (i > 4 && s.[i-1] = '-') + || (i < String.length s - 5 && s.[i+1] = '-') + then + Printf.bprintf b "&#%d;" (int_of_char c) + else + Buffer.add_char b c + | c -> Buffer.add_char b c + done; + for i = String.length s - 3 to String.length s - 1 do + Buffer.add_char b s.[i] + done; + Buffer.contents b + in + let rec loop accu = function + | Html_comment s :: tl -> + loop (Html_comment(htmlcomments s)::accu) tl + | e :: tl -> + loop (e :: accu) tl + | [] -> List.rev accu + in loop [] l + + +let tag_toc l = + let open Omd_representation in + let x = + object(self) + (* [shield] is used to prevent endless loops. + If one wants to use system threads at some point, + and calls methods of this object concurrently, + then there is a real problem. *) + val remove = fun e md -> + visit + (function X(v) when v==e-> Some[] | _ -> None) + md + method name = "toc" + method to_html ?indent:_ f md = + let r = f (Omd.toc(remove self md)) in + Some r + method to_sexpr f md = + let r = f (Omd.toc(remove self md)) in + Some r + method to_t md = + let r = (Omd.toc(remove self md)) in + Some r + end + in + let rec loop = function + | Star:: + Word "Table"::Space:: + Word "of"::Space:: + Word "contents"::Star::tl -> + Tag("tag_toc", + object + method parser_extension r p l = + Some(X(x)::r,p,l) + method to_string = "" + end + ) :: loop tl + | e::tl -> e::loop tl + | [] -> [] + in loop l + + + +let split_comma_int_list s = + if s = "" then [] + else ( + let l = ref [] in + let i = ref 0 in + try + while true do + let j = String.index_from s !i ',' in + l := int_of_string(String.sub s !i (j - !i)) :: !l; + i := j + 1 + done; + assert false + with Not_found -> + l := (int_of_string(String.sub s !i (String.length s - !i))) :: !l; + List.rev !l + ) + +module E = Omd_parser.Default_env(struct end) + +let omd_gh_uemph_or_bold_style = + ref E.gh_uemph_or_bold_style +let omd_blind_html = + ref E.blind_html +let omd_strict_html = + ref E.strict_html +let omd_warning = ref E.warning +let omd_warn_error = ref E.warn_error + +let list_html_tags ~inline = + let module Parser = Omd_parser.Make(E) + in + if inline then + Omd_utils.StringSet.iter + (fun e -> print_string e; print_char '\n') + Parser.inline_htmltags_set + else + Omd_utils.StringSet.iter + (fun e -> print_string e; print_char '\n') + Parser.htmltags_set + +let verbatim_start = ref "" +let verbatim_end = ref "" +let lex_with_verb_extension s = + if !verbatim_start = "" || !verbatim_end = "" then + Omd_lexer.lex s + else + begin + let module M = struct + type t = Verb of string | To_lex of string + end in + let open M in + let sl = String.length s + and stl = String.length !verbatim_start + and enl = String.length !verbatim_end in + let rec seek_start accu from i = + if i + stl + enl > sl then + To_lex(String.sub s from (sl - from))::accu + else if String.sub s i stl = !verbatim_start then + seek_end + (To_lex(String.sub s from (i - from))::accu) + (i+stl) + (i+stl) + else seek_start accu from (i+1) + and seek_end accu from i = + if i + enl > sl then + To_lex(String.sub s from (sl - from))::accu + else if String.sub s i enl = !verbatim_end then + seek_start + (Verb(String.sub s from (i - from))::accu) + (i+enl) + (i+enl) + else seek_end accu from (i+1) + in + let first_pass () = seek_start [] 0 0 in + let second_pass l = + List.rev_map + (function + | To_lex x -> + Omd_lexer.lex x + | Verb x -> + [Omd_representation.Tag( + "raw", + object + method parser_extension r p l = + match p with + | [] | [Omd_representation.Newlines _] -> + Some(Raw_block x :: r, [Omd_representation.Space], l) + | _ -> + Some(Raw x :: r, [Omd_representation.Space], l) + method to_string = x + end + )] + ) + l + in + List.flatten(second_pass(first_pass())) + end + + +let main () = + let input = ref [] + and output = ref "" + in + Arg.( + parse + (align[ + "-o", Set_string output, + "file.html Specify the output file (default is stdout)."; + "--", Rest(fun s -> input := s :: !input), + " Consider all remaining arguments as input file names."; + "-u", Clear(omd_gh_uemph_or_bold_style), + " Use standard Markdown style for emph/bold when using `_'."; + "-c", Unit(fun () -> preprocess_functions += remove_endline_comments), + " Ignore lines that start with `!!!' (3 or more exclamation points)."; + "-C", Unit(fun () -> preprocess_functions += remove_comments), + " Ignore everything on a line after `!!!' \ + (3 or more exclamation points)."; + "-m", Set(omarkdown), " Output Markdown instead of HTML."; + "-notags", Set(notags), " Output without the HTML tags."; + "-toc", Set(toc), + " Replace `*Table of contents*' by the table of contents."; + "-otoc", Set(otoc), " Output only the table of contents."; + "-ts", String(fun l -> toc_start := split_comma_int_list l), + "f Section for the Table of contents (default: all)."; + "-td", Set_int(toc_depth), "f Table of contents depth (default is 2)."; + "-H", Set(protect_html_comments), " Protect HTML comments."; + "-r", String(register_code_stylist_of_program), + "l=p Register program p as a code highlighter for language l."; + "-R", String(register_default_language), + "l Registers unknown languages to be l instead of void."; + "-nl2br", Set(nl2br), " Convert new lines to
."; + "-x", String(ignore), + "ext Activate extension ext (not yet implemented)."; + "-l", Unit ignore, + " List available extensions ext (not yet implemented)."; + "-b", Set(omd_blind_html), + " Don't check validity of HTML tag names."; + "-s", Set(omd_strict_html), + " (might not work as expected yet) Block HTML only in block HTML, \ + inline HTML only in inline HTML \ + (semantics undefined if use both -b and -s)."; + "-LHTML", Unit(fun () -> list_html_tags ~inline:false; exit 0), + " List all known HTML tags"; + "-LHTMLi", Unit(fun () -> list_html_tags ~inline:true; exit 0), + " List all known inline HTML tags"; + "-version", Unit(fun () -> print_endline "This is version VERSION."; + exit 0), " Print version."; + "-VS", Set_string(verbatim_start), + "start Set the start token to use to declare a verbatim section. \ + If you use -VE, you must use -VS, and both must be non-empty."; + "-VE", Set_string(verbatim_end), + "end Set the end token to use to declare a verbatim section. \ + If you use -VE, you must use -VS, and both must be non-empty."; + "-w", Set(omd_warning), + " Activate warnings (beta)."; + "-W", Set(omd_warn_error), + " Convert warnings to errors, implies -w (beta)."; + ]) + (fun s -> input := s :: !input) + "omd [options] [inputfile1 .. inputfileN] [options]" + ); + let input_files = + if !input = [] then + [stdin] + else + List.rev_map (open_in) !input + in + let output = + if !output = "" then + stdout + else + open_out_bin !output + in + List.iter (fun ic -> + let b = Buffer.create 64 in + try while true do + Buffer.add_char b (input_char ic) + done; assert false + with End_of_file -> + let lexed = lex_with_verb_extension(Buffer.contents b) in + let preprocessed = preprocess (if !toc then tag_toc lexed else lexed) in + let module E = Omd_parser.Default_env(struct end) in + let module Parser = Omd_parser.Make( + struct + include E + let warning = !omd_warning || !omd_warn_error + let warn_error = !omd_warn_error + let gh_uemph_or_bold_style = !omd_gh_uemph_or_bold_style + let blind_html = !omd_blind_html + let strict_html = !omd_strict_html + end) + in + let parsed1 = Parser.parse preprocessed in + let parsed2 = + if !protect_html_comments then + patch_html_comments parsed1 + else + parsed1 + in + let parsed = parsed2 in + let o1 = (* make either TOC or paragraphs, or leave as it is *) + (if !otoc then Omd.toc ~start:!toc_start ~depth:!toc_depth + else Parser.make_paragraphs) + parsed in + let o2 = (* output either Text or HTML, or markdown *) + if !notags then to_text o1 + else if !omarkdown then to_markdown o1 + else if !toc && not !otoc then + to_html + ~pindent:true ~nl2br:false ~cs:code_stylist#style + (* FIXME: this is a quick fix for -toc which doesn't work + if to_html is directly applied to o1, and that seems to have + something to do with Parser.make_paragraphs, which seems to + prevent tag_toc from working properly when using to_html! + *) + (Parser.make_paragraphs(Parser.parse(Omd_lexer.lex(to_markdown o1)))) + else + to_html + ~pindent:true ~nl2br:false ~cs:code_stylist#style + (* The normal behaviour is to convert directly, like this. *) + o1 + in + output_string output o2; + if o2 <> "" && o2.[String.length o2 - 1] <> '\n' then + output_char output '\n'; + flush output; + if false && Omd_utils.debug then + print_endline + (Omd_backend.sexpr_of_md + (Omd_parser.default_parse + (preprocess(Omd_lexer.lex (Buffer.contents b))))); + ) + input_files + + +(* call the main function *) +let () = + try + main () + with + | Omd_utils.Error msg when not Omd_utils.debug -> + Printf.eprintf "(OMD) Error: %s\n" msg; + exit 1 + | Sys_error msg -> + Printf.eprintf "Error: %s\n" msg; + exit 1 diff --git a/ocaml-lsp-server/src/omd/src/omd_main.mli b/ocaml-lsp-server/src/omd/src/omd_main.mli new file mode 100644 index 000000000..724531dd0 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_main.mli @@ -0,0 +1,75 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +val remove_comments : Omd_representation.tok list -> Omd_representation.tok list +(** [remove_comments l] returns [l] without OMD comments. *) + +val remove_endline_comments : + Omd_representation.tok list -> Omd_representation.tok list +(** [remove_endline_comments l] returns [l] without OMD endline-comments. *) + +val preprocess_functions : + (Omd_representation.tok list -> Omd_representation.tok list) list ref +(** [preprocess_functions] contains the list of preprocessing functions *) + +val preprocess : Omd_representation.tok list -> Omd_representation.tok list +(** [preprocess l] returns [l] to which all preprocessing functions + (in reference [preprocess_functions]) have been applied. *) + +val otoc : bool ref +(** flag: output the table of contents only. *) + +val toc : bool ref +(** flag: replace "*Table of contents*" by the table of contents. *) + +val omarkdown : bool ref +(** flag: output Markdown instead of HTML. *) + +val notags : bool ref +(** flag: output HTML but without HTML tags, so it's not really HTML anymore. *) + +val toc_depth : int ref +(** flag: depth of table of contents *) + +val toc_start : int list ref +(** flag: first header level for table of contents *) + +val nl2br : bool ref +(** flag: convert newlines to "
" when output is HTML *) + +val omd_gh_uemph_or_bold_style : bool ref +(** flag: set on the command line, used for instanciating the + functor Omd_parser.Make *) + +val omd_blind_html : bool ref +(** flag: set on the command line, used for instanciating the + functor Omd_parser.Make *) + +val omd_strict_html : bool ref +(** flag: set on the command line, used for instanciating the + functor Omd_parser.Make *) + +val protect_html_comments : bool ref +(** flag: for multiple dashes in HTML comments, replace dashes by - *) + +val patch_html_comments : Omd.element list -> Omd.element list +(** [patch_html_comments l] returns the list [l] where + all [Html_comments s] have been converted to [Html_comments s'], + where [s'] means [s] with dashes replaced by - except for + single dashes (which are left untouched). + + N.B. It seems that it's not valid to have double dashes inside HTML comments + (cf. http://validator.w3.org/check). So one way to make life somewhat easier + is to patch the comments and transform inner dashed to -. *) + +val tag_toc : Omd_representation.tok list -> Omd_representation.tok list +(** [tag_toc l] returns [l] where *Table of contents* has been replaced + by a tag that can generate a table of contents. *) + +val main : unit -> unit +(** main function *) + diff --git a/ocaml-lsp-server/src/omd/src/omd_parser.ml b/ocaml-lsp-server/src/omd/src/omd_parser.ml new file mode 100644 index 000000000..a4f985fc0 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_parser.ml @@ -0,0 +1,4459 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013-2014 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +let sdebug = true + +open Printf +open Omd_representation +open Omd_utils +module L = Omd_lexer + +type r = Omd_representation.t +(** accumulator (beware, reversed tokens) *) + +and p = Omd_representation.tok list +(** context information: previous elements *) + +and l = Omd_representation.tok list +(** tokens to parse *) + +and main_loop = + ?html:bool -> + r -> (* accumulator (beware, reversed tokens) *) + p -> (* info: previous elements *) + l -> (* tokens to parse *) + Omd_representation.t (* final result *) +(** most important loop *) + + +(** N.B. Please do not use tabulations in your Markdown file! *) + +module type Env = sig + val rc: Omd_representation.ref_container + val extensions : Omd_representation.extensions + val default_lang : string + val gh_uemph_or_bold_style : bool + val blind_html : bool + val strict_html : bool + val warning : bool + val warn_error : bool +end + +module Unit = struct end + +module Default_env (Unit:sig end) : Env = struct + let rc = new Omd_representation.ref_container + let extensions = [] + let default_lang = "" + let gh_uemph_or_bold_style = true + let blind_html = false + let strict_html = false + let warning = false + let warn_error = false +end + +module Make (Env:Env) = +struct + include Env + + let warn = Omd_utils.warn ~we:warn_error + + (** set of known HTML codes *) + let htmlcodes_set = StringSet.of_list (* This list should be checked... *) + (* list extracted from: http://www.w3.org/TR/html4/charset.html *) + [ "AElig"; "Aacute"; "Acirc"; "Agrave"; "Alpha"; "Aring"; "Atilde"; + "Auml"; "Beta"; "Ccedil"; "Chi"; "Dagger"; "Delta"; "ETH"; "Eacute"; + "Ecirc"; "Egrave"; "Epsilon"; "Eta"; "Euml"; "Gamma"; "Iacute"; + "Icirc"; "Igrave"; "Iota"; "Iuml"; "Kappa"; "Lambda"; "Mu"; "Ntilde"; + "Nu"; "OElig"; "Oacute"; "Ocirc"; "Ograve"; "Omega"; "Omicron"; + "Oslash"; "Otilde"; "Ouml"; "Phi"; "Pi"; "Prime"; "Psi"; "Rho"; + "Scaron"; "Sigma"; "THORN"; "Tau"; "Theta"; "Uacute"; "Ucirc"; + "Ugrave"; "Upsilon"; "Uuml"; "Xi"; "Yacute"; "Yuml"; "Zeta"; "aacute"; + "acirc"; "acute"; "aelig"; "agrave"; "alefsym"; "alpha"; "amp"; "and"; + "ang"; "aring"; "asymp"; "atilde"; "auml"; "bdquo"; "beta"; "brvbar"; + "bull"; "cap"; "ccedil"; "cedil"; "cent"; "chi"; "circ"; "clubs"; + "cong"; "copy"; "crarr"; "cup"; "curren"; "dArr"; "dagger"; "darr"; + "deg"; "delta"; "diams"; "divide"; "eacute"; "ecirc"; "egrave"; + "empty"; "emsp"; "ensp"; "epsilon"; "equiv"; "eta"; "eth"; "euml"; + "euro"; "exist"; "fnof"; "forall"; "frac12"; "frac14"; "frac34"; + "frasl"; "gamma"; "ge"; "gt"; "hArr"; "harr"; "hearts"; "hellip"; + "iacute"; "icirc"; "iexcl"; "igrave"; "image"; "infin"; "int"; "iota"; + "iquest"; "isin"; "iuml"; "kappa"; "lArr"; "lambda"; "lang"; "laquo"; + "larr"; "lceil"; "ldquo"; "le"; "lfloor"; "lowast"; "loz"; "lrm"; + "lsaquo"; "lsquo"; "lt"; "macr"; "mdash"; "micro"; "middot"; "minus"; + "mu"; "nabla"; "nbsp"; "ndash"; "ne"; "ni"; "not"; "notin"; "nsub"; + "ntilde"; "nu"; "oacute"; "ocirc"; "oelig"; "ograve"; "oline"; + "omega"; "omicron"; "oplus"; "or"; "ordf"; "ordm"; "oslash"; "otilde"; + "otimes"; "ouml"; "para"; "part"; "permil"; "perp"; "phi"; "pi"; + "piv"; "plusmn"; "pound"; "prime"; "prod"; "prop"; "psi"; "quot"; + "rArr"; "radic"; "rang"; "raquo"; "rarr"; "rceil"; "rdquo"; "real"; + "reg"; "rfloor"; "rho"; "rlm"; "rsaquo"; "rsquo"; "sbquo"; "scaron"; + "sdot"; "sect"; "shy"; "sigma"; "sigmaf"; "sim"; "spades"; "sub"; + "sube"; "sum"; "sup"; "sup1"; "sup2"; "sup3"; "supe"; "szlig"; "tau"; + "there4"; "theta"; "thetasym"; "thinsp"; "thorn"; "tilde"; "times"; + "trade"; "uArr"; "uacute"; "uarr"; "ucirc"; "ugrave"; "uml"; "upsih"; + "upsilon"; "uuml"; "weierp"; "xi"; "yacute"; "yen"; "yuml"; "zeta"; + "zwj"; "zwnj"; ] + + + (** set of known inline HTML tags *) + let inline_htmltags_set = + (StringSet.of_list + (* from https://developer.mozilla.org/en-US/docs/HTML/Inline_elements *) + [ "b";"big";"i";"small";"tt"; + "abbr";"acronym";"cite";"code";"dfn";"em";"kbd";"strong";"samp";"var"; + "a";"bdo";"br";"img";"map";"object";"q";"span";"sub";"sup"; + "button";"input";"label";"select";"textarea";]) + + (** N.B. it seems that there is no clear distinction between inline + tags and block-level tags: in HTML4 it was not clear, in HTML5 + it's even more complicated. So, the choice *here* is to specify + a set of tags considered as "inline", cf. [inline_htmltags_set]. + So there will be inline tags, non-inline tags, and unknown + tags.*) + + (** set of HTML tags that may appear out of a body *) + let notinbodytags = StringSet.of_list + [ + "title"; + "link"; + "meta"; + "style"; + "html"; + "head"; + "body"; + ] + + (** All known HTML tags *) + let htmltags_set = + StringSet.union notinbodytags + (StringSet.union inline_htmltags_set + (StringSet.of_list + [ + "a";"abbr";"acronym";"address";"applet";"area";"article";"aside" + ;"audio";"b";"base";"basefont";"bdi";"bdo";"big";"blockquote" + ;"br";"button";"canvas";"caption";"center";"cite";"code";"col" + ;"colgroup";"command";"datalist";"dd";"del";"details";"dfn" + ;"dialog";"dir";"div";"dl";"dt";"em";"embed";"fieldset" + ;"figcaption";"figure";"font";"footer";"form";"frame";"frameset" + ;"h2";"h3";"h4";"h5";"h6" + ;"h1";"header";"hr";"i";"iframe";"img";"input";"ins";"kbd" + ;"keygen";"label";"legend";"li";"map";"mark";"menu";"meter";"nav" + ;"noframes";"noscript";"object";"ol";"optgroup";"option";"output" + ;"p";"param";"pre";"progress";"q";"rp";"rt";"ruby";"s";"samp" + ;"script";"section";"select";"small";"source";"span";"strike" + ;"strong";"style";"sub";"summary";"sup";"table";"tbody";"td" + ;"textarea";"tfoot";"th";"thead";"time";"tr";"track";"tt";"u" + ;"ul";"var";"video";"wbr" + ])) + + + (** This functions fixes bad lexing trees, which may be built when + extraction a portion of another lexing tree. *) + let fix l = + let rec loop accu = function + (* code to generate what follows... + List.iter (fun e -> + Printf.printf " + | %s::%s::tl -> + if trackfix then eprintf \"%s 1\\n%!\"; + loop accu (%ss 0::tl) + | %ss n::%s::tl -> + if trackfix then eprintf \"%s 2\\n%!\"; + loop accu (%ss(n+1)::tl) + | %s::%ss n::tl -> + if trackfix then eprintf \"%s 3\\n%!\"; + loop accu (%ss(n+1)::tl) + | %ss a::%ss b::tl -> + if trackfix then eprintf \"%s 4\\n%!\"; + loop accu (%ss(a+b+2)::tl)" + e e e e e e e e e e e e e e e e) + ["Ampersand"; "At"; "Backquote"; "Backslash"; "Bar"; "Caret"; "Cbrace"; "Colon"; "Comma"; "Cparenthesis"; "Cbracket"; "Dollar"; "Dot"; "Doublequote"; "Exclamation"; "Equal"; "Greaterthan"; "Hash"; "Lessthan"; "Minus"; "Newline"; "Obrace"; "Oparenthesis"; "Obracket"; "Percent"; "Plus"; "Question"; "Quote"; "Semicolon"; "Slash"; "Space"; "Star"; "Tab"; "Tilde"; "Underscore"]; + print_string "| x::tl -> loop (x::accu) tl\n| [] -> List.rev accu\n"; *) + | Ampersand::Ampersand::tl -> + if trackfix then eprintf "(OMD) Ampersand 1\n"; + loop accu (Ampersands 0::tl) + | Ampersands n::Ampersand::tl -> + if trackfix then eprintf "(OMD) Ampersand 2\n"; + loop accu (Ampersands(n+1)::tl) + | Ampersand::Ampersands n::tl -> + if trackfix then eprintf "(OMD) Ampersand 3\n"; + loop accu (Ampersands(n+1)::tl) + | Ampersands a::Ampersands b::tl -> + if trackfix then eprintf "(OMD) Ampersand 4\n"; + loop accu (Ampersands(a+b+2)::tl) + | At::At::tl -> + if trackfix then eprintf "(OMD) At 1\n"; + loop accu (Ats 0::tl) + | Ats n::At::tl -> + if trackfix then eprintf "(OMD) At 2\n"; + loop accu (Ats(n+1)::tl) + | At::Ats n::tl -> + if trackfix then eprintf "(OMD) At 3\n"; + loop accu (Ats(n+1)::tl) + | Ats a::Ats b::tl -> + if trackfix then eprintf "(OMD) At 4\n"; + loop accu (Ats(a+b+2)::tl) + | Backquote::Backquote::tl -> + if trackfix then eprintf "(OMD) Backquote 1\n"; + loop accu (Backquotes 0::tl) + | Backquotes n::Backquote::tl -> + if trackfix then eprintf "(OMD) Backquote 2\n"; + loop accu (Backquotes(n+1)::tl) + | Backquote::Backquotes n::tl -> + if trackfix then eprintf "(OMD) Backquote 3\n"; + loop accu (Backquotes(n+1)::tl) + | Backquotes a::Backquotes b::tl -> + if trackfix then eprintf "(OMD) Backquote 4\n"; + loop accu (Backquotes(a+b+2)::tl) + | Backslash::Backslash::tl -> + if trackfix then eprintf "(OMD) Backslash 1\n"; + loop accu (Backslashs 0::tl) + | Backslashs n::Backslash::tl -> + if trackfix then eprintf "(OMD) Backslash 2\n"; + loop accu (Backslashs(n+1)::tl) + | Backslash::Backslashs n::tl -> + if trackfix then eprintf "(OMD) Backslash 3\n"; + loop accu (Backslashs(n+1)::tl) + | Backslashs a::Backslashs b::tl -> + if trackfix then eprintf "(OMD) Backslash 4\n"; + loop accu (Backslashs(a+b+2)::tl) + | Bar::Bar::tl -> + if trackfix then eprintf "(OMD) Bar 1\n"; + loop accu (Bars 0::tl) + | Bars n::Bar::tl -> + if trackfix then eprintf "(OMD) Bar 2\n"; + loop accu (Bars(n+1)::tl) + | Bar::Bars n::tl -> + if trackfix then eprintf "(OMD) Bar 3\n"; + loop accu (Bars(n+1)::tl) + | Bars a::Bars b::tl -> + if trackfix then eprintf "(OMD) Bar 4\n"; + loop accu (Bars(a+b+2)::tl) + | Caret::Caret::tl -> + if trackfix then eprintf "(OMD) Caret 1\n"; + loop accu (Carets 0::tl) + | Carets n::Caret::tl -> + if trackfix then eprintf "(OMD) Caret 2\n"; + loop accu (Carets(n+1)::tl) + | Caret::Carets n::tl -> + if trackfix then eprintf "(OMD) Caret 3\n"; + loop accu (Carets(n+1)::tl) + | Carets a::Carets b::tl -> + if trackfix then eprintf "(OMD) Caret 4\n"; + loop accu (Carets(a+b+2)::tl) + | Cbrace::Cbrace::tl -> + if trackfix then eprintf "(OMD) Cbrace 1\n"; + loop accu (Cbraces 0::tl) + | Cbraces n::Cbrace::tl -> + if trackfix then eprintf "(OMD) Cbrace 2\n"; + loop accu (Cbraces(n+1)::tl) + | Cbrace::Cbraces n::tl -> + if trackfix then eprintf "(OMD) Cbrace 3\n"; + loop accu (Cbraces(n+1)::tl) + | Cbraces a::Cbraces b::tl -> + if trackfix then eprintf "(OMD) Cbrace 4\n"; + loop accu (Cbraces(a+b+2)::tl) + | Colon::Colon::tl -> + if trackfix then eprintf "(OMD) Colon 1\n"; + loop accu (Colons 0::tl) + | Colons n::Colon::tl -> + if trackfix then eprintf "(OMD) Colon 2\n"; + loop accu (Colons(n+1)::tl) + | Colon::Colons n::tl -> + if trackfix then eprintf "(OMD) Colon 3\n"; + loop accu (Colons(n+1)::tl) + | Colons a::Colons b::tl -> + if trackfix then eprintf "(OMD) Colon 4\n"; + loop accu (Colons(a+b+2)::tl) + | Comma::Comma::tl -> + if trackfix then eprintf "(OMD) Comma 1\n"; + loop accu (Commas 0::tl) + | Commas n::Comma::tl -> + if trackfix then eprintf "(OMD) Comma 2\n"; + loop accu (Commas(n+1)::tl) + | Comma::Commas n::tl -> + if trackfix then eprintf "(OMD) Comma 3\n"; + loop accu (Commas(n+1)::tl) + | Commas a::Commas b::tl -> + if trackfix then eprintf "(OMD) Comma 4\n"; + loop accu (Commas(a+b+2)::tl) + | Cparenthesis::Cparenthesis::tl -> + if trackfix then eprintf "(OMD) Cparenthesis 1\n"; + loop accu (Cparenthesiss 0::tl) + | Cparenthesiss n::Cparenthesis::tl -> + if trackfix then eprintf "(OMD) Cparenthesis 2\n"; + loop accu (Cparenthesiss(n+1)::tl) + | Cparenthesis::Cparenthesiss n::tl -> + if trackfix then eprintf "(OMD) Cparenthesis 3\n"; + loop accu (Cparenthesiss(n+1)::tl) + | Cparenthesiss a::Cparenthesiss b::tl -> + if trackfix then eprintf "(OMD) Cparenthesis 4\n"; + loop accu (Cparenthesiss(a+b+2)::tl) + | Cbracket::Cbracket::tl -> + if trackfix then eprintf "(OMD) Cbracket 1\n"; + loop accu (Cbrackets 0::tl) + | Cbrackets n::Cbracket::tl -> + if trackfix then eprintf "(OMD) Cbracket 2\n"; + loop accu (Cbrackets(n+1)::tl) + | Cbracket::Cbrackets n::tl -> + if trackfix then eprintf "(OMD) Cbracket 3\n"; + loop accu (Cbrackets(n+1)::tl) + | Cbrackets a::Cbrackets b::tl -> + if trackfix then eprintf "(OMD) Cbracket 4\n"; + loop accu (Cbrackets(a+b+2)::tl) + | Dollar::Dollar::tl -> + if trackfix then eprintf "(OMD) Dollar 1\n"; + loop accu (Dollars 0::tl) + | Dollars n::Dollar::tl -> + if trackfix then eprintf "(OMD) Dollar 2\n"; + loop accu (Dollars(n+1)::tl) + | Dollar::Dollars n::tl -> + if trackfix then eprintf "(OMD) Dollar 3\n"; + loop accu (Dollars(n+1)::tl) + | Dollars a::Dollars b::tl -> + if trackfix then eprintf "(OMD) Dollar 4\n"; + loop accu (Dollars(a+b+2)::tl) + | Dot::Dot::tl -> + if trackfix then eprintf "(OMD) Dot 1\n"; + loop accu (Dots 0::tl) + | Dots n::Dot::tl -> + if trackfix then eprintf "(OMD) Dot 2\n"; + loop accu (Dots(n+1)::tl) + | Dot::Dots n::tl -> + if trackfix then eprintf "(OMD) Dot 3\n"; + loop accu (Dots(n+1)::tl) + | Dots a::Dots b::tl -> + if trackfix then eprintf "(OMD) Dot 4\n"; + loop accu (Dots(a+b+2)::tl) + | Doublequote::Doublequote::tl -> + if trackfix then eprintf "(OMD) Doublequote 1\n"; + loop accu (Doublequotes 0::tl) + | Doublequotes n::Doublequote::tl -> + if trackfix then eprintf "(OMD) Doublequote 2\n"; + loop accu (Doublequotes(n+1)::tl) + | Doublequote::Doublequotes n::tl -> + if trackfix then eprintf "(OMD) Doublequote 3\n"; + loop accu (Doublequotes(n+1)::tl) + | Doublequotes a::Doublequotes b::tl -> + if trackfix then eprintf "(OMD) Doublequote 4\n"; + loop accu (Doublequotes(a+b+2)::tl) + | Exclamation::Exclamation::tl -> + if trackfix then eprintf "(OMD) Exclamation 1\n"; + loop accu (Exclamations 0::tl) + | Exclamations n::Exclamation::tl -> + if trackfix then eprintf "(OMD) Exclamation 2\n"; + loop accu (Exclamations(n+1)::tl) + | Exclamation::Exclamations n::tl -> + if trackfix then eprintf "(OMD) Exclamation 3\n"; + loop accu (Exclamations(n+1)::tl) + | Exclamations a::Exclamations b::tl -> + if trackfix then eprintf "(OMD) Exclamation 4\n"; + loop accu (Exclamations(a+b+2)::tl) + | Equal::Equal::tl -> + if trackfix then eprintf "(OMD) Equal 1\n"; + loop accu (Equals 0::tl) + | Equals n::Equal::tl -> + if trackfix then eprintf "(OMD) Equal 2\n"; + loop accu (Equals(n+1)::tl) + | Equal::Equals n::tl -> + if trackfix then eprintf "(OMD) Equal 3\n"; + loop accu (Equals(n+1)::tl) + | Equals a::Equals b::tl -> + if trackfix then eprintf "(OMD) Equal 4\n"; + loop accu (Equals(a+b+2)::tl) + | Greaterthan::Greaterthan::tl -> + if trackfix then eprintf "(OMD) Greaterthan 1\n"; + loop accu (Greaterthans 0::tl) + | Greaterthans n::Greaterthan::tl -> + if trackfix then eprintf "(OMD) Greaterthan 2\n"; + loop accu (Greaterthans(n+1)::tl) + | Greaterthan::Greaterthans n::tl -> + if trackfix then eprintf "(OMD) Greaterthan 3\n"; + loop accu (Greaterthans(n+1)::tl) + | Greaterthans a::Greaterthans b::tl -> + if trackfix then eprintf "(OMD) Greaterthan 4\n"; + loop accu (Greaterthans(a+b+2)::tl) + | Hash::Hash::tl -> + if trackfix then eprintf "(OMD) Hash 1\n"; + loop accu (Hashs 0::tl) + | Hashs n::Hash::tl -> + if trackfix then eprintf "(OMD) Hash 2\n"; + loop accu (Hashs(n+1)::tl) + | Hash::Hashs n::tl -> + if trackfix then eprintf "(OMD) Hash 3\n"; + loop accu (Hashs(n+1)::tl) + | Hashs a::Hashs b::tl -> + if trackfix then eprintf "(OMD) Hash 4\n"; + loop accu (Hashs(a+b+2)::tl) + | Lessthan::Lessthan::tl -> + if trackfix then eprintf "(OMD) Lessthan 1\n"; + loop accu (Lessthans 0::tl) + | Lessthans n::Lessthan::tl -> + if trackfix then eprintf "(OMD) Lessthan 2\n"; + loop accu (Lessthans(n+1)::tl) + | Lessthan::Lessthans n::tl -> + if trackfix then eprintf "(OMD) Lessthan 3\n"; + loop accu (Lessthans(n+1)::tl) + | Lessthans a::Lessthans b::tl -> + if trackfix then eprintf "(OMD) Lessthan 4\n"; + loop accu (Lessthans(a+b+2)::tl) + | Minus::Minus::tl -> + if trackfix then eprintf "(OMD) Minus 1\n"; + loop accu (Minuss 0::tl) + | Minuss n::Minus::tl -> + if trackfix then eprintf "(OMD) Minus 2\n"; + loop accu (Minuss(n+1)::tl) + | Minus::Minuss n::tl -> + if trackfix then eprintf "(OMD) Minus 3\n"; + loop accu (Minuss(n+1)::tl) + | Minuss a::Minuss b::tl -> + if trackfix then eprintf "(OMD) Minus 4\n"; + loop accu (Minuss(a+b+2)::tl) + | Newline::Newline::tl -> + if trackfix then eprintf "(OMD) Newline 1\n"; + loop accu (Newlines 0::tl) + | Newlines n::Newline::tl -> + if trackfix then eprintf "(OMD) Newline 2\n"; + loop accu (Newlines(n+1)::tl) + | Newline::Newlines n::tl -> + if trackfix then eprintf "(OMD) Newline 3\n"; + loop accu (Newlines(n+1)::tl) + | Newlines a::Newlines b::tl -> + if trackfix then eprintf "(OMD) Newline 4\n"; + loop accu (Newlines(a+b+2)::tl) + | Obrace::Obrace::tl -> + if trackfix then eprintf "(OMD) Obrace 1\n"; + loop accu (Obraces 0::tl) + | Obraces n::Obrace::tl -> + if trackfix then eprintf "(OMD) Obrace 2\n"; + loop accu (Obraces(n+1)::tl) + | Obrace::Obraces n::tl -> + if trackfix then eprintf "(OMD) Obrace 3\n"; + loop accu (Obraces(n+1)::tl) + | Obraces a::Obraces b::tl -> + if trackfix then eprintf "(OMD) Obrace 4\n"; + loop accu (Obraces(a+b+2)::tl) + | Oparenthesis::Oparenthesis::tl -> + if trackfix then eprintf "(OMD) Oparenthesis 1\n"; + loop accu (Oparenthesiss 0::tl) + | Oparenthesiss n::Oparenthesis::tl -> + if trackfix then eprintf "(OMD) Oparenthesis 2\n"; + loop accu (Oparenthesiss(n+1)::tl) + | Oparenthesis::Oparenthesiss n::tl -> + if trackfix then eprintf "(OMD) Oparenthesis 3\n"; + loop accu (Oparenthesiss(n+1)::tl) + | Oparenthesiss a::Oparenthesiss b::tl -> + if trackfix then eprintf "(OMD) Oparenthesis 4\n"; + loop accu (Oparenthesiss(a+b+2)::tl) + | Obracket::Obracket::tl -> + if trackfix then eprintf "(OMD) Obracket 1\n"; + loop accu (Obrackets 0::tl) + | Obrackets n::Obracket::tl -> + if trackfix then eprintf "(OMD) Obracket 2\n"; + loop accu (Obrackets(n+1)::tl) + | Obracket::Obrackets n::tl -> + if trackfix then eprintf "(OMD) Obracket 3\n"; + loop accu (Obrackets(n+1)::tl) + | Obrackets a::Obrackets b::tl -> + if trackfix then eprintf "(OMD) Obracket 4\n"; + loop accu (Obrackets(a+b+2)::tl) + | Percent::Percent::tl -> + if trackfix then eprintf "(OMD) Percent 1\n"; + loop accu (Percents 0::tl) + | Percents n::Percent::tl -> + if trackfix then eprintf "(OMD) Percent 2\n"; + loop accu (Percents(n+1)::tl) + | Percent::Percents n::tl -> + if trackfix then eprintf "(OMD) Percent 3\n"; + loop accu (Percents(n+1)::tl) + | Percents a::Percents b::tl -> + if trackfix then eprintf "(OMD) Percent 4\n"; + loop accu (Percents(a+b+2)::tl) + | Plus::Plus::tl -> + if trackfix then eprintf "(OMD) Plus 1\n"; + loop accu (Pluss 0::tl) + | Pluss n::Plus::tl -> + if trackfix then eprintf "(OMD) Plus 2\n"; + loop accu (Pluss(n+1)::tl) + | Plus::Pluss n::tl -> + if trackfix then eprintf "(OMD) Plus 3\n"; + loop accu (Pluss(n+1)::tl) + | Pluss a::Pluss b::tl -> + if trackfix then eprintf "(OMD) Plus 4\n"; + loop accu (Pluss(a+b+2)::tl) + | Question::Question::tl -> + if trackfix then eprintf "(OMD) Question 1\n"; + loop accu (Questions 0::tl) + | Questions n::Question::tl -> + if trackfix then eprintf "(OMD) Question 2\n"; + loop accu (Questions(n+1)::tl) + | Question::Questions n::tl -> + if trackfix then eprintf "(OMD) Question 3\n"; + loop accu (Questions(n+1)::tl) + | Questions a::Questions b::tl -> + if trackfix then eprintf "(OMD) Question 4\n"; + loop accu (Questions(a+b+2)::tl) + | Quote::Quote::tl -> + if trackfix then eprintf "(OMD) Quote 1\n"; + loop accu (Quotes 0::tl) + | Quotes n::Quote::tl -> + if trackfix then eprintf "(OMD) Quote 2\n"; + loop accu (Quotes(n+1)::tl) + | Quote::Quotes n::tl -> + if trackfix then eprintf "(OMD) Quote 3\n"; + loop accu (Quotes(n+1)::tl) + | Quotes a::Quotes b::tl -> + if trackfix then eprintf "(OMD) Quote 4\n"; + loop accu (Quotes(a+b+2)::tl) + | Semicolon::Semicolon::tl -> + if trackfix then eprintf "(OMD) Semicolon 1\n"; + loop accu (Semicolons 0::tl) + | Semicolons n::Semicolon::tl -> + if trackfix then eprintf "(OMD) Semicolon 2\n"; + loop accu (Semicolons(n+1)::tl) + | Semicolon::Semicolons n::tl -> + if trackfix then eprintf "(OMD) Semicolon 3\n"; + loop accu (Semicolons(n+1)::tl) + | Semicolons a::Semicolons b::tl -> + if trackfix then eprintf "(OMD) Semicolon 4\n"; + loop accu (Semicolons(a+b+2)::tl) + | Slash::Slash::tl -> + if trackfix then eprintf "(OMD) Slash 1\n"; + loop accu (Slashs 0::tl) + | Slashs n::Slash::tl -> + if trackfix then eprintf "(OMD) Slash 2\n"; + loop accu (Slashs(n+1)::tl) + | Slash::Slashs n::tl -> + if trackfix then eprintf "(OMD) Slash 3\n"; + loop accu (Slashs(n+1)::tl) + | Slashs a::Slashs b::tl -> + if trackfix then eprintf "(OMD) Slash 4\n"; + loop accu (Slashs(a+b+2)::tl) + | Space::Space::tl -> + if trackfix then eprintf "(OMD) Space 1\n"; + loop accu (Spaces 0::tl) + | Spaces n::Space::tl -> + if trackfix then eprintf "(OMD) Space 2\n"; + loop accu (Spaces(n+1)::tl) + | Space::Spaces n::tl -> + if trackfix then eprintf "(OMD) Space 3\n"; + loop accu (Spaces(n+1)::tl) + | Spaces a::Spaces b::tl -> + if trackfix then eprintf "(OMD) Space 4\n"; + loop accu (Spaces(a+b+2)::tl) + | Star::Star::tl -> + if trackfix then eprintf "(OMD) Star 1\n"; + loop accu (Stars 0::tl) + | Stars n::Star::tl -> + if trackfix then eprintf "(OMD) Star 2\n"; + loop accu (Stars(n+1)::tl) + | Star::Stars n::tl -> + if trackfix then eprintf "(OMD) Star 3\n"; + loop accu (Stars(n+1)::tl) + | Stars a::Stars b::tl -> + if trackfix then eprintf "(OMD) Star 4\n"; + loop accu (Stars(a+b+2)::tl) + | Tab::Tab::tl -> + if trackfix then eprintf "(OMD) Tab 1\n"; + loop accu (Tabs 0::tl) + | Tabs n::Tab::tl -> + if trackfix then eprintf "(OMD) Tab 2\n"; + loop accu (Tabs(n+1)::tl) + | Tab::Tabs n::tl -> + if trackfix then eprintf "(OMD) Tab 3\n"; + loop accu (Tabs(n+1)::tl) + | Tabs a::Tabs b::tl -> + if trackfix then eprintf "(OMD) Tab 4\n"; + loop accu (Tabs(a+b+2)::tl) + | Tilde::Tilde::tl -> + if trackfix then eprintf "(OMD) Tilde 1\n"; + loop accu (Tildes 0::tl) + | Tildes n::Tilde::tl -> + if trackfix then eprintf "(OMD) Tilde 2\n"; + loop accu (Tildes(n+1)::tl) + | Tilde::Tildes n::tl -> + if trackfix then eprintf "(OMD) Tilde 3\n"; + loop accu (Tildes(n+1)::tl) + | Tildes a::Tildes b::tl -> + if trackfix then eprintf "(OMD) Tilde 4\n"; + loop accu (Tildes(a+b+2)::tl) + | Underscore::Underscore::tl -> + if trackfix then eprintf "(OMD) Underscore 1\n"; + loop accu (Underscores 0::tl) + | Underscores n::Underscore::tl -> + if trackfix then eprintf "(OMD) Underscore 2\n"; + loop accu (Underscores(n+1)::tl) + | Underscore::Underscores n::tl -> + if trackfix then eprintf "(OMD) Underscore 3\n"; + loop accu (Underscores(n+1)::tl) + | Underscores a::Underscores b::tl -> + if trackfix then eprintf "(OMD) Underscore 4\n"; + loop accu (Underscores(a+b+2)::tl)| x::tl -> loop (x::accu) tl + | [] -> List.rev accu + in + loop [] l + + + (* Remove all [NL] and [Br] at the beginning. *) + let rec remove_initial_newlines = function + | [] -> [] + | (NL | Br) :: tl -> remove_initial_newlines tl + | l -> l + + + (** - recognizes paragraphs + - glues following blockquotes *) + let make_paragraphs md = + let rec loop cp accu = function (* cp means current paragraph *) + | [] -> + let accu = + match cp with + | [] | [NL] | [Br] -> accu + | (NL|Br)::cp -> Paragraph(List.rev cp)::accu + | cp -> Paragraph(List.rev cp)::accu + in + List.rev accu + | Blockquote b1 :: Blockquote b2 :: tl -> + loop cp accu (Blockquote(b1@b2):: tl) + | Blockquote b :: tl -> + let e = Blockquote(loop [] [] b) in + (match cp with + | [] | [NL] | [Br] -> loop cp (e::accu) tl + | _ -> loop [] (e::Paragraph(List.rev cp)::accu) tl) + | (Ulp b) :: tl -> + let e = Ulp(List.map (fun li -> loop [] [] li) b) in + (match cp with + | [] | [NL] | [Br] -> loop cp (e::accu) tl + | _ -> loop [] (e::Paragraph(List.rev cp)::accu) tl) + | (Olp b) :: tl -> + let e = Olp(List.map (fun li -> loop [] [] li) b) in + (match cp with + | [] | [NL] | [Br] -> loop cp (e::accu) tl + | _ -> loop [] (e::Paragraph(List.rev cp)::accu) tl) + | Html_comment _ as e :: tl -> + (match cp with + | [] -> loop [] (e::accu) tl + | [NL] | [Br] -> loop [] (e::NL::accu) tl + | _ -> loop (e::cp) accu tl) + | (Raw_block _ | Html_block _) as e :: tl -> + (match cp with + | [] | [NL] | [Br] -> loop cp (e::cp@accu) tl + | _ -> loop [] (e::Paragraph(List.rev cp)::accu) tl) + | (Code_block _ | H1 _ | H2 _ | H3 _ | H4 _ | H5 _ | H6 _ + | Ol _ | Ul _) as e :: tl -> + (match cp with + | [] | [NL] | [Br] -> loop cp (e::accu) tl + | _ -> loop [] (e::Paragraph(List.rev cp)::accu) tl) + | Text "\n" :: _ | Paragraph _ :: _ -> + invalid_arg "Omd_parser.make_paragraphs" + | (NL|Br) :: (NL|Br) :: tl -> + let tl = remove_initial_newlines tl in + begin match cp with + | [] | [NL] | [Br] -> loop [] (NL::NL::accu) tl + | _ -> loop [] (Paragraph(List.rev cp)::accu) tl + end + | X(x) as e :: tl -> + (* If the extension returns a block as first element, + then consider the extension as a block. However + don't take its contents as it is yet, the contents + of the extension shall be considered final as late + as possible. *) + begin match x#to_t md with + | None -> loop (e::cp) accu tl + | Some(t) -> + match t with + | ( H1 _ + | H2 _ + | H3 _ + | H4 _ + | H5 _ + | H6 _ + | Paragraph _ + | Ul _ + | Ol _ + | Ulp _ + | Olp _ + | Code_block _ + | Hr + | Html_block _ + | Raw_block _ + | Blockquote _ + ) :: _ + -> + (match cp with + | [] | [NL] | [Br] -> + loop cp (e::accu) tl + | _ -> + loop [] (e::Paragraph(List.rev cp)::accu) tl) + | _ -> + loop (e::cp) accu tl + end + | e::tl -> + loop (e::cp) accu tl + in + let remove_white_crumbs l = + let rec loop = function + | [] -> [] + | Text " " :: tl + | NL::tl + | Br::tl + -> + loop tl + | l -> l + in + List.rev (loop (List.rev l)) + in + let rec clean_paragraphs = + if debug then eprintf "(OMD) clean_paragraphs\n"; + function + | [] -> [] + | Paragraph[]::tl -> tl + | Paragraph(p) :: tl -> + Paragraph(clean_paragraphs + (remove_initial_newlines + (remove_white_crumbs(normalise_md p)))) + :: clean_paragraphs tl + | H1 v :: tl -> H1(clean_paragraphs v) + :: clean_paragraphs tl + | H2 v :: tl -> H2(clean_paragraphs v) + :: clean_paragraphs tl + | H3 v :: tl -> H3(clean_paragraphs v) + :: clean_paragraphs tl + | H4 v :: tl -> H4(clean_paragraphs v) + :: clean_paragraphs tl + | H5 v :: tl -> H5(clean_paragraphs v) + :: clean_paragraphs tl + | H6 v :: tl -> H6(clean_paragraphs v) + :: clean_paragraphs tl + | Emph v :: tl -> Emph(clean_paragraphs v) + :: clean_paragraphs tl + | Bold v :: tl -> Bold(clean_paragraphs v) + :: clean_paragraphs tl + | Ul v :: tl -> Ul(List.map clean_paragraphs v) + :: clean_paragraphs tl + | Ol v :: tl -> Ol(List.map clean_paragraphs v) + :: clean_paragraphs tl + | Ulp v :: tl -> Ulp(List.map clean_paragraphs v) + :: clean_paragraphs tl + | Olp v :: tl -> Olp(List.map clean_paragraphs v) + :: clean_paragraphs tl + | Blockquote v :: tl -> Blockquote(clean_paragraphs v) + :: clean_paragraphs tl + | Url(href,v,title) :: tl -> Url(href,(clean_paragraphs v),title) + :: clean_paragraphs tl + | Text _ + | Code _ + | Code_block _ + | Br + | Hr + | NL + | Ref _ + | Img_ref _ + | Raw _ + | Raw_block _ + | Html _ + | Html_block _ + | Html_comment _ + | Img _ + | X _ as v :: tl -> v :: clean_paragraphs tl + in + let r = clean_paragraphs(loop [] [] md) + in + if debug then eprintf "(OMD) clean_paragraphs %S --> %S\n%!" + (Omd_backend.sexpr_of_md md) + (Omd_backend.sexpr_of_md r); + r + + + (** [assert_well_formed] is a developer's function that helps to + track badly constructed token lists. This function has an + effect only if [trackfix] is [true]. *) + let assert_well_formed (l:tok list) : unit = + if trackfix then + let rec equiv l1 l2 = match l1, l2 with + | [], [] -> true + | Tag _::tl1, Tag _::tl2-> equiv tl1 tl2 + | e1::tl1, e2::tl2 -> e1 = e2 && equiv tl1 tl2 + | _ -> false + in + assert(equiv (fix l) l); + () + + (** Generate fallback for references. *) + let extract_fallback main_loop remains l = + if debug then eprintf "(OMD) Omd_parser.extract_fallback\n%!"; + let rec loop accu = function + | [] -> List.rev accu + | e::tl as r -> + if r == remains then + List.rev accu + else + match e, remains with + | Cbrackets 0, Cbracket::r when tl = r -> + let accu = Word "]" :: accu in + List.rev accu + | Cbrackets n, Cbrackets m::r when m + 1 = n && tl = r -> + let accu = Word "]" :: accu in + List.rev accu + | _ -> + loop (e::accu) tl + in + let a = loop [] l in + object + method to_string = L.string_of_tokens a + method to_t = [Text(L.string_of_tokens a)] + end + + + let unindent_rev n lexemes = + if debug then eprintf "(OMD) CALL: Omd_parser.unindent_rev\n%!"; + assert_well_formed lexemes; + let rec loop accu cl = function + | Newlines x::(Space|Spaces _)::Newlines y::tl -> + loop accu cl (Newlines(x+y+2)::tl) + | Newline::(Space|Spaces _)::Newlines x::tl -> + loop accu cl (Newlines(1+x)::tl) + | Newlines x::(Space|Spaces _)::Newline::tl -> + loop accu cl (Newlines(1+x)::tl) + | Newline::(Space|Spaces _)::Newline::tl -> + loop accu cl (Newlines(0)::tl) + + | (Newline|Newlines 0 as nl)::(Space|Spaces _ as s)::( + (Number _::Dot::(Space|Spaces _)::_) + | ((Star|Plus|Minus)::(Space|Spaces _)::_) + as tl) as l -> + if n = L.length s then + loop (nl::cl@accu) [] tl + else + (cl@accu), l + | (Newline|Newlines 0 as nl)::(Space|Spaces _ as s)::tl -> + let x = L.length s - n in + loop (nl::cl@accu) + (if x > 0 then [L.make_space x] else []) + tl + | Newlines(_)::_ as l -> + (cl@accu), l + | Newline::_ as l -> + (cl@accu), l + | e::tl -> + loop accu (e::cl) tl + | [] as l -> + (cl@accu), l + in + match loop [] [] lexemes with + | [], right -> [], right + | l, right -> + assert_well_formed l; + l, right + + let unindent n lexemes = + let fst, snd = unindent_rev n lexemes in + List.rev fst, snd + + let rec is_blank = function + | (Space | Spaces _ | Newline | Newlines _) :: tl -> + is_blank tl + | [] -> true + | _ -> false + + let semph_or_bold (n:int) (l:l) = + (* FIXME: use rpl call/return convention *) + assert_well_formed l; + assert (n>0 && n<4); + match + fsplit + ~excl:(function Newlines _ :: _ -> true | _ -> false) + ~f:(function + | Backslash::Star::tl -> + Continue_with([Star;Backslash],tl) + | Backslash::Stars 0::tl -> + Continue_with([Star;Backslash],Star::tl) + | Backslash::Stars n::tl -> + Continue_with([Star;Backslash],Stars(n-1)::tl) + | (Backslashs b as x)::Star::tl -> + if b mod 2 = 0 then + Continue_with([x],Star::tl) + else + Continue_with([Star;x],tl) + | (Backslashs b as x)::(Stars 0 as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Star;x],Star::tl) + | (Backslashs b as x)::(Stars n as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Star;x],Stars(n-1)::tl) + | (Space|Spaces _ as x)::(Star|Stars _ as s)::tl -> + Continue_with([s;x],tl) + | (Star|Stars _ as s)::tl -> + if L.length s = n then + Split([],tl) + else + Continue + | _ -> Continue) + l + with + | None -> + None + | Some(left,right) -> + if is_blank left then None else Some(left,right) + + let sm_uemph_or_bold (n:int) (l:l) = + assert_well_formed l; + (* FIXME: use rpl call/return convention *) + assert (n>0 && n<4); + match + fsplit + ~excl:(function Newlines _ :: _ -> true | _ -> false) + ~f:(function + | Backslash::Underscore::tl -> + Continue_with([Underscore;Backslash],tl) + | Backslash::Underscores 0::tl -> + Continue_with([Underscore;Backslash],Underscore::tl) + | Backslash::Underscores n::tl -> + Continue_with([Underscore;Backslash],Underscores(n-1)::tl) + | (Backslashs b as x)::Underscore::tl -> + if b mod 2 = 0 then + Continue_with([x],Underscore::tl) + else + Continue_with([Underscore;x],tl) + | (Backslashs b as x)::(Underscores 0 as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Underscore;x],Underscore::tl) + | (Backslashs b as x)::(Underscores n as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Underscore;x],Underscores(n-1)::tl) + | (Space|Spaces _ as x)::(Underscore|Underscores _ as s)::tl -> + Continue_with([s;x],tl) + | (Underscore|Underscores _ as s)::tl -> + if L.length s = n then + Split([],tl) + else + Continue + | _ -> Continue) + l + with + | None -> + None + | Some(left,right) -> + if is_blank left then None else Some(left,right) + + + let gh_uemph_or_bold (n:int) (l:l) = + assert_well_formed l; + (* FIXME: use rpl call/return convention *) + assert (n>0 && n<4); + match + fsplit + ~excl:(function Newlines _ :: _ -> true | _ -> false) + ~f:(function + | Backslash::Underscore::tl -> + Continue_with([Underscore;Backslash],tl) + | Backslash::Underscores 0::tl -> + Continue_with([Underscore;Backslash],Underscore::tl) + | Backslash::Underscores n::tl -> + Continue_with([Underscore;Backslash],Underscores(n-1)::tl) + | (Backslashs b as x)::Underscore::tl -> + if b mod 2 = 0 then + Continue_with([x],Underscore::tl) + else + Continue_with([Underscore;x],tl) + | (Backslashs b as x)::(Underscores 0 as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Underscore;x],Underscore::tl) + | (Backslashs b as x)::(Underscores n as s)::tl -> + if b mod 2 = 0 then + Continue_with([x],s::tl) + else + Continue_with([Underscore;x],Underscores(n-1)::tl) + | (Space|Spaces _ as x)::(Underscore|Underscores _ as s)::tl -> + Continue_with([s;x],tl) + | (Underscore|Underscores _ as s)::(Word _|Number _ as w):: tl -> + Continue_with([w;s],tl) + | (Underscore|Underscores _ as s)::tl -> + if L.length s = n then + Split([],tl) + else + Continue + | _ -> Continue) + l + with + | None -> + None + | Some(left,right) -> + if is_blank left then None else Some(left,right) + + + let uemph_or_bold n l = + assert_well_formed l; + (* FIXME: use rpl call/return convention *) + if gh_uemph_or_bold_style then + gh_uemph_or_bold n l + else + sm_uemph_or_bold n l + + let eat_blank = + eat (function |Space|Spaces _|Newline|Newlines _ -> true| _ -> false) + + + (* used by tag__maybe_h1 and tag__maybe_h2 *) + let setext_title main_loop (l:l) : (Omd_representation.tok list * l) option = + assert_well_formed l; + let rec detect_balanced_bqs n r l = + (* If there's a balanced (complete) backquote-started code block + then it should be "ignored", else it means the line that + follows is part of a code block, so it's not defining a + setext-style title. *) + if debug then + eprintf "(OMD) detect_balanced_bqs n=%d r=%S l=%S\n%!" + n (L.string_of_tokens r) (L.string_of_tokens l); + match l with + | [] -> + None + | (Newline|Newlines _)::_ -> + None + | Backslash::Backquote::tl -> + detect_balanced_bqs n (Backquote::Backslash::r) tl + | Backslash::Backquotes 0::tl -> + detect_balanced_bqs n (Backquote::Backslash::r) (Backquote::tl) + | Backslash::Backquotes x::tl -> + detect_balanced_bqs n (Backquote::Backslash::r) (Backquotes(x-1)::tl) + | Backslashs(m) as b::Backquote::tl when m mod 2 = 1 -> + detect_balanced_bqs n (Backquote::b::r) tl + | Backslashs(m) as b::Backquotes 0::tl when m mod 2 = 1 -> + detect_balanced_bqs n (Backquote::b::r) (Backquote::tl) + | Backslashs(m) as b::Backquotes x::tl when m mod 2 = 1 -> + detect_balanced_bqs n (Backquote::b::r) (Backquotes(x-1)::tl) + | (Backquote as b)::tl when n = 1 -> + Some(List.rev (b::r), tl) + | (Backquotes x as b)::tl when n = x+2 -> + Some(List.rev (b::r), tl) + | e::tl -> + detect_balanced_bqs n (e::r) tl + in + let rec loop r = function + | [] -> + if r = [] then + None + else + Some(List.rev r, []) + | Backslash::Backquote::tl -> + loop (Backquote::Backslash::r) tl + | Backslashs(m) as b::Backquote::tl when m mod 2 = 1 -> + loop (Backquote::b::r) tl + | Backslash::Backquotes 0::tl -> + loop (Backquote::Backslash::r) (Backquote::tl) + | Backslash::Backquotes x::tl -> + loop (Backquote::Backslash::r) (Backquotes(x-1)::tl) + | Backslashs(m) as b::Backquotes 0::tl when m mod 2 = 1 -> + loop (Backquote::b::r) (Backquote::tl) + | Backslashs(m) as b::Backquotes x::tl when m mod 2 = 1 -> + loop (Backquote::b::r) (Backquotes(x-1)::tl) + | Backquote::tl -> + begin match detect_balanced_bqs 1 [] tl with + | Some(bl,tl) -> loop (bl@r) tl + | _ -> None + end + | Backquotes(x)::tl -> + begin match detect_balanced_bqs (x+2) [] tl with + | Some(bl,tl) -> loop (bl@r) tl + | _ -> None + end + | Newline::(Equal|Equals _|Minus|Minuss _)::tl -> + if r = [] then + None + else + Some(List.rev r, tl) + | (Newline|Newlines _)::_ -> + if debug then + eprintf "(OMD) Omd_parser.setext_title is wrongly used!\n%!"; + None + | e::tl -> + loop (e::r) tl + in + if match l with + | Lessthan::Word _::_ -> + begin match main_loop [] [] l with + | (Html_block _ | Code_block _ | Raw_block _)::_ -> + true + | _ -> + false + end + | _ -> false + then + None + else + let result = loop [] l in + if debug then + eprintf "(OMD) setext_title l=%S result=%S,%S\n%!" + (L.string_of_tokens l) + (match result with + | None -> "" + | Some (x,tl) -> L.string_of_tokens x) + (match result with + | None -> "" + | Some (x,tl) -> L.string_of_tokens tl); + result + + let tag__maybe_h1 (main_loop:main_loop) = + Tag("tag__maybe_h1", + object + method parser_extension r p l = + match p with + | ([]|[Newline|Newlines _]) -> + begin match setext_title main_loop l with + | None -> + None + | Some(title, tl) -> + let title = H1(main_loop [] [] title) in + Some((title::r), [Newline], tl) + end + | _ -> + if debug then + eprintf "(OMD) Warning: Omd_parser.tag__maybe_h1 is wrongly \ + used (p=%S)!\n" + (L.string_of_tokens p); + None + method to_string = "" + end + ) + + let tag__maybe_h2 (main_loop:main_loop) = + Tag("tag__maybe_h2", + object + method parser_extension r p l = + match p with + | ([]|[Newline|Newlines _]) -> + begin match setext_title main_loop l with + | None -> + None + | Some(title, tl) -> + let title = H2(main_loop [] [] title) in + Some((title::r), [Newline], tl) + end + | _ -> + if debug then + eprintf "(OMD) Warning: Omd_parser.tag__maybe_h2 is wrongly \ + used (p=%S)!\n" + (L.string_of_tokens p); + None + method to_string = "" + end + ) + + let tag__md md = (* [md] should be in reverse *) + Tag("tag__md", + object + method parser_extension r p l = Some(md@r, [], l) + method to_string = "" + end + ) + + (* Let's tag the lines that *might* be titles using setext-style. + "might" because if they are, for instance, in a code section, + then they are not titles at all. *) + let tag_setext main_loop lexemes = + assert_well_formed lexemes; + let rec loop pl res = function + | [] | [Newline|Newlines _] -> + pl@res + | (Newline as e1)::(Equal|Equals _ as e2)::tl -> (* might be a H1. *) + begin + match + fsplit_rev + ~f:(function + | (Space|Spaces _|Equal|Equals _)::tl -> Continue + | [] -> Split([],[]) + | _::_ as l -> Split([], l)) + tl + with + | Some(rleft, (([]|(Newline|Newlines _)::_) as right)) -> + loop [] (rleft@(e2::e1::pl@tag__maybe_h1 main_loop::res)) right + | Some(rleft, right) -> + loop [] (rleft@(e2::e1::pl@res)) right + | None -> + loop [] (e2::e1::pl@res) [] + end + | (Newline as e1)::(Minus|Minuss _ as e2)::tl -> (* might be a H2. *) + begin + match + fsplit_rev + ~f:(function + | (Space|Spaces _|Minus|Minuss _)::tl -> Continue + | [] -> Split([],[]) + | _::_ as l -> Split([], l)) + tl + with + | Some(rleft, (([]|(Newline|Newlines _)::_) as right)) -> + loop [] (rleft@(e2::e1::pl@tag__maybe_h2 main_loop::res)) right + | Some(rleft, right) -> + loop [] (rleft@(e2::e1::pl@res)) right + | None -> + loop [] (e2::e1::pl@res) [] + end + | (Newline | Newlines _ as e1)::tl -> + loop [] (e1::pl@res) tl + | e::tl -> + loop (e::pl) res tl + in + List.rev (loop [] [] lexemes) + + + let hr_m l = + assert_well_formed l; + let rec loop n = function + | ((Newlines _|Newline)::tl) | ([] as tl) -> + if n >= 3 then Some tl else None + | (Space|Spaces _)::tl -> + loop n tl + | Minus::tl -> + loop (n+1) tl + | Minuss x::tl -> + loop (x+2+n) tl + | _::_ -> + None + in loop 0 l + + let hr_s l = + assert_well_formed l; + let rec loop n = function + | ((Newline|Newlines _)::tl) | ([] as tl) -> + if n >= 3 then Some tl else None + | (Space|Spaces _)::tl -> + loop n tl + | Star::tl -> + loop (n+1) tl + | Stars x::tl -> + loop (x+2+n) tl + | _::_ -> + None + in loop 0 l + + let hr l = + match hr_m l with + | None -> hr_s l + | Some _ as tl -> tl + + (** [bcode] parses code that's delimited by backquote(s) *) + let bcode ?(default_lang=default_lang) r p l = + assert_well_formed l; + let e, tl = + match l with + | (Backquote|Backquotes _ as e)::tl -> e, tl + | _ -> failwith "Omd_parser.bcode is wrongly called" + in + let rec code_block accu = function + | [] -> + None + | Backquote::tl -> + if e = Backquote then + match accu with + | Newline::accu -> + Some(List.rev accu, tl) + | _ -> + Some(List.rev accu, tl) + else + code_block (Backquote::accu) tl + | (Backquotes n as b)::tl -> + if e = b then + match accu with + | Newline::accu -> + Some(List.rev accu, tl) + | _ -> + Some(List.rev accu, tl) + else + code_block (b::accu) tl + | Tag(_, _)::tl -> + code_block accu tl + | e::tl -> + code_block (e::accu) tl + in + match code_block [] tl with + | None -> None + | Some(cb, l) -> + if List.exists (function (Newline|Newlines _) -> true | _ -> false) cb + && (match p with []|[Newline|Newlines _] -> true | _ -> false) + && (match e with Backquotes n when n > 0 -> true | _ -> false) + then + match cb with + | Word lang :: (Space|Spaces _) :: Newline :: tl + | Word lang :: Newline :: tl -> + let code = L.string_of_tokens tl in + Some(Code_block(lang, code) :: r, [Backquote], l) + | Word lang :: (Space|Spaces _) :: Newlines 0 :: tl + | Word lang :: Newlines 0 :: tl -> + let code = L.string_of_tokens(Newline::tl) in + Some(Code_block(lang, code) :: r, [Backquote], l) + | Word lang :: (Space|Spaces _) :: Newlines n :: tl + | Word lang :: Newlines n :: tl -> + let code = L.string_of_tokens (Newlines(n-1)::tl) in + Some(Code_block(lang, code) :: r, [Backquote], l) + | Newline :: tl -> + let code = L.string_of_tokens tl in + Some(Code_block(default_lang, code) :: r, [Backquote], l) + | _ -> + let code = L.string_of_tokens cb in + Some(Code_block(default_lang, code) :: r, [Backquote], l) + else + let clean_bcode s = + let rec loop1 i = + if i = String.length s then 0 + else match s.[i] with + | ' ' -> loop1(i+1) + | _ -> i + in + let rec loop2 i = + if i = -1 then String.length s + else match s.[i] with + | ' ' -> loop2(i-1) + | _ -> i+1 + in + match loop1 0, loop2 (String.length s - 1) with + | 0, n when n = String.length s - 1 -> s + | i, n -> String.sub s i (n-i) + in + let code = L.string_of_tokens cb in + if debug then + eprintf "(OMD) clean_bcode %S => %S\n%!" code (clean_bcode code); + Some(Code(default_lang, clean_bcode code) :: r, [Backquote], l) + + + exception NL_exception + exception Premature_ending + + (* !!DO NOT DELETE THIS!! + The program that generates the generated part that follows right after. + List.iter (fun (a,b,c) -> + print_endline ("let read_until_"^a^" ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: ("^b^" as b) :: tl -> + loop (b::accu) n tl + | Backslash :: ("^b^"s 0) :: tl -> + loop ("^b^"::accu) n ("^b^"::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl" + ^(if c<>"" then " + | Backslash :: ("^c^" as b) :: tl -> + loop (b::accu) n tl + | Backslash :: ("^c^"s 0) :: tl -> + loop ("^c^"::accu) n ("^c^"::tl) + | "^c^" as e :: tl -> + loop (e::accu) (n+1) tl + | "^c^"s x as e :: tl -> + loop (e::accu) (n+x+2) tl + " else "")^ + " | "^b^" as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | "^b^"s 0 :: tl -> + if n = 0 then + List.rev accu, "^b^"::tl + else + loop ("^b^"::accu) (n-1) ("^b^"::tl) + | "^b^"s x :: tl -> + if n = 0 then + List.rev accu, "^b^"s(x-1)::tl + else + loop + (match accu with + | "^b^"::accu -> "^b^"s(0)::accu + | "^b^"s x::accu -> "^b^"s(x+1)::accu + | _ -> "^b^"::accu) + (n-1) + ("^b^"s(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf \"Omd_parser.read_until_"^a^" %S bq=%b no_nl=%b\\n%!\" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf \"Omd_parser.read_until_"^a^" %S bq=%b no_nl=%b => %S\\n%!\" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res +")) + + [ "gt", "Greaterthan", "Lessthan"; + "lt", "Lessthan", ""; + "cparenth", "Cparenthesis", "Oparenthesis"; + "oparenth", "Oparenthesis", ""; + "dq", "Doublequote", ""; + "q", "Quote", ""; + "obracket", "Obracket", ""; + "cbracket", "Cbracket", "Obracket"; + "space", "Space", ""; + ] + *) + + (* begin generated part *) + +let read_until_gt ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Greaterthan as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Greaterthans 0) :: tl -> + loop (Greaterthan::accu) n (Greaterthan::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl + | Backslash :: (Lessthan as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Lessthans 0) :: tl -> + loop (Lessthan::accu) n (Lessthan::tl) + | Lessthan as e :: tl -> + loop (e::accu) (n+1) tl + | Lessthans x as e :: tl -> + loop (e::accu) (n+x+2) tl + | Greaterthan as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Greaterthans 0 :: tl -> + if n = 0 then + List.rev accu, Greaterthan::tl + else + loop (Greaterthan::accu) (n-1) (Greaterthan::tl) + | Greaterthans x :: tl -> + if n = 0 then + List.rev accu, Greaterthans(x-1)::tl + else + loop + (match accu with + | Greaterthan::accu -> Greaterthans(0)::accu + | Greaterthans x::accu -> Greaterthans(x+1)::accu + | _ -> Greaterthan::accu) + (n-1) + (Greaterthans(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_gt %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_gt %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_lt ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Lessthan as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Lessthans 0) :: tl -> + loop (Lessthan::accu) n (Lessthan::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Lessthan as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Lessthans 0 :: tl -> + if n = 0 then + List.rev accu, Lessthan::tl + else + loop (Lessthan::accu) (n-1) (Lessthan::tl) + | Lessthans x :: tl -> + if n = 0 then + List.rev accu, Lessthans(x-1)::tl + else + loop + (match accu with + | Lessthan::accu -> Lessthans(0)::accu + | Lessthans x::accu -> Lessthans(x+1)::accu + | _ -> Lessthan::accu) + (n-1) + (Lessthans(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_lt %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_lt %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_cparenth ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Cparenthesis as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Cparenthesiss 0) :: tl -> + loop (Cparenthesis::accu) n (Cparenthesis::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl + | Backslash :: (Oparenthesis as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Oparenthesiss 0) :: tl -> + loop (Oparenthesis::accu) n (Oparenthesis::tl) + | Oparenthesis as e :: tl -> + loop (e::accu) (n+1) tl + | Oparenthesiss x as e :: tl -> + loop (e::accu) (n+x+2) tl + | Cparenthesis as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Cparenthesiss 0 :: tl -> + if n = 0 then + List.rev accu, Cparenthesis::tl + else + loop (Cparenthesis::accu) (n-1) (Cparenthesis::tl) + | Cparenthesiss x :: tl -> + if n = 0 then + List.rev accu, Cparenthesiss(x-1)::tl + else + loop + (match accu with + | Cparenthesis::accu -> Cparenthesiss(0)::accu + | Cparenthesiss x::accu -> Cparenthesiss(x+1)::accu + | _ -> Cparenthesis::accu) + (n-1) + (Cparenthesiss(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_cparenth %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_cparenth %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_oparenth ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Oparenthesis as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Oparenthesiss 0) :: tl -> + loop (Oparenthesis::accu) n (Oparenthesis::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Oparenthesis as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Oparenthesiss 0 :: tl -> + if n = 0 then + List.rev accu, Oparenthesis::tl + else + loop (Oparenthesis::accu) (n-1) (Oparenthesis::tl) + | Oparenthesiss x :: tl -> + if n = 0 then + List.rev accu, Oparenthesiss(x-1)::tl + else + loop + (match accu with + | Oparenthesis::accu -> Oparenthesiss(0)::accu + | Oparenthesiss x::accu -> Oparenthesiss(x+1)::accu + | _ -> Oparenthesis::accu) + (n-1) + (Oparenthesiss(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_oparenth %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_oparenth %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_dq ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Doublequote as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Doublequotes 0) :: tl -> + loop (Doublequote::accu) n (Doublequote::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Doublequote as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Doublequotes 0 :: tl -> + if n = 0 then + List.rev accu, Doublequote::tl + else + loop (Doublequote::accu) (n-1) (Doublequote::tl) + | Doublequotes x :: tl -> + if n = 0 then + List.rev accu, Doublequotes(x-1)::tl + else + loop + (match accu with + | Doublequote::accu -> Doublequotes(0)::accu + | Doublequotes x::accu -> Doublequotes(x+1)::accu + | _ -> Doublequote::accu) + (n-1) + (Doublequotes(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_dq %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_dq %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_q ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Quote as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Quotes 0) :: tl -> + loop (Quote::accu) n (Quote::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Quote as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Quotes 0 :: tl -> + if n = 0 then + List.rev accu, Quote::tl + else + loop (Quote::accu) (n-1) (Quote::tl) + | Quotes x :: tl -> + if n = 0 then + List.rev accu, Quotes(x-1)::tl + else + loop + (match accu with + | Quote::accu -> Quotes(0)::accu + | Quotes x::accu -> Quotes(x+1)::accu + | _ -> Quote::accu) + (n-1) + (Quotes(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_q %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_q %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_obracket ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Obracket as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Obrackets 0) :: tl -> + loop (Obracket::accu) n (Obracket::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Obracket as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Obrackets 0 :: tl -> + if n = 0 then + List.rev accu, Obracket::tl + else + loop (Obracket::accu) (n-1) (Obracket::tl) + | Obrackets x :: tl -> + if n = 0 then + List.rev accu, Obrackets(x-1)::tl + else + loop + (match accu with + | Obracket::accu -> Obrackets(0)::accu + | Obrackets x::accu -> Obrackets(x+1)::accu + | _ -> Obracket::accu) + (n-1) + (Obrackets(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_obracket %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_obracket %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_cbracket ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Cbracket as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Cbrackets 0) :: tl -> + loop (Cbracket::accu) n (Cbracket::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl + | Backslash :: (Obracket as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Obrackets 0) :: tl -> + loop (Obracket::accu) n (Obracket::tl) + | Obracket as e :: tl -> + loop (e::accu) (n+1) tl + | Obrackets x as e :: tl -> + loop (e::accu) (n+x+2) tl + | Cbracket as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Cbrackets 0 :: tl -> + if n = 0 then + List.rev accu, Cbracket::tl + else + loop (Cbracket::accu) (n-1) (Cbracket::tl) + | Cbrackets x :: tl -> + if n = 0 then + List.rev accu, Cbrackets(x-1)::tl + else + loop + (match accu with + | Cbracket::accu -> Cbrackets(0)::accu + | Cbrackets x::accu -> Cbrackets(x+1)::accu + | _ -> Cbracket::accu) + (n-1) + (Cbrackets(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_cbracket %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_cbracket %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + +let read_until_space ?(bq=false) ?(no_nl=false) l = + assert_well_formed l; + let rec loop accu n = function + | Backslash :: (Space as b) :: tl -> + loop (b::accu) n tl + | Backslash :: (Spaces 0) :: tl -> + loop (Space::accu) n (Space::tl) + | Backslashs 0 :: tl -> + loop (Backslash::accu) n tl + | Backslashs 1 :: tl -> + loop (Backslash::accu) n (Backslash::tl) + | Backslashs 2 :: tl -> + loop (Backslashs 0::accu) n tl + | (Backslashs x) :: tl -> + if x mod 2 = 0 then + loop (Backslashs(x/2-1)::accu) n tl + else + loop (Backslashs(x/2-1)::accu) n (Backslash::tl) + | (Backquote|Backquotes _ as e)::tl as l -> + if bq then + match bcode [] [] l with + | None -> loop (e::accu) n tl + | Some (r, _, tl) -> + loop (* not very pretty kind of hack *) + (List.rev(L.lex(Omd_backend.markdown_of_md r))@accu) + n + tl + else + loop (e::accu) n tl | Space as e :: tl -> + if n = 0 then + List.rev accu, tl + else + loop (e::accu) (n-1) tl + | Spaces 0 :: tl -> + if n = 0 then + List.rev accu, Space::tl + else + loop (Space::accu) (n-1) (Space::tl) + | Spaces x :: tl -> + if n = 0 then + List.rev accu, Spaces(x-1)::tl + else + loop + (match accu with + | Space::accu -> Spaces(0)::accu + | Spaces x::accu -> Spaces(x+1)::accu + | _ -> Space::accu) + (n-1) + (Spaces(x-1)::tl) + | (Newline|Newlines _ as e)::tl -> + if no_nl then + raise NL_exception + else + loop (e::accu) n tl + | e::tl -> + loop (e::accu) n tl + | [] -> + raise Premature_ending + in + if debug then + eprintf "Omd_parser.read_until_space %S bq=%b no_nl=%b\n%!" (L.string_of_tokens l) bq no_nl; + let res = loop [] 0 l in + if debug then + eprintf "Omd_parser.read_until_space %S bq=%b no_nl=%b => %S\n%!" (L.string_of_tokens l) bq no_nl (L.string_of_tokens (fst res)); + res + (* /end generated part *) + + let read_until_newline l = + assert_well_formed l; + let rec loop accu n = + function + | ((Backslash as a)) :: ((Newline as b)) :: tl -> + loop (b :: a :: accu) n tl + | Backslash :: Newlines 0 :: tl -> + loop (Newline :: Backslash :: accu) n (Newline :: tl) + | ((Backslashs 0 as e)) :: tl -> loop (e :: accu) n tl + | ((Backslashs x as e)) :: tl -> + if (x mod 2) = 0 + then loop (e :: accu) n tl + else loop ((Backslashs (x - 1)) :: accu) n (Backslash :: tl) + | ((Newline as e)) :: tl -> + if n = 0 then ((List.rev accu), tl) else loop (e :: accu) (n - 1) tl + | Newlines 0 :: tl -> + if n = 0 + then ((List.rev accu), (Newline :: tl)) + else loop (Newline :: accu) (n - 1) (Newline :: tl) + | Newlines n :: tl -> ((List.rev accu), ((Newlines (n - 1)) :: tl)) + | e :: tl -> loop (e :: accu) n tl + | [] -> raise Premature_ending + in loop [] 0 l + + (* H1, H2, H3, ... *) + let read_title (main_loop:main_loop) n r _previous lexemes = + let title, rest = + let rec loop accu = function + | Backslash::Hash::tl -> + loop (Hash::Backslash::accu) tl + | Backslashs(n)::Hash::tl when n mod 2 = 1 -> + loop (Hash::Backslashs(n-1)::accu) tl + | Backslash::Hashs(h)::tl -> + begin match tl with + | [] + | (Space|Spaces _)::(Newline|Newlines _)::_ + | (Newline|Newlines _)::_ -> + loop (Hash::Backslash::accu) + ((if h = 0 then Hash else Hashs(h-1))::tl) + | _ -> + loop (Hashs(h)::Backslash::accu) tl + end + | Backslashs(n)::Hashs(h)::tl when n mod 2 = 1 -> + begin match tl with + | [] + | (Space|Spaces _)::(Newline|Newlines _)::_ + | (Newline|Newlines _)::_ -> + loop (Hash::Backslashs(n)::accu) + ((if h = 0 then Hash else Hashs(h-1))::tl) + | _ -> + loop (Hashs(h)::Backslashs(n)::accu) tl + end + | (Hash|Hashs _) :: ((Newline|Newlines _) :: _ as l) + | (Hash|Hashs _) :: (Space|Spaces _) :: ((Newline|Newlines _)::_ as l) + | ((Newline|Newlines _) :: _ as l) + | ([] as l) + | (Space|Spaces _) :: (Hash|Hashs _) :: ((Newline|Newlines _) :: _ as l) + | (Space|Spaces _) :: (Hash|Hashs _) :: (Space|Spaces _) + :: ((Newline|Newlines _)::_ as l) + | (Space|Spaces _) :: ((Newline|Newlines _) :: _ as l) + | (Space|Spaces _) :: ([] as l) -> + main_loop [] [] (List.rev accu), l + | [Hash|Hashs _] + | [(Space|Spaces _); Hash|Hashs _] + | [(Space|Spaces _); (Hash|Hashs _); (Space|Spaces _)] -> + main_loop [] [] (List.rev accu), [] + | x::tl -> + loop (x::accu) tl + in + loop [] lexemes + in + match n with + | 1 -> Some(H1 title :: r, [Newline], rest) + | 2 -> Some(H2 title :: r, [Newline], rest) + | 3 -> Some(H3 title :: r, [Newline], rest) + | 4 -> Some(H4 title :: r, [Newline], rest) + | 5 -> Some(H5 title :: r, [Newline], rest) + | 6 -> Some(H6 title :: r, [Newline], rest) + | _ -> None + + let maybe_extension extensions r p l = + match extensions with + | [] -> None + | _ -> + List.fold_left + (function + | None -> + (fun f -> f#parser_extension r p l) + | Some(nr, np, nl) as e -> + (fun f -> match f#parser_extension nr np nl with + | None -> e + | Some _ as k -> k) + ) + None + extensions + + (* blockquotes *) + let emailstyle_quoting (main_loop:main_loop) r _p lexemes = + assert_well_formed lexemes; + let rec loop block cl = + function + | Newline::Greaterthan::(Newline::_ as tl) -> + loop (Newline::cl@block) [] tl + | Newline::Greaterthan::Space::tl -> + loop (Newline::cl@block) [] tl + | Newline::Greaterthan::Spaces 0::tl -> + loop (Newline::cl@block) [Space] tl + | Newline::Greaterthan::Spaces n::tl -> + assert(n>0); + loop (Newline::cl@block) [Spaces(n-1)] tl + + (* multi paragraph blockquotes with empty lines *) + | Newlines 0::Greaterthan::Space::tl -> + loop (Newlines 0::cl@block) [] tl + | Newlines 0::Greaterthan::Spaces 0::tl -> + loop (Newlines 0::cl@block) [Space] tl + | Newlines 0::Greaterthan::Spaces n::tl -> + assert(n>0); + loop (Newlines 0::cl@block) [Spaces(n-1)] tl + + | (Newlines _::_ as l) | ([] as l) -> fix(List.rev(cl@block)), l + | e::tl -> loop block (e::cl) tl + in + match loop [] [] lexemes with + | (Newline|Newlines _)::block, tl -> + if debug then + eprintf "(OMD) Omd_parser.emailstyle_quoting %S\n%!" + (L.string_of_tokens block); + Some((Blockquote(main_loop [] [] block)::r), [Newline], tl) + | _ -> + None + + + (* maybe a reference *) + let maybe_reference (main_loop:main_loop) rc r _p l = + assert_well_formed l; + (* this function is called when we know it's not a link although + it started with a '[' *) + (* So it could be a reference or a link definition. *) + let rec maybe_ref l = + let text, remains = read_until_cbracket ~bq:true l in + (* check that there is no ill-placed open bracket *) + if (try ignore(read_until_obracket ~bq:true text); true + with Premature_ending -> false) then + raise Premature_ending; (* <-- ill-placed open bracket *) + let blank, remains = read_until_obracket ~bq:true remains in + (* check that there are no unwanted characters between CB and OB. *) + if eat (let flag = ref true in + function (* allow only a space, multiple spaces, or a newline *) + | Newline -> !flag && (flag := false; true) + | (Space|Spaces _) -> !flag && (flag := false; true) + | _ -> false) blank <> [] then + raise Premature_ending (* <-- not a regular reference *) + else + match read_until_cbracket ~bq:true remains with + | [], remains -> + let fallback = extract_fallback main_loop remains (Obracket::l) in + let id = L.string_of_tokens text in (* implicit anchor *) + Some(((Ref(rc, id, id, fallback))::r), [Cbracket], remains) + | id, remains -> + let fallback = extract_fallback main_loop remains (Obracket::l) in + Some(((Ref(rc, L.string_of_tokens id, + L.string_of_tokens text, fallback))::r), + [Cbracket], remains) + in + let rec maybe_nonregular_ref l = + let text, remains = read_until_cbracket ~bq:true l in + (* check that there is no ill-placed open bracket *) + if (try ignore(read_until_obracket ~bq:true text); true + with Premature_ending -> false) then + raise Premature_ending; (* <-- ill-placed open bracket *) + let fallback = extract_fallback main_loop remains (Obracket::l) in + let id = L.string_of_tokens text in (* implicit anchor *) + Some(((Ref(rc, id, id, fallback))::r), [Cbracket], remains) + in + let rec maybe_def l = + match read_until_cbracket ~bq:true l with + | _, [] -> raise Premature_ending + | id, (Colon::(Space|Spaces _)::remains) + | id, (Colon::remains) -> + begin + match + fsplit + ~f:(function + | (Space|Spaces _|Newline|Newlines _):: _ as l -> Split([], l) + | e::tl -> Continue + | [] -> Split([],[])) + remains + with + | None | Some([], _) -> raise Premature_ending + | Some(url, remains) -> + let title, remains = + match + eat + (function | (Space|Spaces _|Newline|Newlines _) -> true + | _ -> false) + remains + with + | Doublequotes(0)::tl -> [], tl + | Doublequote::tl -> read_until_dq ~bq:true tl + | Quotes(0)::tl -> [], tl + | Quote::tl -> read_until_q ~bq:true tl + | Oparenthesis::tl-> read_until_cparenth ~bq:true tl + | l -> [], l + in + let url = + let url = L.string_of_tokens url in + if String.length url > 2 && url.[0] = '<' + && url.[String.length url - 1] = '>' then + String.sub url 1 (String.length url - 2) + else + url + in + rc#add_ref (L.string_of_tokens id) (L.string_of_tokens title) url; + Some(r, [Newline], remains) + end + | _ -> raise Premature_ending + in + try + maybe_ref l + with | Premature_ending | NL_exception -> + try + maybe_def l + with + | Premature_ending | NL_exception -> + try + maybe_nonregular_ref l + with + | Premature_ending | NL_exception -> + None + + + (** maybe a link *) + let maybe_link (main_loop:main_loop) r _p l = + if debug then eprintf "(OMD) # maybe_link\n"; + assert_well_formed l; + let read_url name l = + if debug then + eprintf "(OMD) # maybe_link>read_url %S\n" (L.string_of_tokens l); + try + let l_cp, r_cp = + read_until_cparenth ~no_nl:true ~bq:false l + in + if debug then eprintf "(OMD) maybe_link >> l_cp=%S r_cp=%S\n%!" + (L.string_of_tokens l_cp) + (L.string_of_tokens r_cp); + try + let l_dq, r_dq = + read_until_dq ~no_nl:true ~bq:false l + in + if debug then eprintf "(OMD) maybe_link >> l_dq=%S r_dq=%S\n%!" + (L.string_of_tokens l_dq) + (L.string_of_tokens r_dq); + (* maybe title *) + if List.length l_cp > List.length l_dq then (* title *) + begin + if debug then eprintf "(OMD) maybe_link >> title\n%!"; + let url = + match List.rev l_dq with + | (Newline|Space|Spaces _)::(Newline|Space|Spaces _)::tl + | (Newline|Space|Spaces _)::tl -> + L.string_of_tokens (List.rev tl) + | _ -> + L.string_of_tokens l_dq + in + let title, rest = read_until_dq ~no_nl:false ~bq:false r_dq in + let rest = snd(read_until_cparenth rest) in + let title = L.string_of_tokens title in + Some(Url(url, name, title) :: r, [Cparenthesis], rest) + end + else (* no title *) + raise Premature_ending + with NL_exception | Premature_ending -> (* no title *) + begin + if debug then eprintf "(OMD) maybe_link >> no title\n%!"; + let url = match List.rev l_cp with + | (Newline|Space|Spaces _)::(Newline|Space|Spaces _)::tl + | (Newline|Space|Spaces _)::tl -> List.rev tl + | _ -> l_cp + in + let title, rest = [], r_cp in + let url = L.string_of_tokens url in + let title = L.string_of_tokens title in + Some(Url(url, name, title) :: r, [Cparenthesis], rest) + end + with NL_exception | Premature_ending -> + None + in + let read_name l = + (* it's not really the "name" of a URL but what + corresponds to the inner HTML of an HTML 'A' tag *) + if debug then eprintf "(OMD) # maybe_link> read_name\n"; + try + match read_until_cbracket ~bq:true l with + | name, (Oparenthesis::tl) -> + read_url (main_loop [] [Obracket] name) (eat_blank tl) + | name, (Oparenthesiss 0::tl) -> + read_url (main_loop [] [Obracket] name) (Oparenthesis::tl) + | name, (Oparenthesiss n::tl) -> + read_url (main_loop [] [Obracket] name) (Oparenthesiss(n-1)::tl) + | _ -> + None + with Premature_ending | NL_exception -> None + in + read_name l + + + let has_paragraphs l = + (* Has at least 2 consecutive newlines. *) + List.exists (function Newlines _ -> true | _ -> false) l + + let parse_list (main_loop:main_loop) r _p l = + assert_well_formed l; + if debug then begin + eprintf "(OMD) parse_list r=(%s) p=(%s) l=(%s)\n%!" + "" (* (Omd_backend.sexpr_of_md (List.rev r)) *) + "" (* (destring_of_tl p) *) + (L.destring_of_tokens ~limit:40 l); + end; + let module UO = struct type ordered = O | U end in + let open UO in + if debug then + eprintf "(OMD) parse_list: l=(%s)\n%!" (L.destring_of_tokens l); + let end_of_item (indent:int) l : tok split_action = match l with + | [] -> + Split([],[]) + | Newlines 0 :: ((Spaces n) :: Greaterthan :: (Space | Spaces _) :: tl + as s) -> + assert(n>=0); + if n+2 = indent+4 then (* blockquote *) + match unindent (n+2) (Newline::s) with + | Newline::block, rest -> + Continue_with(List.rev(Newlines(1)::block), rest) + | Newlines n::block, rest -> + Continue_with(List.rev(Newlines(n+2)::block), rest) + | block, rest -> + Continue_with(Newlines 0::block, rest) + else if n+2 >= indent+8 then (* code inside item *) + match unindent (indent+4) (Newline::s) with + | Newline::block, rest -> + Continue_with(List.rev(Newlines(1)::block), rest) + | Newlines n::block, rest -> + Continue_with(List.rev(Newlines(n+2)::block), rest) + | block, rest -> + Continue_with(Newlines 0::block, rest) + else + Split([], l) + | Newlines 0 :: (Spaces n :: tl as s) -> + assert(n>=0); + if n+2 >= indent+8 then (* code inside item *) + match unindent (indent+4) (Newline::s) with + | Newline::block, rest -> + Continue_with(List.rev(Newlines(0)::block), rest) + | Newlines n::block, rest -> + Continue_with(List.rev(Newlines(n+1)::block), rest) + | block, rest -> + Continue_with(Newline::block, rest) + else if n+2 >= indent+4 then (* new paragraph inside item *) + match unindent (indent+4) (Newline::s) with + | Newline::block, rest -> + Continue_with(List.rev(Newlines(1)::block), rest) + | Newlines n::block, rest -> + Continue_with(List.rev(Newlines(n+2)::block), rest) + | block, rest -> + Continue_with(Newlines 0::block, rest) + else + Split([], l) + | (Newlines _) :: _ -> (* n > 0 *) + (* End of item, stop *) + Split([], l) + | Newline :: + ( + ((Space|Spaces _) :: (Star|Minus|Plus) :: (Space|Spaces _):: _) + | ((Space|Spaces _) :: Number _ :: Dot :: (Space|Spaces _) :: _) + | ((Star|Minus|Plus) :: (Space|Spaces _):: _) + | (Number _ :: Dot :: (Space|Spaces _) :: _) + as tl) -> + Split([Newline], tl) + | Newline :: (Space | Spaces _) :: Newline :: tl -> + (* A line with spaces shouldn't interfere here, + which is about exactly 2 consecutive newlines, + so we rewrite the head of the lexing stream. *) + Continue_with([], Newlines 0 :: tl) + | Newline :: (Space | Spaces _) :: (Newlines _) :: _ -> + (* A line with spaces shouldn't interfere here, + which is about at least 3 consecutive newlines, + so we stop. *) + Split([], l) + | Newline :: (Spaces _ as s) :: tl -> + Continue_with + ([s; + Tag("parse_list/remember spaces", + object + method parser_extension r p = + function Spaces _::tl -> Some(r,p,Space::tl) + | _ -> None + method to_string = "" + end); + Newline], + tl) + | Newline :: (Space as s) :: tl -> + Continue_with + ([s; + Tag("parse_list/remember space", + object + method parser_extension r p = + function (Space|Spaces _)::tl -> Some(r,p,Space::tl) + | _ -> None + method to_string = "" + end); + Newline], + tl) + | _::_ -> + Continue + in + let rev_to_t l = + assert_well_formed l; + (* Newlines at the end of items have no meaning (except to end the + item which is expressed by the constructor already). *) + let l = match l with (Newline | Newlines _) :: tl -> tl | _ -> l in + main_loop [] [Newline] (List.rev l) + in + let add (sublist:element) items = + if debug then eprintf "(OMD) add\n%!"; + match items with + | [] -> assert false + | (O,indents,item)::tl -> + (O,indents,(item@[sublist]))::tl + | (U,indents,item)::tl -> + (U,indents,(item@[sublist]))::tl + in + let make_up ~p items : Omd_representation.element = + if debug then eprintf "(OMD) make_up p=%b\n%!" p; + let items = List.rev items in + match items with + | (U,_,item)::_ -> + if p then + Ulp(List.map (fun (_,_,i) -> i) items) + else + Ul(List.map (fun (_,_,i) -> i) items) + | (O,_,item)::_ -> + if p then + Olp(List.map (fun (_,_,i) -> i) items) + else + Ol(List.map (fun (_,_,i) -> i) items) + | [] -> + failwith "make_up called with []" (* assert false *) + in + let rec list_items ~p indents items l = + if debug then eprintf "(OMD) list_items: p=%b l=(%s)\n%!" + p (L.destring_of_tokens l); + match l with + (* no more list items *) + | [] -> + make_up p items, l + (* more list items *) + (* new unordered items *) + | (Star|Minus|Plus)::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item 0) tl with + | None -> + make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + if debug then + eprintf "(OMD) (2346) new_item=%S\n%!" + (L.destring_of_tokens new_item); + match indents with + | [] -> + assert(items = []); + list_items ~p [0] ((U,[0], rev_to_t new_item)::items) rest + | 0::_ -> + list_items ~p indents ((U,indents,rev_to_t new_item)::items) rest + | _::_ -> + make_up p items, l + end + | Space::(Star|Minus|Plus)::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item 1) tl with + | None -> make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + match indents with + | [] -> + assert(items = []); + list_items ~p [1] ((U,[1],rev_to_t new_item)::items) rest + | 1::_ -> + list_items ~p indents ((U,indents,rev_to_t new_item)::items) rest + | i::_ -> + if i > 1 then + make_up p items, l + else (* i < 1 : new sub list*) + let sublist, remains = + list_items ~p (1::indents) + [(U,1::indents,rev_to_t new_item)] rest + in + list_items ~p indents (add sublist items) remains + end + | Spaces n::(Star|Minus|Plus)::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item (n+2)) tl with + | None -> + make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + match indents with + | [] -> + if debug then + eprintf "(OMD) spaces[] l=(%S)\n%!" (L.string_of_tokens l); + assert(items = []); (* ae... listes mal formes ?! *) + list_items ~p [n+2] ((U,[n+2],rev_to_t new_item)::items) rest + | i::_ -> + if debug then eprintf "(OMD) spaces(%d::_) n=%d l=(%S)\n%!" + i n (L.string_of_tokens l); + if i = n + 2 then + let items = (U,indents,rev_to_t new_item) :: items in + list_items ~p indents items rest + else if i < n + 2 then + let sublist, remains = + list_items ~p ((n+2)::indents) + [(U,(n+2)::indents,rev_to_t new_item)] + rest + in + list_items ~p indents (add sublist items) remains + else (* i > n + 2 *) + make_up p items, l + end + (* new ordered items *) + | Number _::Dot::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item 0) tl with + | None -> + make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + assert_well_formed new_item; + match indents with + | [] -> + assert(items = []); + list_items ~p [0] ((O,[0],rev_to_t new_item)::items) rest + | 0::_ -> + list_items ~p indents ((O,indents,rev_to_t new_item)::items) rest + | _::_ -> + make_up p items, l + end + | Space::Number _::Dot::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item 1) tl with + | None -> make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + match indents with + | [] -> + assert(items = []); + list_items ~p [1] ((O,[1],rev_to_t new_item)::items) rest + | 1::_ -> + list_items ~p indents ((O,indents,rev_to_t new_item)::items) rest + | i::_ -> + if i > 1 then + make_up p items, l + else (* i < 1 : new sub list*) + let sublist, remains = + list_items ~p (1::indents) + [(O,1::indents,rev_to_t new_item)] rest + in + list_items ~p:p indents (add sublist items) remains + end + | Spaces n::Number _::Dot::(Space|Spaces _)::tl -> + begin + match fsplit_rev ~f:(end_of_item (n+2)) tl with + | None -> + make_up p items, l + | Some(new_item, rest) -> + let p = p || has_paragraphs new_item in + match indents with + | [] -> + if debug then eprintf "(OMD) spaces[] l=(%S)\n%!" + (L.string_of_tokens l); + assert(items = []); (* ae... listes mal formes ?! *) + list_items ~p [n+2] ((O,[n+2],rev_to_t new_item)::items) rest + | i::_ -> + if debug then eprintf "(OMD) spaces(%d::_) n=%d l=(%S)\n%!" + i n (L.string_of_tokens l); + if i = n + 2 then + list_items ~p indents ((O,indents,rev_to_t new_item)::items) + rest + else if i < n + 2 then + let sublist, remains = + list_items ~p + ((n+2)::indents) + [(O,(n+2)::indents,rev_to_t new_item)] + rest + in + list_items ~p:p indents (add sublist items) remains + else (* i > n + 2 *) + make_up p items, l + end + (* *) + | Newlines 0::((Star|Minus|Plus)::(Space|Spaces _)::_ as l) + | Newlines 0::(Number _::Dot::(Space|Spaces _)::_ as l) + | Newlines 0::((Space|Spaces _)::Star::(Space|Spaces _)::_ as l) + | Newlines 0::((Space|Spaces _)::Number _::Dot::(Space|Spaces _)::_ as l) + -> + list_items ~p:true indents items l + | _ -> + if debug then + begin + let rec string_of_items items = + match items with + | [] -> "" + | (O,indent::_,item)::tl -> + sprintf "(O,i=%d,%S)" (indent) (Omd_backend.html_of_md item) + ^ string_of_items tl + | (U,indent::_,item)::tl -> + sprintf "(U,i=%d,%S)" (indent) (Omd_backend.html_of_md item) + ^ string_of_items tl + | _ -> "(weird)" + in + eprintf "(OMD) NALI parse_list: l=(%S) items=%s\n%!" + (L.string_of_tokens l) (string_of_items items) + end; + (* not a list item *) + make_up p items, l + in + match list_items ~p:false [] [] l with + | rp, l -> + rp::r, [Newline], l + + + + let icode ?(default_lang=default_lang) r _p l = + assert_well_formed l; + (* indented code: returns (r,p,l) where r is the result, p is the + last thing read, l is the remains *) + let dummy_tag = Tag("dummy_tag", + object + method to_string = "" + method parser_extension = fun r p l -> None + end) in + let accu = Buffer.create 64 in + let rec loop s tl = match s, tl with + | (Newline|Newlines _ as p), (Space|Spaces(0|1))::_ -> + (* 1, 2 or 3 spaces. *) + (* -> Return what's been found as code because what follows isn't. *) + Code_block(default_lang, Buffer.contents accu) :: r, [p], tl + | (Newline|Newlines _ as p), Spaces(n)::tl -> + assert(n>0); + (* At least 4 spaces, it's still code. *) + Buffer.add_string accu (L.string_of_token p); + loop + (if n >= 4 then Spaces(n-4) else if n = 3 then Space else dummy_tag) + tl + | (Newline|Newlines _ as p), (not_spaces::_ as tl) -> (* stop *) + Code_block(default_lang, Buffer.contents accu) :: r, [p], tl + (* -> Return what's been found as code because it's no more code. *) + | p, e::tl -> + Buffer.add_string accu (L.string_of_token p); + (* html entities are to be converted later! *) + loop e tl + | p, [] -> + Buffer.add_string accu (L.string_of_token p); + Code_block(default_lang, Buffer.contents accu)::r, [p], [] + in + match l with + | Spaces n::tl -> + if n >= 4 then + Some(loop (Spaces(n-4)) tl) + else if n = 3 then + Some(loop Space tl) + else Some(loop dummy_tag tl) + | _ -> assert false + + + (* Returns [(r,p,l)] where [r] is the result, [p] is the last thing + read, and [l] is what remains. *) + let spaces_at_beginning_of_line main_loop default_lang n r previous lexemes = + assert_well_formed lexemes; + assert (n > 0); + if n <= 3 then ( + match lexemes with + | (Star|Minus|Plus) :: (Space|Spaces _) :: _ -> + (* unordered list *) + parse_list main_loop r [] (L.make_space n::lexemes) + | (Number _)::Dot::(Space|Spaces _)::tl -> + (* ordered list *) + parse_list main_loop r [] (L.make_space n::lexemes) + | [] + | (Newline|Newlines _) :: _ -> (* blank line, skip spaces *) + r, previous, lexemes + | _::_ -> + Text (" ")::r, previous, lexemes + ) + else ( (* n>=4, blank line or indented code *) + match lexemes with + | [] | (Newline|Newlines _) :: _ -> r, previous, lexemes + | _ -> + match + icode ~default_lang r [Newline] (L.make_space n :: lexemes) + with + | Some(r,p,l) -> r,p,l + | None -> + if debug then + eprintf "(OMD) Omd_parser.icode or \ + Omd_parser.main_loop is broken\n%!"; + assert false + ) + + let spaces_not_at_beginning_of_line ?(html=false) n r lexemes = + assert_well_formed lexemes; + assert (n > 0); + if n = 1 then + (Text " "::r), [Space], lexemes + else ( + match lexemes with + | Newline :: tl when not html -> + if debug then + eprintf + "(OMD) 2 or more spaces before a newline, eat the newline\n%!"; + Br::r, [Spaces(n-2)], tl + | Newlines k :: tl when not html -> + if debug then + eprintf + "(OMD) 2 or more spaces before a newline, eat 1 newline"; + let newlines = if k = 0 then Newline else Newlines(k-1) in + Br::r, [Spaces(n-2)], newlines :: tl + | _ -> + assert (n>1); + (Text (String.make n ' ')::r), [Spaces(n-2)], lexemes + ) + + + let maybe_autoemail r p l = + assert_well_formed l; + match l with + | Lessthan::tl -> + begin + match + fsplit ~excl:(function (Newline|Newlines _|Space|Spaces _) :: _-> true + | [] -> true + | _ -> false) + ~f:(function At::tl -> Split([],tl) | _ -> Continue) + tl + with + | None -> None + | Some(left, right) -> + match + fsplit + ~excl:(function + | (Newline|Newlines _|Space|Spaces _) :: _-> true + | [] -> true + | _ -> false) + ~f:(function Greaterthan::tl -> Split([],tl) + | Greaterthans 0::tl -> Split([],Greaterthan::tl) + | Greaterthans n::tl -> Split([],Greaterthans(n-1)::tl) + | _ -> Continue) + right + with + | None -> None + | Some(domain, tl) -> + let email = L.string_of_tokens left + ^ "@" ^ L.string_of_tokens domain in + Some(Url("mailto:"^email,[Text email],"")::r,[Greaterthan],tl) + end + | _ -> failwith "Omd_parser.maybe_autoemail: wrong use of the function." + + let is_hex s = + String.length s > 1 + && (s.[0] = 'X' || s.[0] = 'x') + && (let rec loop i = + i = String.length s + || + (match s.[i] with + | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' -> + loop (succ i) + | _ -> false) + in loop 1) + + let mediatypetextomd : string list ref = ref [] + + let filter_text_omd_rev l = + let rec loop b r = function + | [] -> if b then r else l + | ("media:type", Some "text/omd")::tl -> + loop true r tl + | e::tl -> + loop b (e::r) tl + in + loop false [] l + + exception Orphan_closing of string * l * l + + let rec main_impl_rev ~html (r:r) (previous:p) (lexemes:l) = + (* if debug then eprintf "(OMD) main_impl_rev html=%b\n%!" html; *) + assert_well_formed lexemes; + if debug then + eprintf "(OMD) main_impl_rev html=%b r=%s p=(%s) l=(%s)\n%!" + html + (Omd_backend.sexpr_of_md (List.rev r)) + (L.destring_of_tokens previous) + (L.destring_of_tokens lexemes); + match previous, lexemes with + (* no more to process *) + | _, [] -> + (* return the result (/!\ it has to be reversed as some point) *) + r + + (* Tag: tag system $\cup$ high-priority extension mechanism *) + | _, Tag(_name, e) :: tl -> + begin match e#parser_extension r previous tl with + | Some(r, p, l) -> + main_impl_rev ~html r p l + | None -> + main_impl_rev ~html r previous tl + end + + (* HTML comments *) + | _, (Lessthan as t)::(Exclamation::Minuss 0::c as tl) -> + begin + let f = function + | (Minuss _ as m)::(Greaterthan|Greaterthans _ as g)::tl -> + Split([g;m], tl) + | _ -> + Continue + in + match fsplit ~f:f lexemes with + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + | Some (comments, new_tl) -> + let r = Html_comment(L.string_of_tokens comments) :: r in + main_impl_rev ~html r [Newline] new_tl + end + + (* email-style quoting / blockquote *) + | ([]|[Newline|Newlines _]), Greaterthan::(Space|Spaces _)::_ -> + begin + match + emailstyle_quoting main_loop r previous (Newline::lexemes) + with + | Some(r,p,l) -> main_impl_rev ~html r p l + | None -> + if debug then + eprintf "(OMD) Omd_parser.emailstyle_quoting or \ + Omd_parser.main_loop is broken\n%!"; + assert false + end + + (* email-style quoting, with lines starting with spaces! *) + | ([]|[Newline|Newlines _]), (Space|Spaces(0|1) as s) + :: Greaterthan :: (Space|Spaces _)::_ -> + (* It's 1, 2 or 3 spaces, not more because it wouldn't mean + quoting anymore but code. *) + begin + let new_r, p, rest = + let foo, rest = + match unindent (L.length s) (Newline::lexemes) with + | (Newline|Newlines _)::foo, rest -> foo, rest + | res -> res + in + match + emailstyle_quoting main_loop [] previous (Newline::foo) + with + | Some(new_r, p, []) -> new_r, p, rest + | _ -> + if debug then + eprintf "(OMD) Omd_parser.emailstyle_quoting or \ + Omd_parser.main_loop is broken\n%!"; + assert false + in + main_impl_rev ~html (new_r@r) [Newline] rest + end + + (* minus *) + | ([]|[Newline|Newlines _]), + (Minus|Minuss _ as t) :: ((Space|Spaces _)::_ as tl) -> + (* maybe hr *) + begin match hr_m lexemes with + | None -> (* no hr, so it could be a list *) + begin match t with + | Minus -> (* it's a list *) + let md, new_p, new_l = + parse_list main_loop r [] lexemes + in + main_impl_rev ~html md new_p new_l + | _ -> (* not a list *) + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + end + | Some l -> (* hr *) + main_impl_rev ~html (Hr::r) [Newline] l + end + | ([]|[Newline|Newlines _]), (Minus|Minuss _ as t)::tl -> + begin match hr_m lexemes with + | None -> (* no hr, and it's not a list either + because it's not followed by spaces *) + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + | Some l -> (* hr *) + main_impl_rev ~html (Hr::r) [Newline] l + end + + (* hashes *) + | ([]|[(Newline|Newlines _)]), + (Hashs n as t) :: ((Space|Spaces _) :: ttl as tl) + | ([]|[(Newline|Newlines _)]), + (Hashs n as t) :: (ttl as tl) -> (* hash titles *) + if n <= 4 then + match read_title main_loop (n+2) r previous ttl with + | Some(r, p, l) -> main_impl_rev ~html r p l + | None -> + if debug then + eprintf "(OMD) Omd_parser.read_title or \ + Omd_parser.main_loop is broken\n%!"; + assert false + else + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + | ([]|[(Newline|Newlines _)]), Hash :: (Space|Spaces _) :: tl + | ([]|[(Newline|Newlines _)]), Hash :: tl -> (* hash titles *) + begin match read_title main_loop 1 r previous tl with + | Some(r, p, l) -> main_impl_rev ~html r p l + | None -> + if debug then + eprintf "(OMD) Omd_parser.read_title or \ + Omd_parser.main_loop is broken\n%!"; + assert false + end + | _, (Hash|Hashs _ as t) :: tl -> (* hash -- no title *) + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + + (* spaces after a newline: could lead to hr *) + | ([]|[Newline|Newlines _]), ((Space|Spaces _) as sp) :: tl -> + begin match hr tl with + | None -> + (* No [Hr], but maybe [Ul], [Ol], code,... *) + let n = L.length sp in + let r, p, l = + spaces_at_beginning_of_line main_loop default_lang n r previous tl in + main_impl_rev ~html r p l + | Some tl -> + main_impl_rev ~html (Hr::r) [Newline] tl + end + + (* spaces anywhere *) + | _, ((Space|Spaces _) as t) :: tl -> + (* too many cases to be handled here *) + let n = L.length t in + let r, p, l = spaces_not_at_beginning_of_line ~html n r tl in + main_impl_rev ~html r p l + + (* underscores *) + | _, (Underscore as t) :: tl -> (* one "orphan" underscore, or emph *) + (match uemph_or_bold 1 tl with + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + | Some(x, new_tl) -> + main_impl_rev ~html (Emph(main_impl ~html [] [t] x) :: r) [t] new_tl + ) + | _, (Underscores((0|1) as n) as t) :: tl -> + (* 2 or 3 "orphan" underscores, or emph/bold *) + (match uemph_or_bold (n+2) tl with + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + | Some(x, new_tl) -> + if n = 0 then (* 1 underscore *) + main_impl_rev ~html (Bold(main_impl ~html [] [t] x) :: r) [t] new_tl + else (* 2 underscores *) + main_impl_rev ~html (Emph([Bold(main_impl ~html [] [t] x)]) :: r) [t] new_tl + ) + + (* enumerated lists *) + | ([]|[Newline|Newlines _]), (Number _) :: Dot :: (Space|Spaces _) :: tl -> + let md, new_p, new_l = + parse_list main_loop r [] lexemes + in + main_impl_rev ~html md new_p new_l + + (* plus *) + | ([]|[(Newline|Newlines _)]), Plus :: (Space|Spaces _) :: _ -> + let md, new_p, new_l = + parse_list main_loop r [] lexemes + in + main_impl_rev ~html md new_p new_l + + (* stars *) + | ([]|[(Newline|Newlines _)]), Star :: (Space|Spaces _) :: _ -> + (* maybe hr or new list *) + begin match hr_s lexemes with + | Some l -> + main_impl_rev ~html (Hr::r) [Newline] l + | None -> + let md, new_p, new_l = + parse_list main_loop r [] lexemes + in + main_impl_rev ~html md new_p new_l + end + | ([]|[(Newline|Newlines _)]), Stars _ :: _ when hr_s lexemes <> None -> + (* hr *) + (match hr_s lexemes with + | Some l -> main_impl_rev ~html (Hr::r) [Newline] l + | None -> assert false + ) + | ([]|[(Newline|Newlines _)]), (Star as t) :: tl -> (* maybe hr *) + begin match hr_s lexemes with + | Some l -> + main_impl_rev ~html (Hr::r) [Newline] l + | None -> + (match semph_or_bold 1 tl with + | Some(x, new_tl) -> + main_impl_rev ~html (Emph(main_impl ~html [] [t] x) :: r) [t] new_tl + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + ) + end + | _, (Star as t) :: tl -> (* one "orphan" star, or emph // can't be hr *) + (match semph_or_bold 1 tl with + | Some(x, new_tl) -> + main_impl_rev ~html (Emph(main_impl ~html [] [t] x) :: r) [t] new_tl + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + | _, (Stars((0|1) as n) as t) :: tl -> + (* 2 or 3 "orphan" stars, or emph/bold *) + (match semph_or_bold (n+2) tl with + | Some(x, new_tl) -> + if n = 0 then + main_impl_rev ~html (Bold(main_impl ~html [] [t] x) :: r) [t] new_tl + else + main_impl_rev ~html (Emph([Bold(main_impl ~html [] [t] x)]) :: r) [t] new_tl + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + + (* backslashes *) + | _, Backslash :: (Newline as t) :: tl -> (* \\n *) + main_impl_rev ~html (Br :: r) [t] tl + | _, Backslash :: Newlines 0 :: tl -> (* \\n\n\n\n... *) + main_impl_rev ~html (Br :: r) [Backslash; Newline] (Newline :: tl) + | _, Backslash :: Newlines n :: tl -> assert (n >= 0); (* \\n\n\n\n... *) + main_impl_rev ~html (Br :: r) [Backslash; Newline] + (Newlines (n-1) :: tl) + | _, Backslash :: (Backquote as t) :: tl -> (* \` *) + main_impl_rev ~html (Text ("`") :: r) [t] tl + | _, Backslash :: Backquotes 0 :: tl -> (* \````... *) + main_impl_rev ~html (Text ("`") :: r) [Backslash; Backquote] (Backquote :: tl) + | _, Backslash :: Backquotes n :: tl -> assert (n >= 0); (* \````... *) + main_impl_rev ~html (Text ("`") :: r) [Backslash; Backquote] + (Backquotes (n-1) :: tl) + | _, Backslash :: (Star as t) :: tl -> (* \* *) + main_impl_rev ~html (Text ("*") :: r) [t] tl + | _, Backslash :: Stars 0 :: tl -> (* \****... *) + main_impl_rev ~html (Text ("*") :: r) [Backslash; Star] (Star :: tl) + | _, Backslash :: Stars n :: tl -> assert (n >= 0); (* \****... *) + main_impl_rev ~html (Text ("*") :: r) [Backslash; Star] (Stars (n-1) :: tl) + | _, Backslash :: (Underscore as t) :: tl -> (* \_ *) + main_impl_rev ~html (Text ("_") :: r) [t] tl + | _, Backslash :: Underscores 0 :: tl -> (* \___... *) + main_impl_rev ~html (Text ("_") :: r) [Backslash; Underscore] (Underscore :: tl) + | _, Backslash :: Underscores n :: tl -> assert (n >= 0); (* \___... *) + main_impl_rev ~html (Text ("_") :: r) [Backslash; Underscore] + (Underscores (n-1) :: tl) + | _, Backslash :: (Obrace as t) :: tl -> (* \{ *) + main_impl_rev ~html (Text ("{") :: r) [t] tl + | _, Backslash :: Obraces 0 :: tl -> (* \{{{... *) + main_impl_rev ~html (Text ("{") :: r) [Backslash; Obrace] (Obrace :: tl) + | _, Backslash :: Obraces n :: tl -> assert (n >= 0); (* \{{{... *) + main_impl_rev ~html (Text ("{") :: r) [Backslash; Obrace] (Obraces (n-1) :: tl) + | _, Backslash :: (Cbrace as t) :: tl -> (* \} *) + main_impl_rev ~html (Text ("}") :: r) [t] tl + | _, Backslash :: Cbraces 0 :: tl -> (* \}}}... *) + main_impl_rev ~html (Text ("}") :: r) [Backslash; Cbrace] (Cbrace :: tl) + | _, Backslash :: Cbraces n :: tl -> assert (n >= 0); (* \}}}... *) + main_impl_rev ~html (Text ("}") :: r) [Backslash; Cbrace] (Cbraces (n-1) :: tl) + | _, Backslash :: (Obracket as t) :: tl -> (* \[ *) + main_impl_rev ~html (Text ("[") :: r) [t] tl + | _, Backslash :: Obrackets 0 :: tl -> (* \[[[... *) + main_impl_rev ~html (Text ("[") :: r) [Backslash; Obracket] (Obracket :: tl) + | _, Backslash :: Obrackets n :: tl -> assert (n >= 0); (* \[[[... *) + main_impl_rev ~html (Text ("[") :: r) [Backslash; Obracket] (Obrackets (n-1) :: tl) + | _, Backslash :: (Cbracket as t) :: tl -> (* \} *) + main_impl_rev ~html (Text ("]") :: r) [t] tl + | _, Backslash :: Cbrackets 0 :: tl -> (* \}}}... *) + main_impl_rev ~html (Text ("]") :: r) [Backslash; Cbracket] (Cbracket :: tl) + | _, Backslash :: Cbrackets n :: tl -> assert (n >= 0); (* \}}}... *) + main_impl_rev ~html (Text ("]") :: r) [Backslash; Cbracket] (Cbrackets (n-1) :: tl) + | _, Backslash :: (Oparenthesis as t) :: tl -> (* \( *) + main_impl_rev ~html (Text ("(") :: r) [t] tl + | _, Backslash :: Oparenthesiss 0 :: tl -> (* \(((... *) + main_impl_rev ~html (Text ("(") :: r) [Backslash; Oparenthesis] (Oparenthesis :: tl) + | _, Backslash :: Oparenthesiss n :: tl -> assert (n >= 0); (* \(((... *) + main_impl_rev ~html (Text ("(") :: r) [Backslash; Oparenthesis] + (Oparenthesiss (n-1) :: tl) + | _, Backslash :: (Cparenthesis as t) :: tl -> (* \) *) + main_impl_rev ~html (Text (")") :: r) [t] tl + | _, Backslash :: Cparenthesiss 0 :: tl -> (* \)))... *) + main_impl_rev ~html (Text (")") :: r) [Backslash; Cparenthesis] + (Cparenthesis :: tl) + | _, Backslash :: Cparenthesiss n :: tl -> assert (n >= 0); (* \)))... *) + main_impl_rev ~html (Text (")") :: r) [Backslash; Cparenthesis] + (Cparenthesiss (n-1) :: tl) + | _, Backslash :: (Plus as t) :: tl -> (* \+ *) + main_impl_rev ~html (Text ("+") :: r) [t] tl + | _, Backslash :: Pluss 0 :: tl -> (* \+++... *) + main_impl_rev ~html (Text ("+") :: r) [Backslash; Plus] (Plus :: tl) + | _, Backslash :: Pluss n :: tl -> assert (n >= 0); (* \+++... *) + main_impl_rev ~html (Text ("+") :: r) [Backslash; Plus] (Pluss (n-1) :: tl) + | _, Backslash :: (Minus as t) :: tl -> (* \- *) + main_impl_rev ~html (Text ("-") :: r) [t] tl + | _, Backslash :: Minuss 0 :: tl -> (* \---... *) + main_impl_rev ~html (Text ("-") :: r) [Backslash; Minus] (Minus :: tl) + | _, Backslash :: Minuss n :: tl -> assert (n >= 0); (* \---... *) + main_impl_rev ~html (Text ("-") :: r) [Backslash; Minus] (Minuss (n-1) :: tl) + | _, Backslash :: (Dot as t) :: tl -> (* \. *) + main_impl_rev ~html (Text (".") :: r) [t] tl + | _, Backslash :: Dots 0 :: tl -> (* \....... *) + main_impl_rev ~html (Text (".") :: r) [Backslash; Dot] (Dot :: tl) + | _, Backslash :: Dots n :: tl -> assert (n >= 0); (* \....... *) + main_impl_rev ~html (Text (".") :: r) [Backslash; Dot] (Dots (n-1) :: tl) + | _, Backslash :: (Exclamation as t) :: tl -> (* \! *) + main_impl_rev ~html (Text ("!") :: r) [t] tl + | _, Backslash :: Exclamations 0 :: tl -> (* \!!!... *) + main_impl_rev ~html (Text ("!") :: r) [Backslash; Exclamation] (Exclamation :: tl) + | _, Backslash :: Exclamations n :: tl -> assert (n >= 0); (* \!!!... *) + main_impl_rev ~html (Text ("!") :: r) [Backslash; Exclamation] + (Exclamations (n-1) :: tl) + | _, Backslash :: (Hash as t) :: tl -> (* \# *) + main_impl_rev ~html (Text ("#") :: r) [t] tl + | _, Backslash :: Hashs 0 :: tl -> (* \###... *) + main_impl_rev ~html (Text ("#") :: r) [Backslash; Hash] (Hash :: tl) + | _, Backslash :: Hashs n :: tl -> assert (n >= 0); (* \###... *) + main_impl_rev ~html (Text ("#") :: r) [Backslash; Hash] (Hashs (n-1) :: tl) + | _, Backslash :: (Greaterthan as t) :: tl -> (* \> *) + main_impl_rev ~html (Text (">") :: r) [t] tl + | _, Backslash :: Greaterthans 0 :: tl -> (* \>>>... *) + main_impl_rev ~html (Text (">") :: r) [Backslash; Greaterthan] (Greaterthan :: tl) + | _, Backslash :: Greaterthans n :: tl -> assert (n >= 0); (* \>>>... *) + main_impl_rev ~html (Text (">") :: r) [Backslash; Greaterthan] + (Greaterthans (n-1) :: tl) + | _, Backslash :: (Lessthan as t) :: tl -> (* \< *) + main_impl_rev ~html (Text ("<") :: r) [t] tl + | _, Backslash :: Lessthans 0 :: tl -> (* \<<<... *) + main_impl_rev ~html (Text ("<") :: r) [Backslash; Lessthan] (Lessthan :: tl) + | _, Backslash :: Lessthans n :: tl -> assert (n >= 0); (* \<<<... *) + main_impl_rev ~html (Text ("<") :: r) [Backslash; Lessthan] + (Lessthans (n-1) :: tl) + | _, (Backslashs 0 as t) :: tl -> (* \\\\... *) + main_impl_rev ~html (Text ("\\") :: r) [t] tl + | _, (Backslashs n as t) :: tl -> (* \\\\... *) + if n mod 2 = 0 then + main_impl_rev ~html (Text(String.make ((n+2)/2) '\\') :: r) [t] tl + else + main_impl_rev ~html (Text(String.make ((n+2)/2) '\\') :: r) [t] (Backslash :: tl) + | _, Backslash::[] -> + main_impl_rev ~html (Text "\\" :: r) [] [] + | _, Backslash::tl -> + main_impl_rev ~html (Text "\\" :: r) [Backslash] tl + + (* < *) + | _, (Lessthan|Lessthans _ as t) + :: (Word("http"|"https"|"ftp"|"ftps"|"ssh"|"afp"|"imap") as w) + :: Colon::Slashs(n)::tl -> + (* "semi-automatic" URLs *) + let rec read_url accu = function + | (Newline|Newlines _)::tl -> + None + | Greaterthan::tl -> + let url = + (L.string_of_token w) ^ "://" + ^ (if n = 0 then "" else String.make (n-1) '/') + ^ L.string_of_tokens (List.rev accu) + in Some(url, tl) + | x::tl -> + read_url (x::accu) tl + | [] -> + None + in + begin match read_url [] tl with + | Some(url, new_tl) -> + let r = + match t with + | Lessthans 0 -> Text "<" :: r + | Lessthans n -> Text(String.make (n+1) '<') :: r + | _ -> r + in + main_impl_rev ~html (Url(url,[Text url],"")::r) [] new_tl + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + end + + + (* Word(w) *) + | _, Word w::tl -> + main_impl_rev ~html (Text w :: r) [Word w] tl + + (* newline at the end *) + | _, [Newline] -> + NL::r + + (* named html entity *) + | _, Ampersand::((Word w::((Semicolon|Semicolons _) as s)::tl) as tl2) -> + if StringSet.mem w htmlcodes_set then + begin match s with + | Semicolon -> + main_impl_rev ~html (Raw("&"^w^";")::r) [s] tl + | Semicolons 0 -> + main_impl_rev ~html (Raw("&"^w^";")::r) [s] (Semicolon::tl) + | Semicolons n -> + main_impl_rev ~html (Raw("&"^w^";")::r) [s] (Semicolons(n-1)::tl) + | _ -> assert false + end + else + main_impl_rev ~html (Raw("&")::r) [] tl2 + + (* digit-coded html entity *) + | _, Ampersand::((Hash::Number w::((Semicolon|Semicolons _) as s)::tl) + as tl2) -> + if String.length w <= 4 then + begin match s with + | Semicolon -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] tl + | Semicolons 0 -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] (Semicolon::tl) + | Semicolons n -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] (Semicolons(n-1)::tl) + | _ -> assert false + end + else + main_impl_rev ~html (Raw("&")::r) [] tl2 + + (* maybe hex digit-coded html entity *) + | _, Ampersand::((Hash::Word w::((Semicolon|Semicolons _) as s)::tl) + as tl2) when is_hex w -> + if String.length w <= 4 then + begin match s with + | Semicolon -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] tl + | Semicolons 0 -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] (Semicolon::tl) + | Semicolons n -> + main_impl_rev ~html (Raw("&#"^w^";")::r) [s] (Semicolons(n-1)::tl) + | _ -> assert false + end + else + main_impl_rev ~html (Raw("&")::r) [] tl2 + + + (* Ampersand *) + | _, Ampersand::tl -> + main_impl_rev ~html (Raw("&")::r) [Ampersand] tl + + (* 2 Ampersands *) + | _, Ampersands(0)::tl -> + main_impl_rev ~html (Raw("&")::r) [] (Ampersand::tl) + + (* Several Ampersands (more than 2) *) + | _, Ampersands(n)::tl -> + main_impl_rev ~html (Raw("&")::r) [] (Ampersands(n-1)::tl) + + (* backquotes *) + | _, (Backquote|Backquotes _ as t)::tl -> + begin match bcode ~default_lang r previous lexemes with + | Some(r, p, l) -> main_impl_rev ~html r p l + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + end + + (* HTML *) + (*
and
with or without space(s) *) + | _, (Lessthan::Word("br"|"hr" as w)::Slash + ::(Greaterthan|Greaterthans _ as g)::tl) + | _, (Lessthan::Word("br"|"hr" as w)::(Space|Spaces _)::Slash + ::(Greaterthan|Greaterthans _ as g)::tl) -> + begin match g with + | Greaterthans 0 -> + main_impl_rev ~html (Raw("<"^w^" />")::r) [Greaterthan] (Greaterthan::tl) + | Greaterthans n -> + main_impl_rev ~html (Raw("<"^w^" />")::r) [Greaterthan] + (Greaterthans(n-1)::tl) + | _ -> + main_impl_rev ~html (Raw("<"^w^" />")::r) [Greaterthan] tl + end + + (* awaited orphan html closing tag *) + | _, Lessthan::Slash::Word(w)::(Greaterthan|Greaterthans _ as g)::tl + when !mediatypetextomd <> [] -> + raise (Orphan_closing(w, + lexemes, + (match g with + | Greaterthans 0 -> Greaterthan::tl + | Greaterthans n -> Greaterthans(n-1)::tl + | _ -> tl))) + + (* block html *) + | ([] | [Newline|Newlines _|Tag("HTMLBLOCK", _)]), + (Lessthan as t) + ::((Word(tagnametop) as w) + ::((Space|Spaces _|Greaterthan|Greaterthans _) + ::_ as html_stuff) as tlx) -> + if StringSet.mem tagnametop inline_htmltags_set then + main_impl_rev ~html r [Word ""] lexemes + else if not (blind_html || StringSet.mem tagnametop htmltags_set) then + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tlx + | Some(r, p, l) -> main_impl_rev ~html r p l + end + else + let read_html() = + let module T = struct + type t = + | Awaiting of string + | Open of string + type interm = + | HTML of string * (string * string option) list * interm list + | FTOKENS of L.t + | RTOKENS of L.t + | MD of Omd_representation.t + let rec md_of_interm_list html l = + let md_of_interm_list ?(html=html) l = + md_of_interm_list html l + in + match l with + | [] -> [] + | HTML(t, a, c)::tl -> + ( + let f_a = filter_text_omd_rev a in + if f_a != a then + Html_block + (t, + f_a, + make_paragraphs + (md_of_interm_list ~html:false (List.rev c))) + :: md_of_interm_list tl + else + Html_block + (t, f_a, md_of_interm_list ~html:true (List.rev c)) + :: md_of_interm_list tl + ) + | MD md::tl -> + md@md_of_interm_list tl + | RTOKENS t1::FTOKENS t2::tl -> + md_of_interm_list (FTOKENS(List.rev_append t1 t2)::tl) + | RTOKENS t1::RTOKENS t2::tl -> + md_of_interm_list + (FTOKENS(List.rev_append t1 (List.rev t2))::tl) + | FTOKENS t1::FTOKENS t2::tl -> + md_of_interm_list (FTOKENS(t1@t2)::tl) + | FTOKENS t :: tl -> + if html then + Raw(L.string_of_tokens t) :: md_of_interm_list tl + else + main_loop ~html [] [Word ""] t + @ md_of_interm_list tl + | RTOKENS t :: tl -> + md_of_interm_list (FTOKENS(List.rev t) :: tl) + let md_of_interm_list l = md_of_interm_list true l + let string_of_tagstatus tagstatus = + let b = Buffer.create 64 in + List.iter (function + | Open t -> bprintf b "{B/Open %s}" t + | Awaiting t -> bprintf b "{B/Awaiting %s}" t + ) tagstatus; + Buffer.contents b + end in + let add_token_to_body x body = + match body with + | T.RTOKENS r :: body -> T.RTOKENS(x::r)::body + | _ -> T.RTOKENS[x] :: body + in + let rec loop (body:T.interm list) attrs tagstatus tokens = + if debug then + eprintf "(OMD) 3333 BHTML loop body=%S tagstatus=%S %S\n%!" + (Omd_backend.sexpr_of_md(T.md_of_interm_list body)) + (T.string_of_tagstatus tagstatus) + (L.destring_of_tokens tokens); + match tokens with + | [] -> + begin + match tagstatus with + | [] -> Some(body, tokens) + | T.Open t :: _ when StringSet.mem t html_void_elements -> + Some(body, tokens) + | _ -> + if debug then + eprintf "(OMD) 3401 BHTML Not enough to read\n%!"; + None + end + | Lessthans n::tokens -> + begin match tagstatus with + | T.Awaiting _ :: _ -> None + | _ -> + if debug then eprintf "(OMD) 3408 BHTML loop\n%!"; + loop + (add_token_to_body + (if n = 0 then Lessthan else Lessthans(n-1)) + body) + attrs tagstatus (Lessthan::tokens) + end + (* self-closing tags *) + | Slash::Greaterthan::tokens -> + begin match tagstatus with + | T.Awaiting(tagname) :: tagstatus + when StringSet.mem tagname html_void_elements -> + loop [T.HTML(tagname, attrs, [])] [] tagstatus tokens + | _ -> + if debug then eprintf "(OMD) 3419 BHTML loop\n%!"; + loop + (add_token_to_body + Slash + (add_token_to_body + Greaterthan + body)) + attrs tagstatus tokens + end + (* closing the tag opener *) + | Lessthan::Slash::(Word(tagname) as w) + ::(Greaterthan|Greaterthans _ as g)::tokens -> + begin match tagstatus with + | T.Open t :: _ when t = tagname -> + if debug then + eprintf "(OMD) 3375 BHTML properly closing %S\n%!" t; + Some(body, + (match g with + | Greaterthans 0 -> Greaterthan :: tokens + | Greaterthans n -> Greaterthans(n-1) :: tokens + | _ -> tokens)) + | T.Open t :: _ -> + if debug then + eprintf "(OMD) 3379 BHTML wrongly closing %S with %S 1\n%!" + t tagname; + loop (T.RTOKENS[g;w;Slash;Lessthan]::body) + [] tagstatus tokens + | T.Awaiting t :: _ -> + if debug then + eprintf "(OMD) 3383 BHTML wrongly closing %S with %S 2\n%!" + t tagname; + if !mediatypetextomd <> [] then + raise + (Orphan_closing(t, + lexemes, + (match g with + | Greaterthans 0 -> + Greaterthan::tokens + | Greaterthans n -> + Greaterthans(n-1)::tokens + | _ -> tokens))) + else + None + | [] -> + if debug then + eprintf "(OMD) BHTML wrongly closing %S 3\n%!" tagname; + None + end + (* tag *) + | Lessthan::(Word(tagname) as word)::tokens + when + blind_html + || StringSet.mem tagname htmltags_set + -> + if debug then + eprintf "(OMD) 3489 BHTML tagname && StringSet.mem t html_void_elements -> + None + | T.Awaiting _ :: _ -> None + | _ -> + if attrs <> [] then + begin + if debug then + eprintf "(OMD) 3496 BHTML tag %S but attrs <> []\n%!" + tagname; + None + end + else + begin + if debug then + eprintf "(OMD) 3421 BHTML tag %S, tagstatus=%S, \ + attrs=[], tokens=%S\n%!" + tagname (T.string_of_tagstatus tagstatus) + (L.destring_of_tokens tokens); + match + loop [] [] (T.Awaiting tagname::tagstatus) tokens + with + | None -> + if debug then eprintf "(OMD) 3489 BHTML loop\n%!"; + loop + (add_token_to_body + word + (add_token_to_body + Lessthan + body)) + attrs tagstatus tokens + | Some(b, tokens) -> + if debug then begin + eprintf "(OMD) 3433 BHTML tagstatus=%S tokens=%S\n%!" + (T.string_of_tagstatus tagstatus) + (L.string_of_tokens tokens) + end; + Some(b@body, tokens) + end + end + (* end of opening tag *) + | Greaterthan::tokens -> + begin match tagstatus with + | T.Awaiting t :: tagstatus -> + if List.mem ("media:type", Some "text/omd") attrs then + ( + mediatypetextomd := t :: !mediatypetextomd; + try + ignore(main_impl_rev ~html [] [] tokens); + if debug then + eprintf "(OMD) 3524 BHTML closing tag not found \ + in %S\n%!" (L.destring_of_tokens tokens); + warn + (sprintf + "Closing tag `%s' not found for text/omd zone." + t); + mediatypetextomd := List.tl !mediatypetextomd; + None + with Orphan_closing(tagname, delimiter, after) -> + let before = + let rec f r = function + | Lessthans n as e :: tl -> + begin match delimiter with + | Lessthan::_ -> + if Lessthan::tl = delimiter then + List.rev + (if n = 0 then + Lessthan::r + else + Lessthans(n-1)::r) + else + f (e::r) tl + | _ -> + if tl == delimiter || tl = delimiter then + List.rev r + else + f (e::r) tl + end + | e::tl as l -> + if l == delimiter || l = delimiter then + List.rev r + else if tl == delimiter || tl = delimiter then + List.rev (e::r) + else + f (e::r) tl + | [] -> List.rev r + in + f [] tokens + in + if debug then + eprintf "(OMD) 3552 BHTML tokens=%s delimiter=%s \ + after=%s before=%s (tagname=t)=%b\n%!" + (L.destring_of_tokens tokens) + (L.destring_of_tokens delimiter) + (L.destring_of_tokens after) + (L.destring_of_tokens before) + (tagname = t); + (match !mediatypetextomd with + | _ :: tl -> mediatypetextomd := tl + | [] -> assert false); + if tagname = t then + loop + [T.HTML + (t, + attrs, + [T.MD + (main_impl ~html [] [] + (tag_setext main_loop before))])] + [] + tagstatus + after + else + None + ) + else + begin + if debug then eprintf "(OMD) 3571 BHTML loop\n%!"; + match loop body [] (T.Open t::tagstatus) tokens with + | None -> + if debug then + eprintf "(OMD) 3519 BHTML \ + Couldn't find an closing tag for %S\n%!" + t; + None + | Some(body, l) -> + if debug then + eprintf "(OMD) 3498 BHTML Found a closing tag %s\n%!" t; + match tagstatus with + | _ :: _ -> + loop [T.HTML(t, attrs, body)] [] tagstatus l + | [] -> + Some([T.HTML(t, attrs, body)], l) + end + | T.Open t :: _ -> + if debug then + eprintf + "(OMD) 3591 BHTML Some `>` isn't for an opening tag\n%!"; + loop (add_token_to_body Greaterthan body) + attrs tagstatus tokens + | [] -> + if debug then + eprintf "(OMD) 3542 BHTML tagstatus=[]\n%!"; + None + end + + (* maybe attribute *) + | (Colon|Colons _|Underscore|Underscores _|Word _ as t)::tokens + | (Space|Spaces _) + ::(Colon|Colons _|Underscore|Underscores _|Word _ as t) + ::tokens + when (match tagstatus with + | T.Awaiting _ :: _ -> true + | _ -> false) -> + begin + let module Attribute_value = struct + type t = Empty of name | Named of name | Void + and name = string + end in + let open Attribute_value in + let rec extract_attribute accu = function + | (Space | Spaces _ | Newline) :: tokens-> + Empty(L.string_of_tokens(List.rev accu)), tokens + | (Greaterthan|Greaterthans _) :: _ as tokens-> + Empty(L.string_of_tokens(List.rev accu)), tokens + | Equal :: tokens -> + Named(L.string_of_tokens(List.rev accu)), tokens + | Colon | Colons _ | Underscore | Underscores _ | Word _ + | Number _ | Minus | Minuss _ | Dot | Dots _ as t :: tokens -> + extract_attribute (t::accu) tokens + | tokens -> Void, tokens + in + match extract_attribute [t] tokens with + | Empty attributename, tokens -> + (* attribute with no explicit value *) + if debug then eprintf "(OMD) 3628 BHTML loop\n%!"; + loop body ((attributename, None)::attrs) tagstatus tokens + | Named attributename, tokens -> + begin match tokens with + | Quotes 0 :: tokens -> + if debug then + eprintf "(OMD) 3661 BHTML empty attribute 1 %S\n%!" + (L.string_of_tokens tokens); + loop body ((attributename, Some "")::attrs) + tagstatus tokens + | Quote :: tokens -> + begin + if debug then + eprintf "(OMD) 3668 BHTML non empty attribute 1 %S\n%!" + (L.string_of_tokens tokens); + match + fsplit + ~excl:(function + | Quotes _ :: _ -> true + | _ -> false) + ~f:(function + | Quote::tl -> Split([], tl) + | _ -> Continue) + tokens + with + | None -> None + | Some(at_val, tokens) -> + if debug then eprintf "(OMD) 3654 BHTML loop\n%!"; + loop body ((attributename, + Some(L.string_of_tokens at_val)) + ::attrs) tagstatus tokens + end + | Doublequotes 0 :: tokens -> + begin + if debug then + eprintf "(OMD) 3690 BHTML empty attribute 2 %S\n%!" + (L.string_of_tokens tokens); + loop body ((attributename, Some "")::attrs) + tagstatus tokens + end + | Doublequote :: tokens -> + begin + if debug then + eprintf "(OMD) 3698 BHTML non empty attribute 2 %S\n%!" + (L.string_of_tokens tokens); + match fsplit + ~excl:(function + | Doublequotes _ :: _ -> true + | _ -> false) + ~f:(function + | Doublequote::tl -> Split([], tl) + | _ -> Continue) + tokens + with + | None -> None + | Some(at_val, tokens) -> + if debug then + eprintf "(OMD) 3622 BHTML %s=%S %s\n%!" + attributename + (L.string_of_tokens at_val) + (L.destring_of_tokens tokens); + loop body ((attributename, + Some(L.string_of_tokens at_val)) + ::attrs) tagstatus tokens + end + | _ -> None + end + | Void, _ -> None + end + + | x::tokens as dgts + when (match tagstatus with T.Open _ :: _ -> true | _ -> false) -> + begin + if debug then + eprintf "(OMD) 3620 BHTML general %S\n%!" + (L.string_of_tokens dgts); + loop (add_token_to_body x body) attrs tagstatus tokens + end + | (Newline | Space | Spaces _) :: tokens + when + (match tagstatus with T.Awaiting _ :: _ -> true | _ -> false) -> + begin + if debug then eprintf "(OMD) 3737 BHTML spaces\n%!"; + loop body attrs tagstatus tokens + end + | (Newlines _ as x) :: tokens + when + (match tagstatus with T.Awaiting _ :: _ -> true | _ -> false) -> + begin + if debug then eprintf "(OMD) 3827 BHTML newlines\n%!"; + warn "there are empty lines in what may be an HTML block"; + loop (add_token_to_body x body) attrs tagstatus tokens + end + | _ -> + if debug then + eprintf "(OMD) 3742 BHTML fallback with \ + tokens=%s and tagstatus=%s\n%!" + (L.destring_of_tokens tokens) + (match tagstatus with + | [] -> "None" + | T.Awaiting _ :: _ -> "Awaiting" + | T.Open _ :: _ -> "Open (can't be)"); + (match tagstatus with + | [] -> Some(body, tokens) + | T.Awaiting tag :: _ -> + warn (sprintf "expected to read an open HTML tag (%s), \ + but found nothing" tag); + None + | T.Open tag :: _ -> + warn (sprintf "expected to find the closing HTML tag for %s, \ + but found nothing" tag); + None) + in + if debug then eprintf "(OMD) 3408 BHTML loop\n%!"; + match loop [] [] [] lexemes with + | Some(h, rest) -> + Some(T.md_of_interm_list h, rest) + | None -> None + in + begin match read_html() with + | Some(h, rest) -> + main_impl_rev ~html (h@r) [Tag("HTMLBLOCK", empty_extension)] rest + | None -> + let text = L.string_of_token t in + main_impl_rev ~html (Text(text ^ tagnametop)::r) [w] html_stuff + end + (* / end of block HTML. *) + + + (* inline HTML *) + | _, + (Lessthan as t) + ::((Word(tagnametop) as w) + ::((Space|Spaces _|Greaterthan|Greaterthans _) + ::_ as html_stuff) as tlx) -> + if (strict_html && not(StringSet.mem tagnametop inline_htmltags_set)) + || not(blind_html || StringSet.mem tagnametop htmltags_set) + then + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tlx + | Some(r, p, l) -> main_impl_rev ~html r p l + end + else + let read_html() = + let module T = struct + type t = + | Awaiting of string + | Open of string + type interm = + | HTML of string * (string * string option) list * interm list + | TOKENS of L.t + | MD of Omd_representation.t + let rec md_of_interm_list = function + | [] -> [] + | HTML(t, a, c)::tl -> + Html(t, a, md_of_interm_list(List.rev c))::md_of_interm_list tl + | MD md::tl -> md @ md_of_interm_list tl + | TOKENS t1::TOKENS t2::tl -> + md_of_interm_list (TOKENS(t1@t2)::tl) + | TOKENS t :: tl -> + main_impl ~html [] [Word ""] (t) + @ md_of_interm_list tl + let string_of_tagstatus tagstatus = + let b = Buffer.create 64 in + List.iter (function + | Open t -> bprintf b "{I/Open %s}" t + | Awaiting t -> bprintf b "{I/Awaiting %s}" t + ) tagstatus; + Buffer.contents b + end in + let add_token_to_body x body = + T.TOKENS[x]::body + in + let rec loop (body:T.interm list) attrs tagstatus tokens = + if debug then + eprintf "(OMD) 3718 loop tagstatus=(%s) %s\n%!" + (* eprintf "(OMD) 3718 loop tagstatus=(%s) body=(%s) %s\n%!" *) + (T.string_of_tagstatus tagstatus) + (* (Omd_backend.sexpr_of_md(T.md_of_interm_list body)) *) + (L.destring_of_tokens tokens); + match tokens with + | [] -> + begin + match tagstatus with + | [] -> Some(body, tokens) + | T.Open(t)::_ when StringSet.mem t html_void_elements -> + Some(body, tokens) + | _ -> + if debug then + eprintf "(OMD) Not enough to read for inline HTML\n%!"; + None + end + | Lessthans n::tokens -> + begin match tagstatus with + | T.Awaiting _ :: _ -> None + | _ -> + loop + (add_token_to_body + (if n = 0 then Lessthan else Lessthans(n-1)) + body) + attrs tagstatus (Lessthan::tokens) + end + (* self-closing tags *) + | Slash::Greaterthan::tokens -> + begin match tagstatus with + | T.Awaiting(tagname)::tagstatus + when StringSet.mem tagname html_void_elements -> + loop [T.HTML(tagname, attrs, [])] [] tagstatus tokens + | _ -> + loop (T.TOKENS[Greaterthan;Slash]::body) + attrs tagstatus tokens + end + (* multiple newlines are not to be seen in inline HTML *) + | Newlines _ :: _ -> + if debug then eprintf "(OMD) Multiple lines in inline HTML\n%!"; + (match tagstatus with + | [] -> Some(body, tokens) + | _ -> warn "multiple newlines in inline HTML"; None) + (* maybe code *) + | (Backquote | Backquotes _ as b)::tl -> + begin match tagstatus with + | T.Awaiting _ :: _ -> + if debug then + eprintf "(OMD) maybe code in inline HTML: no code\n%!"; + None + | [] -> + if debug then + eprintf "(OMD) maybe code in inline HTML: none\n%!"; + None + | T.Open _ :: _ -> + if debug then + eprintf "(OMD) maybe code in inline HTML: let's try\n%!"; + begin match bcode [] [Space] tokens with + | Some (((Code _::_) as c), p, l) -> + if debug then + eprintf "(OMD) maybe code in inline HTML: \ + confirmed\n%!"; + loop (T.MD c::body) [] tagstatus l + | _ -> + if debug then + eprintf "(OMD) maybe code in inline HTML: failed\n%!"; + loop (T.TOKENS[b]::body) [] tagstatus tl + end + end + (* closing the tag *) + | Lessthan::Slash::(Word(tagname) as w) + ::(Greaterthan|Greaterthans _ as g)::tokens -> + begin match tagstatus with + | T.Open t :: _ when t = tagname -> + if debug then + eprintf "(OMD) 4136 properly closing %S tokens=%s\n%!" + t (L.string_of_tokens tokens); + Some(body, + (match g with + | Greaterthans 0 -> Greaterthan :: tokens + | Greaterthans n -> Greaterthans(n-1) :: tokens + | _ -> tokens)) + | T.Open t :: _ -> + if debug then + eprintf "(OMD) 4144 \ + wrongly closing %S with %S 1\n%!" t tagname; + loop (T.TOKENS[g;w;Slash;Lessthan]::body) [] tagstatus tokens + | T.Awaiting t :: _ -> + if debug then + eprintf "(OMD) 4149 \ + wrongly closing %S with %S 2\n%!" t tagname; + None + | [] -> + if debug then + eprintf "(OMD) 4154 \ + wrongly closing nothing with %S 3\n%!" + tagname; + None + end + (* tag *) + | Lessthan::(Word(tagname) as word)::tokens + when + blind_html + || (strict_html && StringSet.mem tagname inline_htmltags_set) + || (not strict_html && StringSet.mem tagname htmltags_set) + -> + if debug then eprintf "(OMD) <%s...\n%!" tagname; + begin match tagstatus with + | T.Open(t) :: _ + when t <> tagname && StringSet.mem t html_void_elements -> + None + | T.Awaiting _ :: _ -> None + | _ -> + begin + if debug then + eprintf "(OMD) 3796 tag %s, attrs=[]\n%!" tagname; + match loop [] [] (T.Awaiting tagname::tagstatus) tokens + with + | None -> + loop (T.TOKENS[word;Lessthan]::body) + attrs tagstatus tokens + | Some(b,tokens) -> + Some(b@body, tokens) + end + end + (* end of opening tag *) + | Greaterthan::tokens -> + if debug then + eprintf "(OMD) 4185 end of opening tag tokens=%s \ + tagstatus=%s\n%!" + (L.string_of_tokens tokens) + (T.string_of_tagstatus tagstatus); + begin match tagstatus with + | T.Awaiting t :: tagstatus as ts -> + begin match loop body [] (T.Open t::tagstatus) tokens with + | None -> + if debug then + eprintf "(OMD) 4186 \ + Couldn't find an closing tag for %S\n%!" + t; + None + | Some(b, tokens) -> + if debug then + eprintf + "(OMD) 4192 Found a closing tag %s ts=%s \ + tokens=%s\n%!" + t + (T.string_of_tagstatus ts) + (L.string_of_tokens tokens); + match tagstatus with + | [] -> + Some(T.HTML(t, attrs, b)::body, tokens) + | _ -> + (* Note: we don't care about the value of + [attrs] here because in we have a + [tagstatus] matches [T.Open _ :: _] and + there's a corresponding filter that will + take care of attrs that will take care of + it. *) + loop (T.HTML(t, attrs, b)::body) [] tagstatus tokens + end + | T.Open t :: _ -> + if debug then + eprintf + "(OMD) Turns out an `>` isn't for an opening tag\n%!"; + loop (T.TOKENS[Greaterthan]::body) attrs tagstatus tokens + | [] -> + if debug then + eprintf "(OMD) 4202 tagstatus=[]\n%!"; + None + end + + (* maybe attribute *) + | (Colon|Colons _|Underscore|Underscores _|Word _ as t)::tokens + | (Space|Spaces _) + ::(Colon|Colons _|Underscore|Underscores _|Word _ as t) + ::tokens + when (match tagstatus with + | T.Awaiting _ :: _ -> true + | _ -> false) -> + begin + let module Attribute_value = struct + type t = Empty of name | Named of name | Void + and name = string + end in + let open Attribute_value in + let rec extract_attribute accu = function + | (Space | Spaces _ | Newline) :: tokens-> + Empty(L.string_of_tokens(List.rev accu)), tokens + | (Greaterthan|Greaterthans _) :: _ as tokens-> + Empty(L.string_of_tokens(List.rev accu)), tokens + | Equal :: tokens -> + Named(L.string_of_tokens(List.rev accu)), tokens + | Colon | Colons _ | Underscore | Underscores _ | Word _ + | Number _ | Minus | Minuss _ | Dot | Dots _ as t :: tokens -> + extract_attribute (t::accu) tokens + | tokens -> Void, tokens + in + match extract_attribute [t] tokens with + | Empty attributename, tokens -> + (* attribute with no explicit value *) + loop body ((attributename, None)::attrs) tagstatus tokens + | Named attributename, tokens -> + begin match tokens with + | Quotes 0 :: tokens -> + if debug then + eprintf "(OMD) (IHTML) empty attribute 1 %S\n%!" + (L.string_of_tokens tokens); + loop body ((attributename, Some "")::attrs) tagstatus tokens + | Quote :: tokens -> + begin + if debug then + eprintf "(OMD) (IHTML) non empty attribute 1 %S\n%!" + (L.string_of_tokens tokens); + match + fsplit + ~excl:(function + | Quotes _ :: _ -> true + | _ -> false) + ~f:(function + | Quote::tl -> Split([], tl) + | _ -> Continue) + tokens + with + | None -> None + | Some(at_val, tokens) -> + loop body ((attributename, + Some(L.string_of_tokens at_val)) + ::attrs) tagstatus tokens + end + | Doublequotes 0 :: tokens -> + begin + if debug then + eprintf "(OMD) (IHTML) empty attribute 2 %S\n%!" + (L.string_of_tokens tokens); + loop body ((attributename, Some "")::attrs) tagstatus tokens + end + | Doublequote :: tokens -> + begin + if debug then + eprintf "(OMD) (IHTML) non empty attribute 2 %S\n%!" + (L.string_of_tokens tokens); + match fsplit + ~excl:(function + | Doublequotes _ :: _ -> true + | _ -> false) + ~f:(function + | Doublequote::tl -> Split([], tl) + | _ -> Continue) + tokens + with + | None -> None + | Some(at_val, tokens) -> + if debug then + eprintf "(OMD) (3957) %s=%S %s\n%!" attributename + (L.string_of_tokens at_val) + (L.destring_of_tokens tokens); + loop body ((attributename, + Some(L.string_of_tokens at_val)) + ::attrs) tagstatus tokens + end + | _ -> None + end + | Void, _ -> None + end + + | Backslash::x::tokens + when (match tagstatus with T.Open _ :: _ -> true | _ -> false) -> + loop (T.TOKENS[Backslash;x]::body) attrs tagstatus tokens + | Backslashs(n)::x::tokens + when (match tagstatus with T.Open _ :: _ -> true | _ -> false) + && n mod 2 = 1 -> + loop (T.TOKENS[Backslashs(n);x]::body) attrs tagstatus tokens + + | x::tokens + when (match tagstatus with T.Open _ :: _ -> true | _ -> false) -> + begin + if debug then + eprintf "(OMD) (4161) general %S\n%!" + (L.string_of_tokens (x::tokens)); + loop (T.TOKENS[x]::body) attrs tagstatus tokens + end + | (Newline | Space | Spaces _) :: tokens + when + (match tagstatus with T.Awaiting _ :: _ -> true | _ -> false) -> + begin + if debug then eprintf "(OMD) (4289) spaces\n%!"; + loop body attrs tagstatus tokens + end + | _ -> + if debug then + eprintf "(OMD) (4294) \ + fallback with tokens=%s and tagstatus=%s\n%!" + (L.destring_of_tokens tokens) + (T.string_of_tagstatus tagstatus); + (match tagstatus with + | [] -> Some(body, tokens) + | T.Awaiting tag :: _ -> + warn (sprintf "expected to read an open HTML tag (%s), \ + but found nothing" tag); + None + | T.Open tag :: _ -> + warn (sprintf "expected to find the closing HTML tag for %s, \ + but found nothing" tag); + None) + in match loop [] [] [] lexemes with + | Some(html, rest) -> + Some(T.md_of_interm_list html, rest) + | None -> None + in + begin match read_html() with + | Some(h, rest) -> + main_impl_rev ~html (h@r) [Greaterthan] rest + | None -> + let text = L.string_of_token t in + main_impl_rev ~html (Text(text ^ tagnametop)::r) [w] html_stuff + end + (* / end of inline HTML. *) + + (* < : emails *) + | _, (Lessthan as t)::tl -> + begin match maybe_autoemail r previous lexemes with + | Some(r,p,l) -> main_impl_rev ~html r p l + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + end + + (* line breaks *) + | _, Newline::tl -> + main_impl_rev ~html (NL::r) [Newline] tl + | _, Newlines _::tl -> + main_impl_rev ~html (NL::NL::r) [Newline] tl + + (* [ *) + | _, (Obracket as t)::tl -> + begin match maybe_link main_loop r previous tl with + | Some(r, p, l) -> main_impl_rev ~html r p l + | None -> + match maybe_reference main_loop rc r previous tl with + | Some(r, p, l) -> main_impl_rev ~html r p l + | None -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + end + + (* img *) + | _, (Exclamation|Exclamations _ as t) + ::Obracket::Cbracket::Oparenthesis::tl -> + (* image insertion with no "alt" *) + (* ![](/path/to/img.jpg) *) + (try + begin + let b, tl = read_until_cparenth ~bq:true ~no_nl:false tl in + (* new lines there are allowed *) + let r (* updated result *) = match t with + | Exclamations 0 -> Text "!" :: r + | Exclamations n -> Text(String.make (n+1) '!') :: r + | _ -> r in + match + try Some(read_until_space ~bq:false ~no_nl:true b) + with Premature_ending -> None + with + | Some(url, tls) -> + let title, should_be_empty_list = + read_until_dq ~bq:true (snd (read_until_dq ~bq:true tls)) in + let url = L.string_of_tokens url in + let title = L.string_of_tokens title in + main_impl_rev ~html (Img("", url, title) :: r) [Cparenthesis] tl + | None -> + let url = L.string_of_tokens b in + main_impl_rev ~html (Img("", url, "") :: r) [Cparenthesis] tl + end + with + | NL_exception -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + + (* img ref *) + | _, (Exclamation as t) + ::Obracket::Cbracket::Obracket::tl -> + (* ref image insertion with no "alt" *) + (* ![][ref] *) + (try + let id, tl = read_until_cbracket ~bq:true ~no_nl:true tl in + let fallback = extract_fallback main_loop tl lexemes in + let id = L.string_of_tokens id in + main_impl_rev ~html (Img_ref(rc, id, "", fallback) :: r) [Cbracket] tl + with NL_exception -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + + + (* img *) + | _, (Exclamation|Exclamations _ as t)::Obracket::tl -> + (* image insertion with "alt" *) + (* ![Alt text](/path/to/img.jpg "Optional title") *) + (try + match read_until_cbracket ~bq:true tl with + | alt, Oparenthesis::ntl -> + (try + let alt = L.string_of_tokens alt in + let path_title, rest = + read_until_cparenth ~bq:true ~no_nl:false ntl in + let path, title = + try + read_until_space ~bq:true ~no_nl:true path_title + with Premature_ending -> path_title, [] in + let title, nothing = + if title <> [] then + read_until_dq ~bq:true (snd(read_until_dq ~bq:true title)) + else [], [] in + if nothing <> [] then + raise NL_exception; (* caught right below *) + let r = + match t with + | Exclamations 0 -> Text "!" :: r + | Exclamations n -> Text(String.make (n+1) '!') :: r + | _ -> r in + let path = L.string_of_tokens path in + let title = L.string_of_tokens title in + main_impl_rev ~html (Img(alt, path, title) :: r) [Cparenthesis] rest + with + | NL_exception + (* if NL_exception was raised, then fall back to "text" *) + | Premature_ending -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + ) + | alt, Obracket::Word(id)::Cbracket::ntl + | alt, Obracket::(Space|Spaces _)::Word(id)::Cbracket::ntl + | alt, Obracket::(Space|Spaces _)::Word(id)::(Space|Spaces _) + ::Cbracket::ntl + | alt, Obracket::Word(id)::(Space|Spaces _)::Cbracket::ntl -> + let fallback = extract_fallback main_loop ntl lexemes in + let alt = L.string_of_tokens alt in + main_impl_rev ~html (Img_ref(rc, id, alt, fallback)::r) [Cbracket] ntl + | alt, Obracket::((Newline|Space|Spaces _|Word _|Number _)::_ + as ntl) -> + (try + match read_until_cbracket ~bq:true ~no_nl:false ntl with + | [], rest -> raise Premature_ending + | id, rest -> + let fallback = extract_fallback main_loop rest lexemes in + let id = L.string_of_tokens id in + let alt = L.string_of_tokens alt in + main_impl_rev ~html (Img_ref(rc, id, alt, fallback)::r) + [Cbracket] + rest + with + | Premature_ending + | NL_exception -> + begin match maybe_extension extensions r previous lexemes with + | None -> + main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + | _ -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + with + | Premature_ending -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + ) + + | _, + (At|Bar|Caret|Cbrace|Colon|Comma|Cparenthesis|Cbracket|Dollar + |Dot|Doublequote|Exclamation|Equal|Minus|Obrace|Oparenthesis + |Percent|Plus|Question|Quote|Semicolon|Slash|Tab|Tilde + |Greaterthan as t)::tl + -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + | _, (Number _ as t):: tl -> + begin match maybe_extension extensions r previous lexemes with + | None -> main_impl_rev ~html (Text(L.string_of_token t)::r) [t] tl + | Some(r, p, l) -> main_impl_rev ~html r p l + end + + | _, (Ats _ | Bars _ | Carets _ | Cbraces _ | Cbrackets _ | Colons _ + | Commas _ | Cparenthesiss _ | Dollars _ | Dots _ | Doublequotes _ + | Equals _ | Exclamations _ | Greaterthans _ | Lessthans _ + | Minuss _ | Obraces _ | Obrackets _ | Oparenthesiss _ + | Percents _ | Pluss _ | Questions _ | Quotes _ | Semicolons _ + | Slashs _ | Stars _ | Tabs _ | Tildes _ | Underscores _ as tk) + :: tl -> + begin match maybe_extension extensions r previous lexemes with + | None -> + let tk0, tks = L.split_first tk in + let text = L.string_of_token tk0 in + main_impl_rev ~html (Text text :: r) [tk0] (tks :: tl) + | Some(r, p, l) -> + main_impl_rev ~html r p l + end + + + and main_impl ~html (r:r) (previous:p) (lexemes:l) = + (* if debug then eprintf "(OMD) main_impl html=%b\n%!" html; *) + assert_well_formed lexemes; + List.rev (main_loop_rev ~html r previous lexemes) + + and main_loop ?(html=false) (r:r) (previous:p) (lexemes:l) = + main_impl ~html r previous lexemes + + and main_loop_rev ?(html=false) (r:r) (previous:p) (lexemes:l) = + main_impl_rev ~html r previous lexemes + + + let main_parse lexemes = + main_loop [] [] (tag_setext main_loop lexemes) + + let parse lexemes = + main_parse lexemes + +end + +let default_parse ?(extensions=[]) ?(default_lang="") lexemes = + let e = extensions and d = default_lang in + let module E = Default_env(Unit) in + let module M = + Make(struct + include E + let extensions = e + let default_lang = d + end) + in + M.main_parse lexemes + diff --git a/ocaml-lsp-server/src/omd/src/omd_parser.mli b/ocaml-lsp-server/src/omd/src/omd_parser.mli new file mode 100644 index 000000000..e8174bb48 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_parser.mli @@ -0,0 +1,379 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(** Beware: the functions in this module may raise exceptions! If you + use them, you should be careful. *) + + +type r = Omd_representation.t +(** accumulator (beware, reversed tokens) *) + +and p = Omd_representation.tok list +(** context information: previous elements *) + +and l = Omd_representation.tok list +(** tokens to parse *) + +and main_loop = + ?html:bool -> + r -> (* accumulator (beware, reversed tokens) *) + p -> (* info: previous elements *) + l -> (* tokens to parse *) + Omd_representation.t (* final result *) +(** most important loop, which has to be given as an argument *) + + +val default_parse : + ?extensions:Omd_representation.extensions -> ?default_lang:string -> l + -> Omd_representation.t +(** Translate tokens to Markdown representation. + + @param lang language for blocks of code where it was not specified. + Default: [""]. +*) + +module type Env = +sig + val rc: Omd_representation.ref_container + (** reference container *) + val extensions : Omd_representation.extensions + (** list of parser extensions *) + val default_lang : string + (** default language for code blocks *) + val gh_uemph_or_bold_style : bool + (** flag: bold/emph using using underscores is by default + github-style, which means that underscores inside words are + left as underscore, rather than special characters, because + it's more convenient. However it is also less expressive + because then you can't bold/emph a part of a word. You might + want to set this flag to false. *) + val blind_html : bool + (** flag: if true, will not check whether a used HTML tag actually + exists in HTML. *) + val strict_html : bool + (** flag: if true, will only accept known inline HTML tags in inline HTML. *) + val warning : bool + (** flag: if true, will output warnings *) + val warn_error : bool + (** flag: if true, will convert warnings to errors *) +end + +module Default_env : functor (Unit: sig end) -> Env + +module Make : functor (Env : Env) -> +sig + + val rc: Omd_representation.ref_container + (** reference container *) + val extensions : Omd_representation.extensions + (** list of parser extensions *) + val default_lang : string + (** default language for code blocks *) + val gh_uemph_or_bold_style : bool + (** flag: bold/emph using using underscores is by default + github-style, which means that underscores inside words are + left as underscore, rather than special characters, because + it's more convenient. However it is also less expressive + because then you can't bold/emph a part of a word. You might + want to set this flag to false. *) + val blind_html : bool + (** flag: if true, will not check whether a used HTML tag actually + exists in HTML. *) + val strict_html : bool + (** flag: if true, will only accept known inline HTML tags in inline HTML. *) + + + val htmlcodes_set : Omd_utils.StringSet.t + (** set of known HTML codes *) + + val inline_htmltags_set : Omd_utils.StringSet.t + (** set of known inline HTML tags *) + + val htmltags_set : Omd_utils.StringSet.t + (** All known HTML tags *) + + val unindent_rev : + int -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + (** [unindent_rev n l] returns the same couple as [unindent n l] + except that the first element (which is a list) is reversed. + This function is used for lists. *) + + val unindent : + int -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + (** [unindent n l] returns [(unindented, rest)] where [unindented] is + the consecutive lines of [l] that are indented with at least [n] + spaces, and de-indented by [n] spaces. If [l] starts with a line + that is indented by less than [n] spaces, then it returns [([], l)]. + + + (* This function is used for lists, so it does not require [n] *) + (* spaces on every single line, but only on some specific ones of them. *) + + This function is used for lists and blockquotes. + + *) + + (* val unindent_strict_rev : *) + (* int -> *) + (* Omd_representation.tok list -> *) + (* Omd_representation.tok list * Omd_representation.tok list *) + (* (\** [unindent_strict_rev n l] returns the same couple as [unindent n l] *) + (* except that the first element (which is a list) is reversed. *) + (* This function is used for blockquotes. *\) *) + + (* val unindent_strict : *) + (* int -> *) + (* Omd_representation.tok list -> *) + (* Omd_representation.tok list * Omd_representation.tok list *) + (* (\** [unindent_strict n l] returns [(unindented, rest)] where [unindented] is *) + (* the consecutive lines of [l] that are indented with at least [n] *) + (* spaces, and de-indented by [n] spaces. If [l] starts with a line *) + (* that is indented by less than [n] spaces, then it returns [([], l)]. *) + (* This function is used for blockquotes. *) + (* *\) *) + + + + val is_blank : Omd_representation.tok list -> bool + (** [is_blank l] returns [true] if [l] only contains blanks, which are + spaces and newlines. *) + + val semph_or_bold : + int -> + Omd_representation.tok list -> + (Omd_representation.tok list * Omd_representation.tok list) option + (** [semph_or_bold n l] returns [None] if [l] doesn't start with + a bold/emph phrase (marked using stars), else it returns [Some(x,y)] + where [x] is the emph and/or bold phrase at the beginning of [l] + and [y] is the rest of [l]. *) + + val sm_uemph_or_bold : + int -> + Omd_representation.tok list -> + (Omd_representation.tok list * Omd_representation.tok list) option + (** [sm_uemph_or_bold n l] returns [None] if [l] doesn't start with + a bold/emph phrase (marked using underscores), else it returns [Some(x,y)] + where [x] is the emph and/or bold phrase at the beginning of [l] + and [y] is the rest of [l]. *) + + val gh_uemph_or_bold : + int -> + Omd_representation.tok list -> + (Omd_representation.tok list * Omd_representation.tok list) option + (** [gh_uemph_or_bold n l] returns [None] if [l] doesn't start with + a bold/emph phrase (marked using underscores), else it returns [Some(x,y)] + where [x] is the emph and/or bold phrase at the beginning of [l] + and [y] is the rest of [l]. *) + + val uemph_or_bold : + int -> + Omd_representation.tok list -> + (Omd_representation.tok list * Omd_representation.tok list) option + (** [uemph_or_bold n l] returns [None] if [l] doesn't start with a + bold/emph phrase (marked using underscores), else it returns + [Some(x,y)] where [x] is the emph and/or bold phrase at the + beginning of [l] and [y] is the rest of [l]. N.B. if + [!gh_uemph_or_bold_style] then in Github style (i.e., underscores + inside words are considered as underscores). *) + + val eat_blank : Omd_representation.tok list -> Omd_representation.tok list + (** [eat_blank l] returns [l] where all blanks at the beginning of the + list have been removed (it stops removing as soon as it meets an element + that is not a blank). Blanks are spaces and newlines only. *) + + val tag__maybe_h1 : main_loop -> Omd_representation.tok + (** [tag__maybe_h1 main_loop] is a tag that is injected everywhere that + might preceed a H1 title. It needs [main_loop] as argument because + it is used to parse the contents of the titles. *) + + val tag__maybe_h2 : main_loop -> Omd_representation.tok + (** [tag__maybe_h2 main_loop] is the same as [tag__maybe_h1 main_loop] + but for H2. *) + + val tag__md : Omd_representation.t -> Omd_representation.tok + (** [tag__md md] encapsulates [md] to make it a value of type [tok]. + Its purpose is to inject some pre-parsed markdown (i.e., [md] of type [t]) + in a yet-to-parse token stream of type [tok]. *) + + val tag_setext : + main_loop -> Omd_representation.tok list -> Omd_representation.tok list + (** Tag used for the lines that *might* be titles using setext-style. *) + + + val hr_m : l -> l option + (** [hr_m l] returns [Some nl] where [nl] is the remaining of [l] if [l] + contains a horizontal rule "drawn" with dashes. If there's no HR, then + returns [None].*) + + val hr_s : l -> l option + (** [hr_s l] is the same as [hr_m l] but for horizontal rules + "drawn" with stars instead. *) + + exception NL_exception + exception Premature_ending + + val read_until_gt : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_lt : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_cparenth : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_oparenth : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_dq : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_q : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_obracket : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_cbracket : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_space : + ?bq:bool -> + ?no_nl:bool -> + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + val read_until_newline : + Omd_representation.tok list -> + Omd_representation.tok list * Omd_representation.tok list + (** [read_until_...] are functions that read from a token list + and return two token lists: the first one is the tokens read + until a specific token is met, and the second one is the remainder. + The particularity of these functions is that they do consider + backslash-escaped characters and closing characters. + For instance, [read_until_gt "1 < 2 > 3 > 4"] returns + ["1 < 2 > 3 ", " 4"]: note that the ">" before " 4" has disappeared + and that [read_until_gt] takes a [tok list] (not a string) and + returns a couple of [tok list] (not a couple of strings), the + string notation is used here for concision. + + Until otherwise noted, those functions do *not* consider + backquote-trapped sections. + For instance, [read_until_gt "1 < 2 > 3 `>` 4"] + returns ["1 < 2 > 3 `", "` 4"]. + If you use these functions, you should make sure that they + do what you think they do (i.e., do look at the code). + + If the expected characters are not found, the exception + [Premature_ending] is raised. For instance, + [read_until_gt "1 < > 3"] raises [Premature_ending]. + + If [no_nl] is [true] (default value for [no_nl] is [false]) + and ['\n'] occurs before the splitting character, + then [NL_exception] is raised. + *) + + + val read_title : main_loop -> int -> r -> p -> l -> (r * p * l) option + (** [read_title main_loop n r p l] returns [Some(r,p,l)] + if it succeeds, [None] otherwise. + + [read_title main_loop n r p l] expects to read a [n]-level + hash-declared title from [l], where the hashes have *already* + been *removed*. If [n] is not between 1 and 6 (included), then + it returns [None]. + + [main_loop] is used to parse the contents of the title. + + [r] and [p] are the classical "result" and "previous" parameters. + *) + + val maybe_extension : + Omd_representation.extensions -> + r -> p -> l -> (r * p * l) option + (** [maybe_extension e r p l] returns [None] if there is no extension or + if extensions haven't had any effect, returns [Some(nr, np, nl)] if + at least one extension has applied successfully. *) + + val emailstyle_quoting : main_loop -> r -> p -> l -> (r * p * l) option + (** [emailstyle_quoting main_loop r p l] returns [Some(r,p,l)] with + [r] being the updated result, [p] being the last parsed token + and [l] being the remaining tokens to parse. If [emailstyle_quoting] + fails, then it returns [None], in which case its user is advise + to investigate why it returns [None] because there's possibly a + real problem. *) + + val maybe_reference : + main_loop -> + Omd_representation.ref_container -> r -> p -> l -> (r * p * l) option + (** [maybe_reference] tries to parse a reference, a reference definition or + a github-style short reference (e.g., [foo] as a shortcut for [foo][]), + and returns [Some(r,p,l)] if it succeeds, [None] otherwise. *) + + val maybe_link : main_loop -> r -> p -> l -> (r * p * l) option + (** [maybe_link] tries to parse a link, + and returns [Some(r,p,l)] if it succeeds, [None] otherwise. *) + + + val parse_list : main_loop -> r -> p -> l -> r * p * l + (** [parse_list main_loop r p l] parses a list from [l]. + + ***Important property*** + It is considered in Omd that a sub-list is always more indented than + the item that contains it (so, 2 items with different indentations cannot + have the direct same parent). + *) + + val make_paragraphs : Omd_representation.t -> Omd_representation.t + (** Since [Omd_parser.parse] doesn't build paragraphs, if you want + Markdown-style paragraphs, you need to apply this function to + the result of [Omd_parser.parse]. *) + + + val bcode : + ?default_lang:Omd_representation.name -> + r -> p -> l -> (r * p * l) option + (** [bcode default_lang r p l] + tries to parse some code that's delimited by backquotes, + and returns [Some(r,p,l)] if it succeeds, [None] otherwise. + *) + + val icode : + ?default_lang:Omd_representation.name -> + r -> p -> l -> (r * p * l) option + (** [icode default_lang r p l] + tries to parse some code that's delimited by space indentation. + It should always return [Some(r,p,l)], if it returns [None] + it means that it's been misused or there's a bug. + *) + + + val main_loop_rev : ?html:bool -> r -> p -> l -> r + val main_loop : ?html:bool -> r -> p -> l -> Omd_representation.t + val main_parse : Omd_representation.tok list -> Omd_representation.t + val parse : Omd_representation.tok list -> Omd_representation.t + +end + diff --git a/ocaml-lsp-server/src/omd/src/omd_representation.ml b/ocaml-lsp-server/src/omd/src/omd_representation.ml new file mode 100644 index 000000000..2f2362a6b --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_representation.ml @@ -0,0 +1,502 @@ +open Omd_utils +open Printf + +(** references, instances created in [Omd_parser.main_parse] and + accessed in the [Omd_backend] module. *) +module R = Map.Make(String) +class ref_container : object + val mutable c : (string * string) R.t + method add_ref : R.key -> string -> string -> unit + method get_ref : R.key -> (string * string) option + method get_all : (string * (string * string)) list + end = object + val mutable c = R.empty + val mutable c2 = R.empty + + method get_all = R.bindings c + + method add_ref name title url = + c <- R.add name (url, title) c; + let ln = String.lowercase_ascii name in + if ln <> name then c2 <- R.add ln (url, title) c2 + + method get_ref name = + try + let (url, title) as r = + try R.find name c + with Not_found -> + let ln = String.lowercase_ascii name in + try R.find ln c + with Not_found -> + R.find ln c2 + in Some r + with Not_found -> + None +end + +type element = + | H1 of t + | H2 of t + | H3 of t + | H4 of t + | H5 of t + | H6 of t + | Paragraph of t + | Text of string + | Emph of t + | Bold of t + | Ul of t list + | Ol of t list + | Ulp of t list + | Olp of t list + | Code of name * string + | Code_block of name * string + | Br + | Hr + | NL + | Url of href * t * title + | Ref of ref_container * name * string * fallback + | Img_ref of ref_container * name * alt * fallback + | Html of name * (string * string option) list * t + | Html_block of name * (string * string option) list * t + | Html_comment of string + | Raw of string + | Raw_block of string + | Blockquote of t + | Img of alt * src * title + | X of + < name : string; + to_html : ?indent:int -> (t -> string) -> t -> string option; + to_sexpr : (t -> string) -> t -> string option; + to_t : t -> t option > +and fallback = < to_string : string ; to_t : t > +and name = string +and alt = string +and src = string +and href = string +and title = string +and t = element list + + +let rec loose_compare t1 t2 = match t1,t2 with + | H1 e1::tl1, H1 e2::tl2 + | H2 e1::tl1, H2 e2::tl2 + | H3 e1::tl1, H3 e2::tl2 + | H4 e1::tl1, H4 e2::tl2 + | H5 e1::tl1, H5 e2::tl2 + | H6 e1::tl1, H6 e2::tl2 + | Emph e1::tl1, Emph e2::tl2 + | Bold e1::tl1, Bold e2::tl2 + | Blockquote e1::tl1, Blockquote e2::tl2 + | Paragraph e1::tl1, Paragraph e2::tl2 + -> + (match loose_compare e1 e2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + + | Ul e1::tl1, Ul e2::tl2 + | Ol e1::tl1, Ol e2::tl2 + | Ulp e1::tl1, Ulp e2::tl2 + | Olp e1::tl1, Olp e2::tl2 + -> + (match loose_compare_lists e1 e2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + + | (Code _ as e1)::tl1, (Code _ as e2)::tl2 + | (Br as e1)::tl1, (Br as e2)::tl2 + | (Hr as e1)::tl1, (Hr as e2)::tl2 + | (NL as e1)::tl1, (NL as e2)::tl2 + | (Html _ as e1)::tl1, (Html _ as e2)::tl2 + | (Html_block _ as e1)::tl1, (Html_block _ as e2)::tl2 + | (Raw _ as e1)::tl1, (Raw _ as e2)::tl2 + | (Raw_block _ as e1)::tl1, (Raw_block _ as e2)::tl2 + | (Html_comment _ as e1)::tl1, (Html_comment _ as e2)::tl2 + | (Img _ as e1)::tl1, (Img _ as e2)::tl2 + | (Text _ as e1)::tl1, (Text _ as e2)::tl2 + -> + (match compare e1 e2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + + | Code_block(l1,c1)::tl1, Code_block(l2,c2)::tl2 + -> + (match compare l1 l2, String.length c1 - String.length c2 with + | 0, 0 -> + (match compare c1 c2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + | 0, 1 -> + (match compare c1 (c2^"\n") with + | 0 -> loose_compare tl1 tl2 + | i -> i) + | 0, -1 -> + (match compare (c1^"\n") c2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + | i, _ -> i + ) + + | Url (href1, t1, title1)::tl1, Url (href2, t2, title2)::tl2 + -> + (match compare href1 href2 with + | 0 -> (match loose_compare t1 t2 with + | 0 -> (match compare title1 title2 with + | 0 -> loose_compare tl1 tl2 + | i -> i) + | i -> i) + | i -> i) + + | Ref (ref_container1, name1, x1, fallback1)::tl1, + Ref (ref_container2, name2, x2, fallback2)::tl2 + | Img_ref (ref_container1, name1, x1, fallback1)::tl1, + Img_ref (ref_container2, name2, x2, fallback2)::tl2 + -> + (match compare (name1, x1) (name2, x2) with + | 0 -> + let cff = + if fallback1#to_string = fallback2#to_string then + 0 + else + loose_compare (fallback1#to_t) (fallback2#to_t) + in + if cff = 0 then + match + compare (ref_container1#get_all) (ref_container2#get_all) + with + | 0 -> loose_compare tl1 tl2 + | i -> i + else + cff + | i -> i) + + | X e1::tl1, X e2::tl2 -> + (match compare (e1#name) (e2#name) with + | 0 -> (match compare (e1#to_t) (e2#to_t) with + | 0 -> loose_compare tl1 tl2 + | i -> i) + | i -> i) + | X _::_, _ -> 1 + | _, X _::_ -> -1 + | _ -> compare t1 t2 + +and loose_compare_lists l1 l2 = + match l1, l2 with + | [], [] -> 0 + | e1::tl1, e2::tl2 -> + (match loose_compare e1 e2 with + | 0 -> loose_compare_lists tl1 tl2 + | i -> i) + | _, [] -> 1 + | _ -> -1 + + +type tok = (* Cs(n) means (n+2) times C *) +| Ampersand +| Ampersands of int +| At +| Ats of int +| Backquote +| Backquotes of int +| Backslash +| Backslashs of int +| Bar +| Bars of int +| Caret +| Carets of int +| Cbrace +| Cbraces of int +| Colon +| Colons of int +| Comma +| Commas of int +| Cparenthesis +| Cparenthesiss of int +| Cbracket +| Cbrackets of int +| Dollar +| Dollars of int +| Dot +| Dots of int +| Doublequote +| Doublequotes of int +| Exclamation +| Exclamations of int +| Equal +| Equals of int +| Greaterthan +| Greaterthans of int +| Hash +| Hashs of int +| Lessthan +| Lessthans of int +| Minus +| Minuss of int +| Newline +| Newlines of int +| Number of string +| Obrace +| Obraces of int +| Oparenthesis +| Oparenthesiss of int +| Obracket +| Obrackets of int +| Percent +| Percents of int +| Plus +| Pluss of int +| Question +| Questions of int +| Quote +| Quotes of int +| Semicolon +| Semicolons of int +| Slash +| Slashs of int +| Space +| Spaces of int +| Star +| Stars of int +| Tab +| Tabs of int +| Tilde +| Tildes of int +| Underscore +| Underscores of int +| Word of string +| Tag of name * extension + +and extension = < + parser_extension : + t -> tok list -> tok list -> ((t * tok list * tok list) option); + to_string : string +> + +type extensions = extension list + +let empty_extension = object + method parser_extension r p l = None + method to_string = "" +end + +let rec normalise_md l = + if debug then + eprintf "(OMD) normalise_md\n%!"; + let rec loop = function + | [NL;NL;NL;NL;NL;NL;NL;] + | [NL;NL;NL;NL;NL;NL;] + | [NL;NL;NL;NL;NL;] + | [NL;NL;NL;NL;] + | [NL;NL;NL;] + | [NL;NL] + | [NL] -> [] + | [] -> [] + | NL::NL::NL::tl -> loop (NL::NL::tl) + | Text t1::Text t2::tl -> loop (Text(t1^t2)::tl) + | NL::(((Paragraph _|H1 _|H2 _|H3 _|H4 _|H5 _|H6 _ + |Code_block _|Ol _|Ul _|Olp _|Ulp _)::_) as tl) -> loop tl + | Paragraph[Text " "]::tl -> loop tl + | Paragraph[]::tl -> loop tl + | Paragraph(p)::tl -> Paragraph(loop p)::loop tl + | H1 v::tl -> H1(loop v)::loop tl + | H2 v::tl -> H2(loop v)::loop tl + | H3 v::tl -> H3(loop v)::loop tl + | H4 v::tl -> H4(loop v)::loop tl + | H5 v::tl -> H5(loop v)::loop tl + | H6 v::tl -> H6(loop v)::loop tl + | Emph v::tl -> Emph(loop v)::loop tl + | Bold v::tl -> Bold(loop v)::loop tl + | Ul v::tl -> Ul(List.map loop v)::loop tl + | Ol v::tl -> Ol(List.map loop v)::loop tl + | Ulp v::tl -> Ulp(List.map loop v)::loop tl + | Olp v::tl -> Olp(List.map loop v)::loop tl + | Blockquote v::tl -> Blockquote(loop v)::loop tl + | Url(href,v,title)::tl -> Url(href,(loop v),title)::loop tl + | Text _ + | Code _ + | Code_block _ + | Br + | Hr + | NL + | Ref _ + | Img_ref _ + | Html _ + | Html_block _ + | Html_comment _ + | Raw _ + | Raw_block _ + | Img _ + | X _ as v::tl -> v::loop tl + in + let a = loop l in + let b = loop a in + if a = b then + a + else + normalise_md b + + + +let dummy_X = + X (object + method name = "dummy" + method to_html ?(indent=0) _ _ = None + method to_sexpr _ _ = None + method to_t _ = None + end) + + +let rec visit f = function + | [] -> [] + | Paragraph v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Paragraph(visit f v)::visit f tl + end + | H1 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H1(visit f v)::visit f tl + end + | H2 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H2(visit f v)::visit f tl + end + | H3 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H3(visit f v)::visit f tl + end + | H4 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H4(visit f v)::visit f tl + end + | H5 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H5(visit f v)::visit f tl + end + | H6 v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> H6(visit f v)::visit f tl + end + | Emph v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Emph(visit f v)::visit f tl + end + | Bold v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Bold(visit f v)::visit f tl + end + | Ul v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Ul(List.map (visit f) v)::visit f tl + end + | Ol v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Ol(List.map (visit f) v)::visit f tl + end + | Ulp v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Ulp(List.map (visit f) v)::visit f tl + end + | Olp v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Olp(List.map (visit f) v)::visit f tl + end + | Blockquote v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Blockquote(visit f v)::visit f tl + end + | Url(href,v,title) as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Url(href,visit f v,title)::visit f tl + end + | Text v as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Code _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Code_block _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Ref _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Img_ref _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Html _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Html_block _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Html_comment _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Raw _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Raw_block _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Img _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | X _ as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> e::visit f tl + end + | Br as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Br::visit f tl + end + | Hr as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> Hr::visit f tl + end + | NL as e::tl -> + begin match f e with + | Some(l) -> l@visit f tl + | None -> NL::visit f tl + end + + diff --git a/ocaml-lsp-server/src/omd/src/omd_representation.mli b/ocaml-lsp-server/src/omd/src/omd_representation.mli new file mode 100644 index 000000000..6f0474e92 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_representation.mli @@ -0,0 +1,188 @@ + +module R : Map.S with type key = string + +class ref_container : + object + val mutable c : (string * string) R.t + method add_ref : R.key -> string -> string -> unit + method get_ref : R.key -> (string * string) option + method get_all : (string * (string * string)) list + end +type element = + | H1 of t + | H2 of t + | H3 of t + | H4 of t + | H5 of t + | H6 of t + | Paragraph of t + | Text of string + | Emph of t + | Bold of t + | Ul of t list + | Ol of t list + | Ulp of t list + | Olp of t list + | Code of name * string + | Code_block of name * string + | Br + | Hr + | NL + | Url of href * t * title + | Ref of ref_container * name * string * fallback + | Img_ref of ref_container * name * alt * fallback + | Html of name * (string * string option) list * t + | Html_block of name * (string * string option) list * t + | Html_comment of string + | Raw of string + | Raw_block of string + | Blockquote of t + | Img of alt * src * title + | X of + < name : string; + to_html : ?indent:int -> (t -> string) -> t -> string option; + to_sexpr : (t -> string) -> t -> string option; + to_t : t -> t option > +and fallback = < to_string : string ; to_t : t > +and name = string +and alt = string +and src = string +and href = string +and title = string +and t = element list + +type tok = + Ampersand (* one & *) + | Ampersands of int (* [Ampersands(n)] is (n+2) consecutive occurrences of & *) + | At (* @ *) + | Ats of int (* @@.. *) + | Backquote (* ` *) + | Backquotes of int (* ``.. *) + | Backslash (* \\ *) + | Backslashs of int (* \\\\.. *) + | Bar (* | *) + | Bars of int (* ||.. *) + | Caret (* ^ *) + | Carets of int (* ^^.. *) + | Cbrace (* } *) + | Cbraces of int (* }}.. *) + | Colon (* : *) + | Colons of int (* ::.. *) + | Comma (* , *) + | Commas of int (* ,,.. *) + | Cparenthesis (* ) *) + | Cparenthesiss of int (* )).. *) + | Cbracket (* ] *) + | Cbrackets of int (* ]].. *) + | Dollar (* $ *) + | Dollars of int (* $$.. *) + | Dot (* . *) + | Dots of int (* .... *) + | Doublequote (* \034 *) + | Doublequotes of int (* \034\034.. *) + | Exclamation (* ! *) + | Exclamations of int (* !!.. *) + | Equal (* = *) + | Equals of int (* ==.. *) + | Greaterthan (* > *) + | Greaterthans of int (* >>.. *) + | Hash (* # *) + | Hashs of int (* ##.. *) + | Lessthan (* < *) + | Lessthans of int (* <<.. *) + | Minus (* - *) + | Minuss of int (* --.. *) + | Newline (* \n *) + | Newlines of int (* \n\n.. *) + | Number of string + | Obrace (* { *) + | Obraces of int (* {{.. *) + | Oparenthesis (* ( *) + | Oparenthesiss of int (* ((.. *) + | Obracket (* [ *) + | Obrackets of int (* [[.. *) + | Percent (* % *) + | Percents of int (* %%.. *) + | Plus (* + *) + | Pluss of int (* ++.. *) + | Question (* ? *) + | Questions of int (* ??.. *) + | Quote (* ' *) + | Quotes of int (* ''.. *) + | Semicolon (* ; *) + | Semicolons of int (* ;;.. *) + | Slash (* / *) + | Slashs of int (* //.. *) + | Space (* *) + | Spaces of int (* .. *) + | Star (* * *) + | Stars of int (* **.. *) + | Tab (* \t *) + | Tabs of int (* \t\t.. *) + | Tilde (* ~ *) + | Tildes of int (* ~~.. *) + | Underscore (* _ *) + | Underscores of int (* __.. *) + | Word of string + | Tag of name * extension +(** Lexer's tokens. If you want to use the parser with an extended + lexer, you may use the constructor [Tag] to implement + the parser's extension. In the parser, [Tag] is used (at least) + 3 times in order to represent metadata or to store data. + + The integers carried by constructors means that the represented + character appears (n+2) times. So, [Ampersand(0)] is "&&". + Notably, this allows to use the property that in the match + case [Ampersand _ ->], we know there are at least 2 ampersands. + This is particularly useful for some characters, such as newlines + and spaces. It's not useful for all of them indeed but it has + been designed this way for the sake of uniformity (one doesn't + want to know by heart which constructor have that "at least 2" + property and which haven't). +*) + +and extension = < + parser_extension : t -> tok list -> tok list -> ((t * tok list * tok list) option); + to_string : string +> +(** - [parser_extension] is a method that takes the current state of the + parser's data and returns None if nothing has been changed, + otherwise it returns the new state. The current state of the + parser's data is [(r, p, l)] where [r] is the result so far, [p] + is the list of the previous tokens (it's typically empty or + contains information on how many newlines we've just seen), and + [l] is the remaining tokens to parse. + - and [to_string] is a method that returns directly a string + representation of the object (it's normal if it returns the + empty string). *) + +type extensions = extension list +(** One must use this type to extend the parser. It's a list of + functions of type [extension]. They are processed in order (the + head is applied first), so be careful about it. If you use it + wrong, it will behave wrong. *) + +val empty_extension : extension +(** An empty extension *) + +val loose_compare : t -> t -> int +(** [loose_compare t1 t2] returns [0] if [t1] and [t2] + are equivalent, otherwise it returns another number. *) + +val normalise_md : t -> t +(** [normalise_md md] returns a copy of [md] where some elements + have been factorized. *) + +val visit : (element -> t option) -> t -> t +(** visitor for structures of type t: [visit f md] will return a new + potentially altered copy of [md] that has been created by the + visit of [md] by [f]. + + The function [f] takes each [element] (from [md]) and returns + [Some t] if it has effectively been applied to [element], and + [None] otherwise. When it returns [Some t], [t] replaces [element] + in the copy of [md], and when it returns [None], either [element] + is copied as it is in the copy of [md] or a visited version is + copied instead (well, that depends on if [element] has elements + inside of it or not). +*) diff --git a/ocaml-lsp-server/src/omd/src/omd_utils.ml b/ocaml-lsp-server/src/omd/src/omd_utils.ml new file mode 100644 index 000000000..a8f26530c --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_utils.ml @@ -0,0 +1,310 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013/2014 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +open Printf + +let debug = + let _DEBUG = + try + Some(Sys.getenv "DEBUG") + with _ -> None + and _OMD_DEBUG = + try + Some(Sys.getenv "OMD_DEBUG") + with _ -> None + in + match _DEBUG, _OMD_DEBUG with + | _, Some "false" -> + false + | Some _, None -> + eprintf "omd: debug mode activated because DEBUG is set, \ + you can deactivate the mode by unsetting DEBUG \ + or by setting OMD_DEBUG to the string \"false\".\n%!"; + true + | None, None -> + false + | _, Some _ -> + eprintf "omd: debug mode activated because OMD_DEBUG is set + to a value that isn't the string \"false\".\n%!"; + true + +exception Error of string + +let warn ?(we=false) msg = + if we then + raise (Error msg) + else + eprintf "(OMD) Warning: %s\n%!" msg + + +let trackfix = + try + ignore(Sys.getenv "OMD_FIX"); + eprintf "omd: tracking mode activated: token list are very often checked, \ + it might take a *very* long time if your input is large.\n%!"; + true + with Not_found -> + false + +let _ = if debug then Printexc.record_backtrace true + +let raise = + if debug then + (fun e -> + eprintf "(OMD) Exception raised: %s\n%!" (Printexc.to_string e); + raise e) + else + raise + +module StringSet : sig + include Set.S with type elt = string + val of_list : elt list -> t +end = struct + include Set.Make(String) + let of_list l = List.fold_left (fun r e -> add e r) empty l +end + + +type 'a split = 'a list -> 'a split_action +and 'a split_action = + | Continue + | Continue_with of 'a list * 'a list + | Split of 'a list * 'a list + + +let fsplit_rev ?(excl=(fun _ -> false)) ~(f:'a split) l + : ('a list * 'a list) option = + let rec loop accu = function + | [] -> + begin + match f [] with + | Split(left, right) -> Some(left@accu, right) + | Continue_with(left, tl) -> loop (left@accu) tl + | Continue -> None + end + | e::tl as l -> + if excl l then + None + else match f l with + | Split(left, right) -> Some(left@accu, right) + | Continue_with(left, tl) -> loop (left@accu) tl + | Continue -> loop (e::accu) tl + in loop [] l + +let fsplit ?(excl=(fun _ -> false)) ~f l = + match fsplit_rev ~excl:excl ~f:f l with + | None -> None + | Some(rev, l) -> Some(List.rev rev, l) + +let id_of_string ids s = + let n = String.length s in + let out = Buffer.create 0 in + (* Put [s] into [b], replacing non-alphanumeric characters with dashes. *) + let rec loop started i = + if i = n then () + else + match s.[i] with + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' as c -> + Buffer.add_char out c ; + loop true (i + 1) + (* Don't want to start with dashes. *) + | _ when not started -> + loop false (i + 1) + | _ -> + Buffer.add_char out '-' ; + loop false (i + 1) + in + loop false 0 ; + let s' = Buffer.contents out in + if s' = "" then "" + else + (* Find out the index of the last character in [s'] that isn't a dash. *) + let last_trailing = + let rec loop i = + if i < 0 || s'.[i] <> '-' then i + else loop (i - 1) + in + loop (String.length s' - 1) + in + (* Trim trailing dashes. *) + ids#mangle @@ String.sub s' 0 (last_trailing + 1) + +(* only convert when "necessary" *) +let htmlentities ?(md=false) s = + let module Break = struct exception Break end in + let b = Buffer.create 64 in + let rec loop i = + if i = String.length s then + () + else + let () = + match s.[i] with + | ( '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' ) as c -> Buffer.add_char b c + | '"' -> Buffer.add_string b """ + | '\'' -> Buffer.add_string b "'" + | '&' -> + if md then + begin + try + let () = match s.[i+1] with + | '#' -> + let rec ff j = + match s.[j] with + | '0' .. '9' -> ff (succ j) + | ';' -> () + | _ -> raise Break.Break + in + ff (i+2) + | 'A' .. 'Z' | 'a' .. 'z' -> + let rec ff j = + match s.[j] with + | 'A' .. 'Z' | 'a' .. 'z' -> ff (succ j) + | ';' -> () + | _ -> raise Break.Break + in + ff (i+2) + | _ -> raise Break.Break + in + Buffer.add_string b "&" + with _ -> Buffer.add_string b "&" + end + else + Buffer.add_string b "&" + | '<' -> Buffer.add_string b "<" + | '>' -> Buffer.add_string b ">" + | c -> Buffer.add_char b c + in loop (succ i) + in + loop 0; + Buffer.contents b + + +let minimalize_blanks s = + let l = String.length s in + let b = Buffer.create l in + let rec loop f i = + if i = l then + Buffer.contents b + else + match s.[i] with + | ' ' | '\t' | '\n' -> + loop true (succ i) + | c -> + if Buffer.length b > 0 && f then + Buffer.add_char b ' '; + loop false (succ i) + in loop false 0 + +let rec eat f = function + | [] -> [] + | e::tl as l -> if f e then eat f tl else l + + +let rec extract_html_attributes (html:string) = + let rec cut_on_char_from s i c = + match String.index_from s i c with + | 0 -> "", String.sub s 1 (String.length s - 1) + | j -> String.sub s i (j-i), String.sub s (j+1) (String.length s - (j+1)) + in + let remove_prefix_spaces s = + if s = "" then + s + else if s.[0] <> ' ' then + s + else + let rec loop i = + if i = String.length s then + String.sub s i (String.length s - i) + else + match s.[i] with + | ' ' -> loop (i+1) + | _ -> String.sub s i (String.length s - i) + in loop 1 + in + let remove_suffix_spaces s = + if s = "" then + s + else if s.[String.length s - 1] <> ' ' then + s + else + let rec loop i = + match s.[i] with + | ' ' -> loop (i-1) + | _ -> String.sub s 0 (i+1) + in loop (String.length s - 1) + in + let rec loop s res i = + if i = String.length s then + res + else + match + try + Some (take_attribute s i) + with Not_found -> None + with + | Some (((_,_) as a), new_s) -> + loop new_s (a::res) 0 + | None -> res + and take_attribute s i = + let name, after_eq = cut_on_char_from s i '=' in + let name = remove_suffix_spaces name in + let after_eq = remove_prefix_spaces after_eq in + let value, rest = cut_on_char_from after_eq 1 after_eq.[0] in + (name,value), remove_prefix_spaces rest + in + if (* Has it at least one attribute? *) + try String.index html '>' < String.index html ' ' + with Not_found -> true + then + [] + else + match html.[1] with + | '<' | ' ' -> + extract_html_attributes + (remove_prefix_spaces (String.sub html 1 (String.length html - 1))) + | _ -> + try + let html = snd (cut_on_char_from html 0 ' ') in + loop (String.sub html 0 (String.index html '>')) [] 0 + with Not_found -> [] + +let rec extract_inner_html (html:string) = + let rec cut_on_char_from s i c = + match String.index_from s i c with + | 0 -> "", String.sub s 1 (String.length s - 1) + | j -> String.sub s i (j-i), String.sub s (j+1) (String.length s - (j+1)) + in + let rec rcut_on_char_from s i c = + match String.rindex_from s i c with + | 0 -> "", String.sub s 1 (String.length s - 1) + | j -> String.sub s 0 j, String.sub s (j+1) (String.length s - (j+1)) + in + let _, p = cut_on_char_from html 0 '>' in + let r, _ = rcut_on_char_from p (String.length p - 1) '<' in + r + + +let html_void_elements = StringSet.of_list [ + "img"; + "input"; + "link"; + "meta"; + "br"; + "hr"; + "source"; + "wbr"; + "param"; + "embed"; + "base"; + "area"; + "col"; + "track"; + "keygen"; +] + +let ( @ ) l1 l2 = + List.rev_append (List.rev l1) l2 diff --git a/ocaml-lsp-server/src/omd/src/omd_utils.mli b/ocaml-lsp-server/src/omd/src/omd_utils.mli new file mode 100644 index 000000000..7c36c3d03 --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_utils.mli @@ -0,0 +1,118 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013/2014 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +val debug : bool +(** Equals [true] if the environment variable DEBUG is set, + or if the environment variable OMD_DEBUG is set to a string + that is not ["false"]. *) + +val trackfix : bool + +exception Error of string + +val raise : exn -> 'a +(** Same as [Pervasives.raise] except if [debug] equals true, + in which case it prints a trace on stderr before raising the exception. *) + +val warn : ?we:bool -> string -> unit +(** [warn we x] prints a warning with the message [x] if [we] is true, + else raises [Omd_utils.Error x]. *) + +module StringSet : + sig + include Set.S with type elt = string + val of_list : elt list -> t + end +(** Set of [string]. Cf. documentation of {!Set.S} *) + +type 'a split = 'a list -> 'a split_action +(** Type of a split function *) + +and 'a split_action = + (** Don't split yet *) + | Continue + + (** Don't split yet but continue with those two lists instead of default *) + | Continue_with of 'a list * 'a list + + (** Do split with this split scheme *) + | Split of 'a list * 'a list +(** Type of a split action *) + + +val fsplit_rev : + ?excl:('a list -> bool) -> + f:'a split -> 'a list -> ('a list * 'a list) option +(** [fsplit_rev ?excl ~f l] returns [Some(x,y)] where [x] is the + **reversed** list of the consecutive elements of [l] that obey the + split function [f]. + Note that [f] is applied to a list of elements and not just an + element, so that [f] can look farther in the list when applied. + [f l] returns [Continue] if there're more elements to consume, + [Continue_with(left,right)] if there's more elements to consume + but we want to choose what goes to the left part and what remains + to process (right part), and returns [Split(left,right)] if + the splitting is decided. + When [f] is applied to an empty list, if it returns [Continue] + then the result will be [None]. + + If [excl] is given, then [excl] is applied before [f] is, to check + if the splitting should be stopped right away. When the split + fails, it returns [None]. *) + + +val fsplit : + ?excl:('a list -> bool) -> + f:'a split -> 'a list -> ('a list * 'a list) option +(** [fsplit ?excl ~f l] returns [Some(List.rev x, y)] + if [fsplit ?excl ~f l] returns [Some(x,y)], else it returns [None]. *) + +val id_of_string : < mangle : string -> string; .. > -> string -> string +(** [id_of_string ids id] returns a mangled version of [id], using the + method [ids#mangle]. If you don't need mangling, you may use + [object method mangle x = x end] for [ids]. However, the name + [ids] also means that your object should have knowledge of all IDs + it has issued, in order to avoid collision. This is why + [id_of_string] asks for an object rather than "just a + function". *) + +val htmlentities : ?md:bool -> string -> string +(** [htmlentities s] returns a new string in which html-significant + characters have been converted to html entities. For instance, + "" is converted to "<Foo&Bar>". *) + +val minimalize_blanks : string -> string +(** [minimalize_blanks s] returns a copy of [s] in which the first and last + characters are never blank, and two consecutive blanks never happen. *) + + +val eat : ('a -> bool) -> 'a list -> 'a list +(** [eat f l] returns [l] where elements satisfying [f] have been removed, + but it stops removing as soon as one element doesn't satisfy [f]. *) + + +val extract_html_attributes : string -> (string * string) list +(** Takes some HTML and returns the list of attributes of the first + HTML tag. + Notes: + * Doesn't check the validity of HTML tags or attributes. + * Doesn't support backslash escaping. + * Attribute names are delimited by the space and equal characters. + * Attribute values are either delimited by the double quote + or the simple quote character. +*) + +val extract_inner_html : string -> string +(** Takes an HTML node and returns the contents of the node. + If it's not given a node, it returns something rubbish. +*) + +val html_void_elements : StringSet.t +(** HTML void elements *) + +val ( @ ) : 'a list -> 'a list -> 'a list +(** Tail-recursive version of [Pervasives.(@)]. *) diff --git a/ocaml-lsp-server/src/omd/src/omd_xtxt.ml b/ocaml-lsp-server/src/omd/src/omd_xtxt.ml new file mode 100644 index 000000000..68b49f66a --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_xtxt.ml @@ -0,0 +1,28 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(* xtxt = eXTernal eXTension *) + +(* let extensions = ref [] *) + +(* let get () = *) +(* !extensions *) + +(* let register e = *) +(* extensions := e :: !extensions *) + +(* let set es = extensions := es *) + +(* let activate ... *) + +(* (\* let deactivate ... *\) *) + +(* priority (integer?) *) +(* pre-extension *) +(* post-extension *) + + diff --git a/ocaml-lsp-server/src/omd/src/omd_xtxt.mli b/ocaml-lsp-server/src/omd/src/omd_xtxt.mli new file mode 100644 index 000000000..9c1d8749b --- /dev/null +++ b/ocaml-lsp-server/src/omd/src/omd_xtxt.mli @@ -0,0 +1,9 @@ +(***********************************************************************) +(* omd: Markdown frontend in OCaml *) +(* (c) 2013 by Philippe Wang *) +(* Licence : ISC *) +(* http://www.isc.org/downloads/software-support-policy/isc-license/ *) +(***********************************************************************) + +(** xtxt = eXTernal eXTension *) +