1515// [http://www.gnu.org/licenses](http://www.gnu.org/licenses).
1616//
1717// `c.pest` - Pest parser definition for the C language
18- // ====================================================
18+ // =============================================================================
1919//
2020// Comments
21- // --------
21+ // -----------------------------------------------------------------------------
// A doc block is either an inline comment or a block comment. The leading `_`
// makes this rule silent: it produces no token pair of its own.
2222doc_block = _{ inline_comment | block_comment }
2323
24- // Per the [C standard, section
25- // 6.4.3](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=65),
24+ // Per the
25+ // [C standard, section 6.4.3](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=65),
2626// "white-space consists of: (space, horizontal tab, new-line, vertical tab, and
2727// form-feed)." Omit newlines, since the rest of this parser uses these.
// The vertical tab character (0x0B).
2828vertical_tab = { "\x0B" }
// The form feed character (0x0C).
2929form_feed = { "\x0C" }
// Zero or more spaces, horizontal tabs, vertical tabs, or form feeds. Because
// of the `*`, this rule also matches the empty string.
3030white_space = { (" " | "\t" | vertical_tab | form_feed)* }
3131
32- // The [C standard, section
33- // 6.4.9](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=65),
32+ // The
33+ // [C standard, section 6.4.9](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=65),
3434// defines inline and block comments.
3535//
3636// ### Inline comments
@@ -50,110 +50,106 @@ block_comment_closing_delim_1 = { unused }
// This rule only delegates to `unused` (defined outside this section).
// NOTE(review): presumably C needs no additional block comment closing
// delimiter here -- confirm against the definition of `unused`.
5050block_comment_closing_delim_2 = { unused }
5151
5252// Code
53- // ----
53+ // -----------------------------------------------------------------------------
5454//
55- // Per the [C standard, section
56- // 5.1.1.2](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=24),
55+ // Per the
56+ // [C standard, section 5.1.1.2](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=24),
5757// if a line of code ends with a backslash, it continues on the next line. This
5858// is a logical line; treat it as a single line. Therefore, consider a
5959// backslash-newline (or anything that's not a newline) a part of the current
6060// logical line. Note that this parser doesn't apply this rule to comments
6161// (which, per the spec, it should) for several reasons:
6262//
63- // 1. Comments continued onto another line don't look like a comment; this
64- // would confuse most developers.
65- //
66- // 2. The backslash-newline in a comment creates a [hard line
67- // break](https://spec.commonmark.org/0.31.2/#hard-line-breaks) in Markdown,
68- // which means inserting a hard line break this way in an inline comment
69- // requires the next line to omit the inline comment delimiters. For
70- // example:
71- //
72- // ```C
73- // // This is a hard line break\
74- // followed by a comment which must not include the // inline comment
75- // // delimiter on the line after the line break, but which must
76- // include them on following lines.
77- // ```
78- //
79- // 3. The CodeChat Editor web-to-code function produces incorrect results in
80- // this case, adding a comment delimiter when it shouldn't. To fix this, it
81- // would have to look for a backslash newline only in C/C++-like languages.
63+ // 1. Comments continued onto another line don't look like a comment; this would
64+ // confuse most developers.
65+ //
66+ // 2. The backslash-newline in a comment creates a
67+ // [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks) in
68+ // Markdown, which means inserting a hard line break this way in an inline
69+ // comment requires the next line to omit the inline comment delimiters. For
70+ // example:
71+ //
72+ // ```C
73+ // // This is a hard line break\
74+ // followed by a comment which must not include the // inline comment
75+ // // delimiter on the line after the line break, but which must
76+ // include them on following lines.
77+ // ```
78+ //
79+ // 3. The CodeChat Editor web-to-code function produces incorrect results in
80+ // this case, adding a comment delimiter when it shouldn't. To fix this, it
81+ // would have to look for a backslash newline only in C/C++-like languages.
// One character of a logical line: either a backslash immediately followed by
// a newline (a line continuation), or whatever `not_newline` (defined outside
// this section) matches. The continuation alternative is tried first. This
// rule is silent.
8282logical_line_char = _{ ("\\" ~ NEWLINE) | not_newline }
// A code line token is a single logical line character. This rule is silent.
8383code_line_token = _{ logical_line_char }
8484
8585// Dedenter
86- // --------
86+ // -----------------------------------------------------------------------------
8787//
8888// This parser runs separately; it dedents block comments. There are several
8989// cases:
9090//
91- // * A single line: `/* comment */`. No special handling needed.
92- // * Multiple lines, in two styles.
93- // * Each line of the comment is not consistently whitespace-indented. No
94- // special handling needed. For example:
95- //
96- // ```C
97- // /* This is
98- // not
99- // consistently indented. */
100- // ```
101- //
102- // * Each line of the comment is consistently whitespace-indented; for
103- // example:
104- //
105- // ```C
106- // /* This is
107- // consistently indented. */
108- // ```
109- //
110- // Consistently indented means the first non-whitespace character on a
111- // line aligns with, but never comes before, the comment's start.
112- // Another example:
113- //
114- // ```C
115- // /* This is
116- // correct
117- //
118- // indentation.
119- // */
120- // ```
121- //
122- // Note that the third (blank) line doesn't have an indent; since that
123- // line consists only of whitespace, this is OK. Likewise, the last line
124- // (containing the closing comment delimiter of `*/`) consists only of
125- // whitespace after the comment delimiters are removed.
126- //
127- // * Each line of the comment is consistently asterisk-indented; for
128- // example:
129- //
130- // ```C
131- // /* This is
132- // * correct
133- // *
134- // * indentation.
135- // */
136- // ```
137- //
138- // Note that in this case, no whitespace-only lines are allowed.
139- // Instead, the special case is lines which have a newline immediately
140- // after the `*`.
91+ // * A single line: `/* comment */`. No special handling needed.
92+ // * Multiple lines, in two styles.
93+ // * Each line of the comment is not consistently whitespace-indented. No
94+ // special handling needed. For example:
95+ //
96+ // ```C
97+ // /* This is
98+ // not
99+ // consistently indented. */
100+ // ```
101+ //
102+ // * Each line of the comment is consistently whitespace-indented; for
103+ // example:
104+ //
105+ // ```C
106+ // /* This is
107+ // consistently indented. */
108+ // ```
109+ //
110+ // Consistently indented means the first non-whitespace character on a line
111+ // aligns with, but never comes before, the comment's start. Another
112+ // example:
113+ //
114+ // ```C
115+ // /* This is
116+ // correct
117+ //
118+ // indentation.
119+ // */
120+ // ```
121+ //
122+ // Note that the third (blank) line doesn't have an indent; since that line
123+ // consists only of whitespace, this is OK. Likewise, the last line
124+ // (containing the closing comment delimiter of `*/`) consists only of
125+ // whitespace after the comment delimiters are removed.
126+ //
127+ // * Each line of the comment is consistently asterisk-indented; for example:
128+ //
129+ // ```C
130+ // /* This is
131+ // * correct
132+ // *
133+ // * indentation.
134+ // */
135+ // ```
136+ //
137+ // Note that in this case, no whitespace-only lines are allowed. Instead,
138+ // the special case is lines which have a newline immediately after the `*`.
141139//
142140// To implement this dedenting, there are two paths to accepting the contents of
143141// a block comment. If neither path matches, this parser rejects the block (it
144142// cannot be dedented). The approach:
145143//
146- // 1. The space-indented path. This requires:
147- // 1. The first line ends with a newline. (`valid_first_line`)
148- // 2. Non-first lines with contents must be properly indented. If a
149- // non-first line ends in a newline, it must not be the last line.
150- // (`dedented_line`)
151- // 3. A whitespace-only line must not be the last line, unless it has
152- // exactly the indent needed to align the closing comment delimiter
153- // (`last_line`).
154- // 2. The asterisk-indented path. The requirements are the same as the
155- // space-indented path, though the proper indent includes an asterisk in the
156- // correct location.
144+ // 1. The space-indented path. This requires:
145+ // 1. The first line ends with a newline. (`valid_first_line`)
146+ // 2. Non-first lines with contents must be properly indented. If a non-first
147+ // line ends in a newline, it must not be the last line. (`dedented_line`)
148+ // 3. A whitespace-only line must not be the last line, unless it has exactly
149+ // the indent needed to align the closing comment delimiter (`last_line`).
150+ // 2. The asterisk-indented path. The requirements are the same as the
151+ // space-indented path, though the proper indent includes an asterisk in the
152+ // correct location.
// Match an entire block comment, anchored at the start (`SOI`) and end (`EOI`)
// of the input: the indent, a valid first line, then one or more following
// lines which either all take the space-indented path or all take the
// asterisk-indented path. `DROP` pops the top of the Pest stack without
// matching any input.
// NOTE(review): presumably the popped entry is the indent pushed by `indent`
// (defined outside this section) -- confirm.
157153dedenter = {
158154 SOI ~ indent ~ valid_first_line ~ (valid_space_line+ | valid_star_line+) ~ DROP ~ EOI
159155}
0 commit comments