fix: taglink special-cases |(| |{| …

justinmk · justinmk · commit c8aeb9490a40 · 2022-10-19T16:35:34.000+02:00
diff --git a/README.md b/README.md
@@ -46,9 +46,10 @@ Known issues
   `:help lcs-tab`.
 - `url` doesn't handle _surrounding_ parens. E.g. `(https://example.com/#yay)` yields `word`
 - `url` doesn't handle _nested_ parens. E.g. `(https://example.com/(foo)#yay)`
-- `column_heading` currently only recognizes tilde "~" preceded by space (i.e.
-  "foo ~" not "foo~"). This covers 99% of :help files, but the grammar should
+- `column_heading` currently only recognizes tilde `~` preceded by space (i.e.
+  `foo ~` not `foo~`). This covers 99% of :help files, but the grammar should
   probably support "foo~" also.
+- `column_heading` children should be plaintext. Currently its children are parsed as `$._atom`.
 
 TODO
 ----
diff --git a/corpus/arguments.txt b/corpus/arguments.txt
@@ -92,11 +92,11 @@ EXTERNAL *netrw-externapp* {{{2
 (help_file
   (block
     (line
-      (word)
-      (ERROR
-        (word))
       (argument
-        (word))
+        (word)
+        (ERROR
+          (word)
+          (word)))
       (word)
       (codespan
         (word))
diff --git a/corpus/optionlink.txt b/corpus/optionlink.txt
@@ -107,11 +107,11 @@ number: '04' 'ISO-10646-1' 'python3'
         (MISSING "*"))
       (word)
       (word)
-      (word)
-      (ERROR
-        (word))
       (argument
-        (word))
+        (word)
+        (ERROR
+          (word)
+          (word)))
       (word)
       (word))
     (line
diff --git a/corpus/taglink.txt b/corpus/taglink.txt
@@ -15,6 +15,8 @@ taglink alone
 ================================================================================
 taglink in text
 ================================================================================
+|(|, |)|, |`|, |{|, |}|.
+
 Hello |world| hello
 
 |-+|	+[num]	line
@@ -26,6 +28,23 @@ Hello |world| hello
 --------------------------------------------------------------------------------
 
 (help_file
+  (block
+    (line
+      (taglink
+        (word))
+      (word)
+      (taglink
+        (word))
+      (word)
+      (taglink
+        (word))
+      (word)
+      (taglink
+        (word))
+      (word)
+      (taglink
+        (word))
+      (word)))
   (block
     (line
       (word)
diff --git a/grammar.js b/grammar.js
@@ -199,7 +199,15 @@ module.exports = grammar({
     // Link to option: 'foo'. Lowercase non-digit ASCII, minimum 2 chars. #14
     optionlink: ($) => _word($, /[a-z][a-z]+/, "'", "'"),
     // Link to tag: |foo|
-    taglink: ($) => _word($, /[^|\n\t ]+/, '|', '|'),
+    taglink: ($) => _word($, choice(
+          token.immediate(/[^|\n\t ]+/),
+          // Special cases: |(| |{| …
+          token.immediate('{'),
+          token.immediate('}'),
+          token.immediate('('),
+          token.immediate(')'),
+          token.immediate('`'),
+    ), '|', '|'),
     // Inline code (may contain whitespace!): `foo bar`
     codespan: ($) => _word($, /[^``\n]+/, '`', '`'),
     // Argument: {arg}
@@ -208,9 +216,10 @@ module.exports = grammar({
 });
 
 // Word delimited by special chars.
-// The word_regex capture is aliased to "word" because they are semantically
-// the same: atoms of captured plain text.
-function _word($, word_regex, c1, c2, fname) {
+// `rule` is a grammar rule or a regex (a regex gets wrapped in token.immediate()).
+// Its match is aliased to "word": both are atoms of captured plain text.
+function _word($, rule, c1, c2, fname) {
+  rule = rule.test !== undefined ? token.immediate(rule) : rule
   fname = fname ?? 'text';
-  return seq(c1, field(fname, alias(token.immediate(word_regex), $.word)), token.immediate(c2));
+  return seq(c1, field(fname, alias(rule, $.word)), token.immediate(c2));
 }