1616import java .util .Map ;
1717import java .util .Set ;
1818import java .util .TreeMap ;
19+ import java .util .regex .Matcher ;
20+ import java .util .regex .Pattern ;
1921import java .util .stream .Collectors ;
2022
2123import org .eclipse .xtext .Grammar ;
2224import org .eclipse .xtext .GrammarUtil ;
2325import org .eclipse .xtext .TerminalRule ;
2426
2527import com .google .common .base .Joiner ;
28+ import com .google .common .collect .Iterables ;
2629import com .google .gson .annotations .Expose ;
2730
2831/**
@@ -125,8 +128,8 @@ protected TextMateGrammar init(Grammar grammar) {
125128 }
126129 TextMateGrammar result = new TextMateGrammar ();
127130 result .setScopeName (scopeName );
128- TextMateRule keywords = getKeywordControlRule (grammar , ignoreCase );
129- result .addRule (keywords );
131+ result . addRule ( getKeywordControlRule (grammar , ignoreCase ) );
132+ result .addRule (getPunctuationRule ( grammar , ignoreCase ) );
130133
131134 Set <String > seenTerminalRules = new HashSet <>();
132135 for (TextMateRule pattern : patterns ) {
@@ -164,7 +167,7 @@ protected AutoRule newAutoRule() {
164167 protected String getLanguageName (Grammar grammar ) {
165168 return GrammarUtil .getSimpleName (grammar ).toLowerCase (Locale .ROOT );
166169 }
167-
170+
168171 protected TextMateRule getKeywordControlRule (Grammar grammar , boolean ignoreCase ) {
169172 StringBuilder matchBuilder = new StringBuilder ();
170173 if (ignoreCase ) {
@@ -173,15 +176,54 @@ protected TextMateRule getKeywordControlRule(Grammar grammar, boolean ignoreCase
173176 matchBuilder .append ("\\ b(" );
174177 List <String > allKeywords = GrammarUtil .getAllKeywords (grammar )
175178 .stream ()
176- .filter (s -> s .matches ("\\ w+" ))
179+ .filter (s -> s .matches ("\\ w+" ))
177180 .sorted (Comparator .naturalOrder ())
178181 .collect (Collectors .toList ());
179- matchBuilder . append ( Joiner .on ("|" ).join ( allKeywords ) );
182+ Joiner .on ("|" ).appendTo ( matchBuilder , allKeywords );
180183 matchBuilder .append (")\\ b" );
181184 MatchRule result = new MatchRule ();
182185 result .setName ("keyword.control." + getLanguageName (grammar ));
183186 result .setMatch (matchBuilder .toString ());
184187 return result ;
185188 }
186189
190+ protected TextMateRule getPunctuationRule (Grammar grammar , boolean ignoreCase ) {
191+ StringBuilder matchBuilder = new StringBuilder ();
192+ if (ignoreCase ) {
193+ matchBuilder .append ("(?i)" );
194+ }
195+ matchBuilder .append ("(" );
196+ List <String > allKeywords = GrammarUtil .getAllKeywords (grammar )
197+ .stream ()
198+ .filter (s -> !s .matches ("\\ w+" ))
199+ .sorted (Comparator .naturalOrder ())
200+ .collect (Collectors .toList ());
201+ Joiner .on ("|" ).appendTo (matchBuilder , Iterables .transform (allKeywords , this ::escapeAndAddWordBoundaries ));
202+ matchBuilder .append (")" );
203+ MatchRule result = new MatchRule ();
204+ result .setName ("punctuation." + getLanguageName (grammar ));
205+ result .setMatch (matchBuilder .toString ());
206+ return result ;
207+ }
208+
209+ private static final Pattern START_IS_LETTER = Pattern .compile ("^\\ w" );
210+ private static final Pattern END_IS_LETTER = Pattern .compile ("\\ w$" );
211+ protected String escapeAndAddWordBoundaries (String token ) {
212+ StringBuilder result = new StringBuilder ();
213+ if (START_IS_LETTER .matcher (token ).find ()) {
214+ result .append ("\\ b" );
215+ }
216+ result .append (escapeForRegex (token ));
217+ if (END_IS_LETTER .matcher (token ).find ()) {
218+ result .append ("\\ b" );
219+ }
220+ return result .toString ();
221+ }
222+
223+ private static final Pattern REGEX_CONTROL_CHARS = Pattern .compile ("[\\ \\ ^$.*+?()\\ [\\ ]{}|]" );
224+ private static String escapeForRegex (String input ) {
225+ Matcher matcher = REGEX_CONTROL_CHARS .matcher (input );
226+ return matcher .replaceAll (match -> Matcher .quoteReplacement ("\\ " + match .group ()));
227+ }
228+
187229}
0 commit comments