@@ -58,14 +58,17 @@ alias RscGrammar = Grammar;
5858}
5959
6060@description {
61- The conversion consists of two stages:
61+ The conversion consists of three stages:
62+ - preprocessing (function `preprocess`);
6263 - analysis (function `analyze`);
6364 - transformation (function `transform`).
6465
65- The aim of the analysis stage is to select those productions of the Rascal
66- grammar that are "suitable for conversion" to TextMate rules. The aim of the
67- transformation stage is to subsequently convert those productions and
68- produce a TextMate grammar.
66+ The aim of the preprocessing stage is to slightly massage the Rascal grammar
67+ to make analysis and transformation easier (e.g., replace singleton ranges
68+ with just the corresponding literal). The aim of the analysis stage is to
69+ select those productions of the Rascal grammar that are "suitable for
70+ conversion" to TextMate rules. The aim of the transformation stage is to
71+ subsequently convert those productions and produce a TextMate grammar.
6972
7073 To be able to cleanly separate analysis and transformation, productions
7174 selected during the analysis stage are wrapped into *conversion units* that
@@ -161,8 +164,6 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
161164 Each production in the list (including the synthetic ones) is *suitable for
162165 conversion* to a TextMate rule. A production is "suitable for conversion"
163166 when it satisfies each of the following conditions:
164- - it is non-recursive;
165- - it does not match newlines;
166167 - it does not match the empty word;
167168 - it has a `@category` tag.
168169
@@ -171,36 +172,42 @@ private RscGrammar replaceLegacySemanticTokenTypes(RscGrammar rsc)
171172
172173@description {
173174 The analysis consists of four stages:
174- 1. selection of user-defined productions;
175- 2. creation of synthetic delimiters production;
176- 3. creation of synthetic keywords production.
177-
178- In stage 1, a dependency graph among all productions that occur in `rsc`
179- (specifically: `prod` constructors) is created. This dependency graph is
180- subsequently pruned to keep only the suitable-for-conversion productions:
181- - first, productions with a cyclic dependency on themselves are removed;
182- - next, productions that only involve single-line matching are retained;
183- - next, productions that only involve non-empty word matching are retained;
184- - next, productions that have a `@category` tag are retained.
175+ 1. selection of user-defined productions;
176+ 2. creation of synthetic delimiters production;
177+ 3. creation of synthetic keywords production;
178+ 4. wrapping of productions inside conversion units.
179+
180+ In stage 1, each user-defined production (specifically: `prod` constructor)
181+ that occurs in `rsc` is selected for conversion when it fulfils the
182+ following requirements:
183+ - it has a unique `@category` tag;
184+ - it doesn't match the empty word.
185185
186186 In stage 2, the set of all delimiters that occur in `rsc` is created. This
187187 set is subsequently reduced by removing:
188188 - strict prefixes of delimiters;
189- - delimiters that enclose user-defined productions;
190- - delimiters that occur at the beginning of user-defined productions.
189+ - delimiters that also occur as outer delimiters of
190+ suitable-for-conversion productions;
191+ - delimiters that also occur as inner delimiters of
192+ suitable-for-conversion productions.
191193
192194 In stage 3, the set of all keywords that occur in `rsc` is created.
195+
196+ In stage 4, each suitable-for-conversion production is wrapped in a
197+ conversion unit with additional metadata (e.g., the inner/outer delimiters
198+ of the production). The list of conversion units is subsequently reduced
199+ by removing strict prefixes, and sorted.
193200}
194201
195202list [ConversionUnit ] analyze (RscGrammar rsc , str name ) {
196203 str jobLabel = "Analyzing<name == "" ? "" : " (<name > )" > " ;
197204 jobStart (jobLabel , work = 6 );
198205
199- // Analyze productions
206+ // Stage 1: Analyze productions
200207 jobStep (jobLabel , "Analyzing productions" );
201208 list [Production ] prods = [p | /p : prod (_, _, _) <- rsc ];
202209
203- // Analyze categories
210+ // Stage 1: Analyze categories
204211 jobStep (jobLabel , "Analyzing categories" );
205212 prods = for (p <- prods ) {
206213
@@ -221,24 +228,24 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) {
221228 append p ;
222229 }
223230
224- // Analyze emptiness
231+ // Stage 1: Analyze emptiness
225232 jobStep (jobLabel , "Analyzing emptiness" );
226233 prods = [p | p <- prods , !tryParse (rsc , delabel (p .def ), "" )];
227234
228- // Analyze delimiters
235+ // Stage 2: Analyze delimiters
229236 jobStep (jobLabel , "Analyzing delimiters" );
230237 set [Symbol ] delimiters = {s | /Symbol s := rsc , isDelimiter (delabel (s ))};
231238 delimiters &= removeStrictPrefixes (delimiters );
232239 delimiters -= {s | p <- prods , /just (s ) := getOuterDelimiterPair (rsc , p )};
233240 delimiters -= {s | p <- prods , /just (s ) := getInnerDelimiterPair (rsc , p , getOnlyFirst = true )};
234241 list [Production ] prodsDelimiters = [prod (lex (DELIMITERS_PRODUCTION_NAME ), [\alt (delimiters )], {})];
235242
236- // Analyze keywords
243+ // Stage 3: Analyze keywords
237244 jobStep (jobLabel , "Analyzing keywords" );
238245 set [Symbol ] keywords = {s | /Symbol s := rsc , isKeyword (delabel (s ))};
239246 list [Production ] prodsKeywords = [prod (lex (KEYWORDS_PRODUCTION_NAME ), [\alt (keywords )], {\tag ("category" ("keyword.control" ))})];
240247
241- // Prepare units
248+ // Stage 4: Prepare units
242249 jobStep (jobLabel , "Preparing units" );
243250 bool isEmptyProd (prod (_, [\alt (alternatives )], _))
244251 = alternatives == {};
@@ -260,8 +267,13 @@ list[ConversionUnit] analyze(RscGrammar rsc, str name) {
260267
261268@description {
262269 The transformation consists of two stages:
263- 1. creation of TextMate rules;
264- 2. composition of TextMate rules into a TextMate grammar.
270+ 1. creation of TextMate rules;
271+ 2. composition of TextMate rules into a TextMate grammar.
272+
273+ Stage 1 is organized as a pipeline that, step-by-step, adds names and rules
274+ to the conversion units. First, it adds unique names. Next, it adds "inner
275+ rules". Last, it adds "outer rules". See module
276+ `lang::textmate::ConversionUnit` for an explanation of inner/outer rules.
265277}
266278
267279TmGrammar transform (list [ConversionUnit ] units , str name , NameGeneration nameGeneration = long ()) {
0 commit comments