Skip to content

Commit 5c325a5

Browse files
authored
Merge pull request #22 from SWAT-engineering/ignore-category-production-inside-category-production
Support nested categories
2 parents 09aab8c + 10284f9 commit 5c325a5

File tree

18 files changed

+497
-79
lines changed

18 files changed

+497
-79
lines changed

rascal-textmate-core/.editorconfig

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Editor configuration, see http://editorconfig.org
2+
root = true
3+
4+
[*]
5+
charset = utf-8
6+
indent_style = space
7+
indent_size = 2
8+
insert_final_newline = true
9+
trim_trailing_whitespace = true
10+
max_line_length = 80
11+
12+
[*.sh]
13+
end_of_line = lf
14+
15+
[*.java]
16+
indent_size = 4
17+
max_line_length = 120
18+
19+
[*.rsc]
20+
indent_size = 4
21+
max_line_length = 120

rascal-textmate-core/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
target
2-
node_modules
2+
node_modules
3+
4+
src/main/rascal/Scratch.rsc

rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,15 @@ RegExp toRegExp(Grammar g, list[Symbol] symbols, set[Attr] attributes) {
6060
RegExp toRegExp(Grammar g, \label(_, symbol))
6161
= toRegExp(g, symbol);
6262
RegExp toRegExp(Grammar g, \parameter(_, _)) {
63-
throw "Presumably unreachable..."; } // Covered by `lookup` (which substitutes actuals for formals)
63+
throw "Presumably unreachable..."; } // Covered by `prodsOf` (which substitutes actuals for formals)
6464
6565
// `ParseTree`: Start
6666
RegExp toRegExp(Grammar g, \start(symbol))
6767
= toRegExp(g, symbol);
6868
6969
// `ParseTree`: Non-terminals
7070
RegExp toRegExp(Grammar g, Symbol s)
71-
= infix("|", [toRegExp(g, p) | p <- lookup(g, s)]) when isNonTerminalType(s);
71+
= infix("|", [toRegExp(g, p) | p <- prodsOf(g, s)]) when isNonTerminalType(s);
7272
7373
// `ParseTree`: Terminals
7474
RegExp toRegExp(Grammar _, \lit(string))
@@ -103,7 +103,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
103103
prefixConditions = [c | c <- conditions, isPrefixCondition(c)];
104104
suffixConditions = [c | c <- conditions, isSuffixCondition(c)];
105105
deleteConditions = [c | c <- conditions, isDeleteCondition(c)];
106-
106+
107107
// Convert except conditions (depends on previous conversion)
108108
if (_ <- exceptConditions) {
109109
if (/\choice(symbol, alternatives) := g) {
@@ -112,7 +112,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
112112
= \label(l, _) := def
113113
? \except(l) notin exceptConditions
114114
: true;
115-
115+
116116
re = infix("|", toRegExps(g, {a | a <- alternatives, keep(a)}));
117117
}
118118
}
@@ -130,7 +130,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
130130
// Convert delete conditions (depends on previous conversions)
131131
if (_ <- deleteConditions) {
132132
RegExp delete = infix("|", [toRegExp(g, s) | \delete(s) <- deleteConditions]);
133-
133+
134134
// Match `re` as `head`, unless `delete` can match exactly the same text (i.e., is followed by the same `tail`)
135135
str string = "(?=(?\<head\><re.string>)(?\<tail\>.*)$)(?!(?:<delete.string>)\\k\<tail\>$)\\k\<head\>";
136136
list[str] categories = ["", *re.categories, "", *delete.categories];
@@ -196,7 +196,7 @@ str encode(int char) = preEncoded[char] ? "\\x{<toHex(char)>}";
196196
private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]};
197197
198198
private str toHex(int i)
199-
= i < 16
199+
= i < 16
200200
? hex[i]
201201
: toHex(i / 16) + toHex(i % 16);
202202

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,18 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) {
3737
Checks if symbol `s` is recursive in grammar `g`
3838
}
3939

40-
bool isRecursive(Grammar g, Symbol s) {
41-
set[Symbol] getChildren(Symbol s)
42-
= {s | p <- lookup(g, s), /Symbol s := p.symbols};
40+
bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {})
41+
= s in checking || any(p <- prodsOf(g, delabel(s)),
42+
/Symbol child := p.symbols,
43+
isRecursive(g, child, checking = checking + s));
4344
44-
bool check(set[Symbol] checking, Symbol s)
45-
= s in checking
46-
? true
47-
: any(child <- getChildren(s), check(checking + s, child));
48-
49-
return check({}, s);
45+
@synopsis{
46+
Checks if production `p` is recursive in grammar `g`
5047
}
5148
49+
bool isRecursive(Grammar g, Production p)
50+
= any(/Symbol s := p.symbols, isRecursive(g, s));
51+
5252
@synopsis{
5353
Representation of a pointer to a symbol in (the list of symbols of) a
5454
production. This is useful to distinguish between different occurrences of
@@ -70,7 +70,7 @@ alias Pointer = tuple[Production p, int index];
7070
7171
```
7272
lexical X = Y;
73-
lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
73+
lexical Y = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
7474
lexical Z1 = "foo" "bar";
7575
lexical Z2 = "baz";
7676
```
@@ -80,7 +80,7 @@ alias Pointer = tuple[Production p, int index];
8080
- `<X,0>`
8181
- `<Y.alt1,3>`
8282
- `<Z1,1>`
83-
83+
8484
The list of pointers to `"qux"` is just empty.
8585
}
8686
@@ -92,7 +92,7 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
9292
if (ith == needle) {
9393
return [<haystack, i>];
9494
}
95-
for (isNonTerminalType(ith), child <- lookup(g, ith)) {
95+
for (isNonTerminalType(ith), child <- prodsOf(g, ith)) {
9696
if (list[Pointer] l: [_, *_] := doFind(doing + haystack, child, s)) {
9797
return [<haystack, i>] + l;
9898
}
@@ -106,19 +106,26 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
106106
}
107107
108108
@synopsis{
109-
Lookups a list of productions for symbol `s` in grammar `g`, replacing
109+
Gets the set of productions that contain symbol `s` in grammar `g`
110+
}
111+
112+
set[Production] prodsWith(Grammar g, Symbol s)
113+
= {parent | /parent: prod(_, /Symbol _: s, _) := g};
114+
115+
@synopsis{
116+
Gets the list of productions of symbol `s` in grammar `g`, replacing
110117
formal parameters with actual parameters when needed
111118
}
112119
113-
list[Production] lookup(Grammar g, s: \parameterized-sort(name, actual))
120+
list[Production] prodsOf(Grammar g, s: \parameterized-sort(name, actual))
114121
= [subst(p, formal, actual) | /p: prod(\parameterized-sort(name, formal), _, _) := g.rules[s] ? []]
115122
+ [subst(p, formal, actual) | /p: prod(label(_, \parameterized-sort(name, formal)), _, _) := g.rules[s] ? []];
116123
117-
list[Production] lookup(Grammar g, s: \parameterized-lex(name, actual))
124+
list[Production] prodsOf(Grammar g, s: \parameterized-lex(name, actual))
118125
= [subst(p, formal, actual) | /p: prod(\parameterized-lex(name, formal), _, _) := g.rules[s] ? []]
119126
+ [subst(p, formal, actual) | /p: prod(label(_, \parameterized-lex(name, formal)), _, _) := g.rules[s] ? []];
120127
121-
default list[Production] lookup(Grammar g, Symbol s)
128+
default list[Production] prodsOf(Grammar g, Symbol s)
122129
= [p | /p: prod(s, _, _) := g.rules[s] ? []]
123130
+ [p | /p: prod(label(_, s), _, _) := g.rules[s] ? []];
124131
@@ -130,7 +137,7 @@ default list[Production] lookup(Grammar g, Symbol s)
130137
&T subst(&T t, list[Symbol] from, list[Symbol] to)
131138
= subst(t, toMapUnique(zip2(from, to)))
132139
when size(from) == size(to);
133-
140+
134141
private &T subst(&T t, map[Symbol, Symbol] m)
135142
= visit (t) { case Symbol s => m[s] when s in m };
136143

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
module lang::rascal::grammar::analyze::Categories
2+
3+
import Grammar;
4+
import ParseTree;
5+
6+
import lang::rascal::grammar::Util;
7+
8+
@synopsis{
9+
Special value to indicate that a production has no category
10+
}
11+
12+
public str NO_CATEGORY = "";
13+
14+
@synopsis{
15+
Gets a set of categories such that, for each category, there exists a string
16+
with that category produced by production `p`, as part of a string produced
17+
by a start production of grammar `g`
18+
}
19+
20+
set[str] getCategories(Grammar g, Production p)
21+
= getCategoriesByProduction(g)[p];
22+
23+
@memo
24+
private map[Production, set[str]] getCategoriesByProduction(Grammar g) {
25+
map[Production, set[str]] ret = (p: {} | /p: prod(_, _, _) := g);
26+
27+
void doGet(Production p, set[str] parentCategories) {
28+
set[str] categories = {c | /\tag("category"(str c)) := p};
29+
30+
set[str] old = ret[p];
31+
set[str] new = _ <- categories ? categories : old + parentCategories;
32+
ret[p] = new;
33+
34+
// If the new categories of `p` are different from the old ones, then
35+
// propagate these changes to the children of `p`
36+
for (old != new, /Symbol s := p.symbols, child <- prodsOf(g, delabel(s))) {
37+
doGet(child, new);
38+
}
39+
}
40+
41+
// Propagate categories from the roots of the grammar
42+
for (root: prod(\start(_), _, _) <- ret) {
43+
doGet(root, {NO_CATEGORY});
44+
}
45+
46+
return ret;
47+
}

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = fal
4949
```
5050
lexical X = Y;
5151
lexical Y = Y1 | Y2;
52-
lexical Y1 = "[" Z "]";
52+
lexical Y1 = "[" Z "]";
5353
lexical Y2 = "[" Z ")" [a-z];
5454
lexical Z = [a-z];
5555
```
@@ -83,7 +83,7 @@ private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Directio
8383
@memo
8484
private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) {
8585
map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g);
86-
86+
8787
solve (ret) {
8888
for (p <- ret, ret[p] == nothing()) {
8989
for (s <- reorder(p.symbols, direction)) {
@@ -108,7 +108,7 @@ private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g,
108108
}
109109
110110
private set[Production] getChildren(Grammar g, Symbol s)
111-
= {*lookup(g, s)};
111+
= {*prodsOf(g, s)};
112112
113113
@synopsis{
114114
Gets the unique rightmost delimiter (`begin`) and the unique leftmost
@@ -122,7 +122,7 @@ private set[Production] getChildren(Grammar g, Symbol s)
122122
```
123123
lexical X = Y;
124124
lexical Y = Y1 | Y2;
125-
lexical Y1 = "[" Z "]";
125+
lexical Y1 = "[" Z "]";
126126
lexical Y2 = "[" Z ")" [a-z];
127127
lexical Z = [a-z];
128128
```
@@ -166,7 +166,7 @@ private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Directio
166166
ret[s] = unique(delimiters);
167167
}
168168
}
169-
169+
170170
return ret;
171171
}
172172

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ private map[Production, Maybe[set[Segment]]] getSegmentsByProduction(Grammar g)
5555
}
5656
5757
private Maybe[set[Segment]] getSegmentsWithEnvironment(
58-
Grammar g, list[Symbol] symbols,
58+
Grammar g, list[Symbol] symbols,
5959
map[Production, Maybe[set[Segment]]] env) {
6060
6161
// General idea: Recursively traverse `symbols` from left to right, while
@@ -73,9 +73,9 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
7373
set[Symbol] nested = {s | /Symbol s := head};
7474
7575
Maybe[set[Segment]] finished = get(running, [], final = tail == []);
76-
76+
7777
// If the head contains a non-terminal, then: (1) finish the running
78-
// segment; (2) lookup the segments of the non-terminals in the
78+
// segment; (2) look up the segments of the non-terminals in the
7979
// environment, if any; (3) compute the segments of the tail. Return the
8080
// union of 1-3.
8181
if (any(s <- nested, isNonTerminalType(s))) {
@@ -85,15 +85,15 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
8585
sets += finished;
8686
8787
// (2)
88-
sets += for (s <- nested, isNonTerminalType(s), p <- lookup(g, s)) {
88+
sets += for (s <- nested, isNonTerminalType(s), p <- prodsOf(g, s)) {
8989
9090
bool isInitial(Segment seg)
9191
= seg.initial && running.initial && running.symbols == [];
9292
bool isFinal(Segment seg)
9393
= seg.final && tail == [];
9494
Segment update(Segment seg)
9595
= seg[initial = isInitial(seg)][final = isFinal(seg)];
96-
96+
9797
append just(segs) := env[p] ? just({update(seg) | seg <- segs}) : nothing();
9898
}
9999
@@ -103,21 +103,21 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
103103
// Return union
104104
return (sets[0] | union(it, \set) | \set <- sets[1..]);
105105
}
106-
106+
107107
// If the head doesn't contain a non-terminal, but it has a newline,
108108
// then: (1) finish the running segment; (2) compute the segments of the
109109
// tail. Return the union of 1-2. Note: the head, as it has a newline,
110110
// is ignored and won't be part of any segment.
111111
else if (any(s <- nested, hasNewline(g, s))) {
112112
return union(finished, get(segment([]), tail));
113113
}
114-
114+
115115
// If the head doesn't contain a non-terminal, and if it doesn't have a
116116
// newline, then add the head to the running segment and proceed with
117117
// the tail.
118118
else {
119119
Segment old = running;
120-
Segment new = old[symbols = old.symbols + head];
120+
Segment new = old[symbols = old.symbols + head];
121121
return get(new, tail);
122122
}
123123
}
@@ -130,7 +130,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
130130
}
131131
132132
bool hasNewline(Grammar g, Symbol s) {
133-
return any(p <- lookup(g, delabel(s)), hasNewline(g, p));
133+
return any(p <- prodsOf(g, delabel(s)), hasNewline(g, p));
134134
}
135135
136136
@synopsis{
@@ -149,7 +149,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
149149
for (p <- ret, !ret[p]) {
150150
set[Symbol] nonTerminals = {s | /Symbol s := p.symbols, isNonTerminalType(s)};
151151
ret[p] = ret[p] || any(/r: range(_, _) := p.symbols, hasNewline(r))
152-
|| any(s <- nonTerminals, Production child <- lookup(g, s), ret[child]);
152+
|| any(s <- nonTerminals, Production child <- prodsOf(g, s), ret[child]);
153153
}
154154
}
155155
@@ -165,7 +165,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
165165
166166
bool hasNewline(str s)
167167
= LF in chars(s);
168-
168+
169169
bool hasNewline(range(begin, end))
170170
= begin <= LF && LF <= end;
171171

rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) pr
5656
for (s <- ret, nothing() == ret[s]) {
5757
if (predicate(s)) {
5858
ret[s] = just({s});
59-
} else if (list[Production] prods: [_, *_] := lookup(g, s)) {
59+
} else if (list[Production] prods: [_, *_] := prodsOf(g, s)) {
6060
ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods);
6161
} else {
6262
ret[s] = just({\empty()});
@@ -84,7 +84,7 @@ set[Symbol] follow(Grammar g, Symbol s)
8484
@memo
8585
private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
8686
map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals
87-
87+
8888
Maybe[set[Symbol]] followOf(Symbol parent, [])
8989
= ret[delabel(parent)];
9090
Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
@@ -142,6 +142,8 @@ private default Maybe[int] max(Maybe[int] _, Maybe[int] _) = nothing();
142142
Computes the length of a terminal symbol as a range
143143
}
144144
145+
Range length(label(_, symbol)) = length(symbol);
146+
145147
Range length(\lit(string)) = <size(string), just(size(string))>;
146148
Range length(\cilit(string)) = <size(string), just(size(string))>;
147149
Range length(\char-class(_)) = <1, just(1)>;

0 commit comments

Comments
 (0)