Skip to content

Commit 09cce09

Browse files
committed
Add a unit test which checks that a simple uniq example parses at all. It turns out the "\n" terminator needs to be moved to the end of the Root production, after the optional ":: uniq" clause
1 parent 5a258ba commit 09cce09

File tree

5 files changed

+87
-44
lines changed

5 files changed

+87
-44
lines changed

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
6262
case ALIGNRELN:{
6363
reverse = jj_consume_token(ALIGNRELN);
6464
node = SubNode(GraphRelation.ALIGNED_ROOT);
65-
jj_consume_token(10);
6665
break;
6766
}
6867
case 13:
@@ -74,19 +73,18 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
7473
label_1:
7574
while (true) {
7675
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
77-
case 11:{
76+
case 10:{
7877
;
7978
break;
8079
}
8180
default:
8281
jj_la1[0] = jj_gen;
8382
break label_1;
8483
}
85-
jj_consume_token(11);
84+
jj_consume_token(10);
8685
node = SubNode(GraphRelation.ITERATOR);
8786
children.add(node);
8887
}
89-
jj_consume_token(10);
9088
break;
9189
}
9290
default:
@@ -95,16 +93,16 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
9593
throw new ParseException();
9694
}
9795
if (children.size() > 1)
98-
node = new CoordinationPattern(true, children, true, true);
99-
if (deprecatedAmp) {
100-
{if (true) throw new SemgrexParseException("Use of & in semgrex patterns is now illegal. It is equivalent to the same expression without the &. Offending expression: " + startToken);}
101-
}
102-
if (deprecatedNodeConj) {
103-
{if (true) throw new SemgrexParseException("Use of node conjugation (expressions such as '< [foo bar]' or '< [foo & bar]') is now illegal. The issue is that expressions such as '[foo bar] < zzz' may intuitively mean that foo < zzz, bar < zzz, zzz the same for both cases, but that is not the way the parser interpreted this expression. Changing the functionality might break existing expressions, and anyway this can be rewritten in various ways such as 'zzz > foo > bar' or 'foo < zzz=a : bar < zzz=a'. Offending expression: " + startToken);}
104-
}
96+
node = new CoordinationPattern(true, children, true, true);
97+
if (deprecatedAmp) {
98+
{if (true) throw new SemgrexParseException("Use of & in semgrex patterns is now illegal. It is equivalent to the same expression without the &. Offending expression: " + startToken);}
99+
}
100+
if (deprecatedNodeConj) {
101+
{if (true) throw new SemgrexParseException("Use of node conjugation (expressions such as '< [foo bar]' or '< [foo & bar]') is now illegal. The issue is that expressions such as '[foo bar] < zzz' may intuitively mean that foo < zzz, bar < zzz, zzz the same for both cases, but that is not the way the parser interpreted this expression. Changing the functionality might break existing expressions, and anyway this can be rewritten in various ways such as 'zzz > foo > bar' or 'foo < zzz=a : bar < zzz=a'. Offending expression: " + startToken);}
102+
}
105103
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
106-
case 12:{
107-
jj_consume_token(12);
104+
case 11:{
105+
jj_consume_token(11);
108106
jj_consume_token(UNIQ);
109107
uniqKeys = new ArrayList<>();
110108
label_2:
@@ -122,12 +120,20 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
122120
nextIdentifier = identifier();
123121
uniqKeys.add(nextIdentifier.image);
124122
}
123+
for (String key : uniqKeys) {
124+
if (!knownVariables.contains(key)) {
125+
{if (true) throw new SemgrexParseException("Semgrex pattern |" + startToken + "| asked for uniq of node " + key + " which does not exist in the pattern");}
126+
}
127+
}
128+
// TODO: do shit here
129+
125130
break;
126131
}
127132
default:
128133
jj_la1[3] = jj_gen;
129134
;
130135
}
136+
jj_consume_token(12);
131137
{if ("" != null) return node;}
132138
throw new Error("Missing return statement in function");
133139
}
@@ -573,11 +579,11 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
573579
case IDENTIFIER:{
574580
attr = identifier();
575581
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
576-
case 11:
582+
case 10:
577583
case 24:{
578584
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
579-
case 11:{
580-
attrType = jj_consume_token(11);
585+
case 10:{
586+
attrType = jj_consume_token(10);
581587
break;
582588
}
583589
case 24:{
@@ -628,8 +634,8 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
628634
throw new ParseException();
629635
}
630636
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
631-
case 11:{
632-
attrType = jj_consume_token(11);
637+
case 10:{
638+
attrType = jj_consume_token(10);
633639
break;
634640
}
635641
case 24:{
@@ -690,8 +696,8 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
690696
throw new ParseException();
691697
}
692698
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
693-
case 11:{
694-
attrType = jj_consume_token(11);
699+
case 10:{
700+
attrType = jj_consume_token(10);
695701
break;
696702
}
697703
case 24:{
@@ -842,7 +848,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
842848
jj_la1_init_0();
843849
}
844850
private static void jj_la1_init_0() {
845-
jj_la1_0 = new int[] {0x800,0x100a2010,0x24,0x1000,0xe003c,0xe003c,0x100a2000,0x8000,0xf003c,0x10000,0xe003c,0x8003c,0x200000,0x24,0x224,0x224,0x400000,0x800000,0x3c,0x100a2000,0x8000,0x100b0000,0x10000,0x100a0000,0x10080000,0x1000800,0x224,0x224,0x1000800,0x224,0x4000000,0x224,0x1000800,0x224,0x3000800,0x1a4,0x4000000,0x1a4,0x800000,0x24,};
851+
jj_la1_0 = new int[] {0x400,0x100a2010,0x24,0x800,0xe003c,0xe003c,0x100a2000,0x8000,0xf003c,0x10000,0xe003c,0x8003c,0x200000,0x24,0x224,0x224,0x400000,0x800000,0x3c,0x100a2000,0x8000,0x100b0000,0x10000,0x100a0000,0x10080000,0x1000400,0x224,0x224,0x1000400,0x224,0x4000000,0x224,0x1000400,0x224,0x3000400,0x1a4,0x4000000,0x1a4,0x800000,0x24,};
846852
}
847853

848854
/** Constructor with InputStream. */

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -92,25 +92,39 @@ SemgrexPattern Root() : {
9292
startToken = getToken(1);
9393
}
9494
(
95-
(reverse = <ALIGNRELN> node = SubNode(GraphRelation.ALIGNED_ROOT) "\n")
96-
|
97-
( node = SubNode(GraphRelation.ROOT) { children.add(node); }
98-
( ":" node = SubNode(GraphRelation.ITERATOR) { children.add(node); } )*
99-
"\n"
95+
(
96+
(reverse = <ALIGNRELN> node = SubNode(GraphRelation.ALIGNED_ROOT))
97+
|
98+
( node = SubNode(GraphRelation.ROOT) { children.add(node); }
99+
( ":" node = SubNode(GraphRelation.ITERATOR) { children.add(node); } )*
100+
)
100101
)
101-
)
102-
{
103-
if (children.size() > 1)
104-
node = new CoordinationPattern(true, children, true, true);
105-
if (deprecatedAmp) {
106-
throw new SemgrexParseException("Use of & in semgrex patterns is now illegal. It is equivalent to the same expression without the &. Offending expression: " + startToken);
107-
}
108-
if (deprecatedNodeConj) {
109-
throw new SemgrexParseException("Use of node conjugation (expressions such as '< [foo bar]' or '< [foo & bar]') is now illegal. The issue is that expressions such as '[foo bar] < zzz' may intuitively mean that foo < zzz, bar < zzz, zzz the same for both cases, but that is not the way the parser interpreted this expression. Changing the functionality might break existing expressions, and anyway this can be rewritten in various ways such as 'zzz > foo > bar' or 'foo < zzz=a : bar < zzz=a'. Offending expression: " + startToken);
102+
{
103+
if (children.size() > 1)
104+
node = new CoordinationPattern(true, children, true, true);
105+
if (deprecatedAmp) {
106+
throw new SemgrexParseException("Use of & in semgrex patterns is now illegal. It is equivalent to the same expression without the &. Offending expression: " + startToken);
107+
}
108+
if (deprecatedNodeConj) {
109+
throw new SemgrexParseException("Use of node conjugation (expressions such as '< [foo bar]' or '< [foo & bar]') is now illegal. The issue is that expressions such as '[foo bar] < zzz' may intuitively mean that foo < zzz, bar < zzz, zzz the same for both cases, but that is not the way the parser interpreted this expression. Changing the functionality might break existing expressions, and anyway this can be rewritten in various ways such as 'zzz > foo > bar' or 'foo < zzz=a : bar < zzz=a'. Offending expression: " + startToken);
110+
}
110111
}
111-
}
112+
)
113+
(
114+
(
115+
"::" <UNIQ> { uniqKeys = new ArrayList<>(); } (nextIdentifier = identifier() { uniqKeys.add(nextIdentifier.image); })*
116+
{
117+
for (String key : uniqKeys) {
118+
if (!knownVariables.contains(key)) {
119+
throw new SemgrexParseException("Semgrex pattern asked for uniq of node " + key + " which does not exist in the pattern");
120+
}
121+
}
122+
// TODO: do shit here
123+
}
124+
)?
125+
)
112126
(
113-
("::" <UNIQ> { uniqKeys = new ArrayList<>(); } (nextIdentifier = identifier() { uniqKeys.add(nextIdentifier.image); })* )?
127+
"\n"
114128
)
115129
{
116130
return node;

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ interface SemgrexParserConstants {
4444
"\"#\"",
4545
"\"$\"",
4646
"<REGEX>",
47-
"\"\\n\"",
4847
"\":\"",
4948
"\"::\"",
49+
"\"\\n\"",
5050
"\"(\"",
5151
"\")\"",
5252
"\"|\"",

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserTokenManager.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ private int jjMoveStringLiteralDfa0_0(){
6565
switch(curChar)
6666
{
6767
case 10:
68-
return jjStopAtPos(0, 10);
68+
return jjStopAtPos(0, 12);
6969
case 33:
7070
jjmatchedKind = 17;
7171
return jjMoveStringLiteralDfa1_0(0x1000000L);
@@ -82,8 +82,8 @@ private int jjMoveStringLiteralDfa0_0(){
8282
case 44:
8383
return jjStopAtPos(0, 21);
8484
case 58:
85-
jjmatchedKind = 11;
86-
return jjMoveStringLiteralDfa1_0(0x2001000L);
85+
jjmatchedKind = 10;
86+
return jjMoveStringLiteralDfa1_0(0x2000800L);
8787
case 59:
8888
return jjStopAtPos(0, 26);
8989
case 61:
@@ -119,8 +119,8 @@ private int jjMoveStringLiteralDfa1_0(long active0){
119119
switch(curChar)
120120
{
121121
case 58:
122-
if ((active0 & 0x1000L) != 0L)
123-
return jjStopAtPos(1, 12);
122+
if ((active0 & 0x800L) != 0L)
123+
return jjStopAtPos(1, 11);
124124
else if ((active0 & 0x1000000L) != 0L)
125125
return jjStopAtPos(1, 24);
126126
break;
@@ -425,7 +425,7 @@ else if (curChar < 128)
425425
/** Token literal values. */
426426
public static final String[] jjstrLiteralImages = {
427427
"", null, "\165\156\151\161", null, "\100", null, null, "\43", "\44", null,
428-
"\12", "\72", "\72\72", "\50", "\51", "\174", "\46", "\41", "\77", "\133", "\135",
428+
"\72", "\72\72", "\12", "\50", "\51", "\174", "\46", "\41", "\77", "\133", "\135",
429429
"\54", "\176", "\75", "\41\72", "\72\173", "\73", "\175", "\173", };
430430
protected Token jjFillToken()
431431
{

test/src/edu/stanford/nlp/semgraph/semgrex/SemgrexTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1458,6 +1458,29 @@ public void testBrackets() {
14581458
"[ate/VBD subj>Billz/NNP obj>[muffins compound>strawberry]]");
14591459
}
14601460

1461+
/**
1462+
* Test that an illegal uniq expression throws an exception
1463+
*<br>
1464+
* Specifically, the expectation is for a SemgrexParseException
1465+
*/
1466+
public void testBrokenUniq() {
1467+
try {
1468+
String pattern = "{word:foo}=foo :: uniq bar";
1469+
SemgrexPattern semgrex = SemgrexPattern.compile(pattern);
1470+
throw new RuntimeException("This expression is now illegal");
1471+
} catch (SemgrexParseException e) {
1472+
// yay
1473+
}
1474+
}
1475+
1476+
/**
1477+
* Test that a simple uniq expression is correctly parsed
1478+
*/
1479+
public void testParsesUniq() {
1480+
String pattern = "{word:foo}=foo :: uniq foo";
1481+
SemgrexPattern semgrex = SemgrexPattern.compile(pattern);
1482+
}
1483+
14611484
public static void outputResults(String pattern, String graph,
14621485
String ... ignored) {
14631486
outputResults(SemgrexPattern.compile(pattern),

0 commit comments

Comments
 (0)