@@ -1464,31 +1464,39 @@ public void testBrackets() {
14641464 "[ate/VBD subj>Billz/NNP obj>[muffins compound>strawberry]]" );
14651465 }
14661466
1467+ String [] BATCH_PARSES = {
1468+ "[foo-1 nmod> bar-2]" ,
1469+ "[foo-1 obj> bar-2]" ,
1470+ "[bar-1 compound> baz-2]" ,
1471+ "[foo-1 nmod> baz-2 obj> bar-3]" ,
1472+ };
1473+
14671474 /**
1468- * A simple test of the batch search - should return 3 of the 4 sentences
1475+ * Build a fresh list holding one CoreMap per BATCH_PARSES entry, each with the parsed graph set as BasicDependenciesAnnotation and the raw parse text as TextAnnotation
14691476 */
1470- public void testBatchSearch () {
1471- String [] parses = {
1472- "[foo-1 nmod> bar-2]" ,
1473- "[foo-1 obj> bar-2]" ,
1474- "[bar-1 compound> baz-2]" ,
1475- "[foo-1 nmod> baz-2 obj> bar-3]" ,
1476- };
1477+ public List <CoreMap > buildSmallBatch () {
14771478 List <CoreMap > sentences = new ArrayList <>();
1478- for (String parse : parses ) {
1479+ for (String parse : BATCH_PARSES ) {
14791480 SemanticGraph graph = SemanticGraph .valueOf (parse );
14801481 CoreMap sentence = new ArrayCoreMap ();
14811482 sentence .set (SemanticGraphCoreAnnotations .BasicDependenciesAnnotation .class , graph );
14821483 sentence .set (CoreAnnotations .TextAnnotation .class , parse );
14831484 sentences .add (sentence );
14841485 }
1486+ return sentences ;
1487+ }
14851488
1489+ /**
1490+ * A simple test of the batch search - should return 3 of the 4 sentences
1491+ */
1492+ public void testBatchSearch () {
1493+ List <CoreMap > sentences = buildSmallBatch ();
14861494 SemgrexPattern semgrex = SemgrexPattern .compile ("{word:foo}=x > {}=y" );
14871495 List <Pair <CoreMap , List <SemgrexMatch >>> matches = semgrex .matchSentences (sentences );
14881496 String [] expectedMatches = {
1489- parses [0 ],
1490- parses [1 ],
1491- parses [3 ],
1497+ BATCH_PARSES [0 ],
1498+ BATCH_PARSES [1 ],
1499+ BATCH_PARSES [3 ],
14921500 };
14931501 int [] expectedCount = {1 , 1 , 2 };
14941502 assertEquals (expectedMatches .length , matches .size ());
@@ -1507,7 +1515,7 @@ public void testBrokenUniq() {
15071515 try {
15081516 String pattern = "{word:foo}=foo :: uniq bar" ;
15091517 SemgrexPattern semgrex = SemgrexPattern .compile (pattern );
1510- throw new RuntimeException ("This expression is now illegal " );
1518+ throw new RuntimeException ("This expression should fail because the node name is unknown " );
15111519 } catch (SemgrexParseException e ) {
15121520 // yay
15131521 }
@@ -1521,6 +1529,57 @@ public void testParsesUniq() {
15211529 SemgrexPattern semgrex = SemgrexPattern .compile (pattern );
15221530 }
15231531
1532+ /**
1533+ * Test the uniq functionality on a few simple parses
1534+ */
1535+ public void testBatchUniq () {
1536+ List <CoreMap > sentences = buildSmallBatch ();
1537+ SemgrexPattern semgrex = SemgrexPattern .compile ("{word:foo}=x > {}=y :: uniq x" );
1538+ List <Pair <CoreMap , List <SemgrexMatch >>> matches = semgrex .matchSentences (sentences );
1539+ // only the first foo sentence should match when using "uniq x"
1540+ assertEquals (1 , matches .size ());
1541+ assertEquals (BATCH_PARSES [0 ], matches .get (0 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1542+ assertEquals (1 , matches .get (0 ).second ().size ());
1543+
1544+ semgrex = SemgrexPattern .compile ("{word:foo}=x > {}=y :: uniq" );
1545+ matches = semgrex .matchSentences (sentences );
1546+ // same thing happens when using "uniq" and no nodes - only one match will occur
1547+ assertEquals (1 , matches .size ());
1548+ assertEquals (BATCH_PARSES [0 ], matches .get (0 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1549+ assertEquals (1 , matches .get (0 ).second ().size ());
1550+
1551+ semgrex = SemgrexPattern .compile ("{word:foo}=x > {}=y :: uniq y" );
1552+ matches = semgrex .matchSentences (sentences );
1553+ // now it should match both foo>bar and foo>baz
1554+ assertEquals (2 , matches .size ());
1555+ assertEquals (BATCH_PARSES [0 ], matches .get (0 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1556+ assertEquals (1 , matches .get (0 ).second ().size ());
1557+ assertEquals (BATCH_PARSES [3 ], matches .get (1 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1558+ assertEquals (1 , matches .get (1 ).second ().size ());
1559+
1560+ semgrex = SemgrexPattern .compile ("{}=x > {}=y :: uniq x y" );
1561+ matches = semgrex .matchSentences (sentences );
1562+ // now it should batch each of foo>bar, bar>baz, foo>baz
1563+ assertEquals (3 , matches .size ());
1564+ assertEquals (BATCH_PARSES [0 ], matches .get (0 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1565+ assertEquals (1 , matches .get (0 ).second ().size ());
1566+ assertEquals (BATCH_PARSES [2 ], matches .get (1 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1567+ assertEquals (1 , matches .get (1 ).second ().size ());
1568+ assertEquals (BATCH_PARSES [3 ], matches .get (2 ).first ().get (CoreAnnotations .TextAnnotation .class ));
1569+ assertEquals (1 , matches .get (2 ).second ().size ());
1570+ }
1571+
1572+ public static void outputBatchResults (SemgrexPattern pattern , List <CoreMap > sentences ) {
1573+ List <Pair <CoreMap , List <SemgrexMatch >>> matches = pattern .matchSentences (sentences );
1574+ for (Pair <CoreMap , List <SemgrexMatch >> sentenceMatch : matches ) {
1575+ System .out .println ("Pattern matched at:" );
1576+ System .out .println (sentenceMatch .first ());
1577+ for (SemgrexMatch match : sentenceMatch .second ()) {
1578+ System .out .println (match );
1579+ }
1580+ }
1581+ }
1582+
15241583 public static void outputResults (String pattern , String graph ,
15251584 String ... ignored ) {
15261585 outputResults (SemgrexPattern .compile (pattern ),
0 commit comments