|
10 | 10 | import java.io.InputStream; |
11 | 11 | import java.io.IOException; |
12 | 12 | import java.io.OutputStream; |
| 13 | +import java.util.ArrayList; |
13 | 14 | import java.util.List; |
14 | 15 | import java.util.stream.Collectors; |
15 | 16 |
|
| 17 | +import edu.stanford.nlp.ling.CoreAnnotations; |
16 | 18 | import edu.stanford.nlp.ling.CoreLabel; |
17 | 19 | import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer; |
18 | 20 | import edu.stanford.nlp.pipeline.CoreNLPProtos; |
19 | 21 | import edu.stanford.nlp.semgraph.SemanticGraph; |
| 22 | +import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; |
20 | 23 | import edu.stanford.nlp.semgraph.SemanticGraphEdge; |
21 | 24 | import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; |
22 | 25 | import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern; |
| 26 | +import edu.stanford.nlp.util.ArrayCoreMap; |
| 27 | +import edu.stanford.nlp.util.CoreMap; |
| 28 | +import edu.stanford.nlp.util.Pair; |
23 | 29 | import edu.stanford.nlp.util.ProcessProtobufRequest; |
24 | 30 |
|
25 | 31 | public class ProcessSemgrexRequest extends ProcessProtobufRequest { |
26 | 32 | /** |
27 | 33 | * Builds a single inner SemgrexResult structure from the pair of a SemgrexPattern and a SemanticGraph |
28 | 34 | */ |
29 | | - public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, int patternIdx, int graphIdx) { |
| 35 | + public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexPattern pattern, SemanticGraph graph, List<SemgrexMatch> matches, int patternIdx, int graphIdx) { |
30 | 36 | CoreNLPProtos.SemgrexResponse.SemgrexResult.Builder semgrexResultBuilder = CoreNLPProtos.SemgrexResponse.SemgrexResult.newBuilder(); |
31 | | - SemgrexMatcher matcher = pattern.matcher(graph); |
32 | | - while (matcher.find()) { |
| 37 | + for (SemgrexMatch matcher : matches) { |
33 | 38 | CoreNLPProtos.SemgrexResponse.Match.Builder matchBuilder = CoreNLPProtos.SemgrexResponse.Match.newBuilder(); |
34 | 39 | matchBuilder.setMatchIndex(matcher.getMatch().index()); |
35 | 40 | matchBuilder.setSemgrexIndex(patternIdx); |
@@ -74,37 +79,63 @@ public static CoreNLPProtos.SemgrexResponse.SemgrexResult matchSentence(SemgrexP |
74 | 79 | return semgrexResultBuilder.build(); |
75 | 80 | } |
76 | 81 |
|
| 82 | + public static CoreNLPProtos.SemgrexResponse processRequest(List<CoreMap> sentences, List<SemgrexPattern> patterns) { /* Runs every pattern over every sentence; the response gets one GraphResult per sentence, each holding one result per pattern. */ |
| 83 | + CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder(); |
| 84 | + List<Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>>> allMatches = new ArrayList<>(); /* one entry per sentence: (sentence, list of (pattern, that pattern's matches)) */ |
| 85 | + for (CoreMap sentence : sentences) { |
| 86 | + allMatches.add(new Pair<>(sentence, new ArrayList<>())); |
| 87 | + } |
| 88 | + for (SemgrexPattern pattern : patterns) { |
| 89 | + List<Pair<CoreMap, List<SemgrexMatch>>> patternMatches = pattern.matchSentences(sentences, true); /* NOTE(review): the get(i) below assumes exactly one entry per input sentence, in input order; presumably that is what the 'true' flag guarantees - confirm */ |
| 90 | + for (int i = 0; i < sentences.size(); ++i) { |
| 91 | + Pair<CoreMap, List<SemgrexMatch>> sentenceMatches = patternMatches.get(i); |
| 92 | + allMatches.get(i).second().add(new Pair<>(pattern, sentenceMatches.second())); /* regroup by sentence: append this pattern's matches to sentence i's list */ |
| 93 | + } |
| 94 | + } |
| 95 | + /* Second pass: build the response from the collected matches, one GraphResult per sentence. */ |
| 96 | + int graphIdx = 0; /* position of the current sentence within 'sentences' (allMatches was filled in that order) */ |
| 97 | + for (Pair<CoreMap, List<Pair<SemgrexPattern, List<SemgrexMatch>>>> sentenceMatches : allMatches) { |
| 98 | + CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder(); |
| 99 | + |
| 100 | + int patternIdx = 0; /* follows the order of 'patterns', because the pairs were appended in that order above */ |
| 101 | + SemanticGraph graph = sentenceMatches.first().get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); |
| 102 | + for (Pair<SemgrexPattern, List<SemgrexMatch>> patternMatches : sentenceMatches.second()) { |
| 103 | + SemgrexPattern pattern = patternMatches.first(); |
| 104 | + graphResultBuilder.addResult(matchSentence(pattern, graph, patternMatches.second(), patternIdx, graphIdx)); |
| 105 | + ++patternIdx; |
| 106 | + } |
| 107 | + |
| 108 | + responseBuilder.addResult(graphResultBuilder.build()); |
| 109 | + ++graphIdx; |
| 110 | + } |
| 111 | + return responseBuilder.build(); |
| 112 | + } |
| 113 | + |
77 | 114 | /** |
78 | 115 | * For a single request, iterate through the SemanticGraphs it |
79 | 116 | * includes, and add the results of each Semgrex operation included |
80 | 117 | * in the request. |
81 | 118 | */ |
82 | 119 | public static CoreNLPProtos.SemgrexResponse processRequest(CoreNLPProtos.SemgrexRequest request) { |
83 | 120 | ProtobufAnnotationSerializer serializer = new ProtobufAnnotationSerializer(); |
84 | | - CoreNLPProtos.SemgrexResponse.Builder responseBuilder = CoreNLPProtos.SemgrexResponse.newBuilder(); |
85 | 121 |
|
86 | | - List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList()); |
87 | | - int graphIdx = 0; |
| 122 | + List<CoreMap> sentences = new ArrayList<>(); |
88 | 123 | for (CoreNLPProtos.SemgrexRequest.Dependencies sentence : request.getQueryList()) { |
89 | | - CoreNLPProtos.SemgrexResponse.GraphResult.Builder graphResultBuilder = CoreNLPProtos.SemgrexResponse.GraphResult.newBuilder(); |
90 | | - |
91 | 124 | final List<CoreLabel> tokens; |
92 | 125 | if (sentence.getGraph().getTokenList().size() > 0) { /* prefer the token list embedded in the graph; otherwise fall back to the tokens sent alongside it */ |
93 | 126 | tokens = sentence.getGraph().getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList()); |
94 | 127 | } else { |
95 | 128 | tokens = sentence.getTokenList().stream().map(serializer::fromProto).collect(Collectors.toList()); |
96 | 129 | } |
97 | 130 | SemanticGraph graph = ProtobufAnnotationSerializer.fromProto(sentence.getGraph(), tokens, "semgrex"); |
98 | | - int patternIdx = 0; |
99 | | - for (SemgrexPattern pattern : patterns) { |
100 | | - graphResultBuilder.addResult(matchSentence(pattern, graph, patternIdx, graphIdx)); |
101 | | - ++patternIdx; |
102 | | - } |
103 | | - |
104 | | - responseBuilder.addResult(graphResultBuilder.build()); |
105 | | - ++graphIdx; |
| 131 | + CoreMap coremap = new ArrayCoreMap(); /* wrap the graph and its tokens in a CoreMap so the List<CoreMap> overload can do the matching */ |
| 132 | + coremap.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph); /* same key the List<CoreMap> overload reads the graph back from */ |
| 133 | + coremap.set(CoreAnnotations.TokensAnnotation.class, tokens); |
| 134 | + sentences.add(coremap); |
106 | 135 | } |
107 | | - return responseBuilder.build(); |
| 136 | + /* Compile each requested pattern once, then hand all sentences and patterns to the List-based overload. */ |
| 137 | + List<SemgrexPattern> patterns = request.getSemgrexList().stream().map(SemgrexPattern::compile).collect(Collectors.toList()); |
| 138 | + return processRequest(sentences, patterns); |
108 | 139 | } |
109 | 140 |
|
110 | 141 | /** |
|
0 commit comments