@@ -66,48 +66,68 @@ public String toEditString() {
6666 */
6767 @ Override
6868 public boolean evaluate (SemanticGraph sg , SemgrexMatcher sm ) {
69- List <IndexedWord > nodes = new ArrayList <>();
69+ Set <IndexedWord > nodeSet = new HashSet <>();
7070 for (String name : names ) {
7171 IndexedWord node = sm .getNode (name );
7272 if (node == null ) {
7373 return false ;
7474 }
75- nodes .add (node );
75+ nodeSet .add (node );
7676 }
77- Collections .sort (nodes );
7877
7978 IndexedWord head = null ;
80- for (IndexedWord candidate : nodes ) {
81- if ( sg .hasChildren (candidate )) {
82- // if multiple nodes have children inside the graph,
83- // perhaps we could merge them all,
84- // but the easiest thing to do is just abort
85- // TODO: an alternate approach would be to look for nodes with a head
86- // outside the nodes in the phrase to merge
79+ for (IndexedWord candidate : nodeSet ) {
80+ Set < IndexedWord > parents = sg .getParents (candidate );
81+ if ( parents . size () == 0 ) {
82+ // found a root
83+ // if something else is already the head,
84+ // we don't know how to handle that,
85+ // so we abort this operation
8786 if (head != null ) {
8887 return false ;
8988 }
9089 head = candidate ;
90+ continue ;
91+ }
92+ for (IndexedWord parent : parents ) {
93+ if (nodeSet .contains (parent )) {
94+ continue ;
95+ }
96+ // parent is outside this subtree
97+ // therefore, we can use this word as the head of the subtree
98+ // but if we already have a head, give up instead
99+ if (head != null ) {
100+ return false ;
101+ }
102+ head = candidate ;
103+ break ;
91104 }
92105 }
106+ if (head == null ) {
107+ return false ;
108+ }
93109
94- Set <Integer > depIndices = new HashSet <Integer >();
95- for (IndexedWord other : nodes ) {
96- if (other == head ) {
110+ // for now, only allow the head to have edges to children outside the subtree
111+ // TODO: instead, could make them all point to the new merged word...
112+ // but it's not clear that's a structure we want to allow merged
113+ for (IndexedWord candidate : nodeSet ) {
114+ if (candidate == head ) {
97115 continue ;
98116 }
99- Set <IndexedWord > parents = sg .getParents (other );
100- // this shouldn't happen
101- if (parents .size () == 0 ) {
102- return false ;
103- }
104- // iterate instead of just do the first in case
105- // this one day is doing an graph with extra dependencies
106- for (IndexedWord parent : parents ) {
107- if (parent != head ) {
117+ for (IndexedWord child : sg .getChildren (candidate )) {
118+ if (!nodeSet .contains (child )) {
108119 return false ;
109120 }
110121 }
122+ }
123+ ArrayList <IndexedWord > nodes = new ArrayList <>(nodeSet );
124+ Collections .sort (nodes );
125+
126+ Set <Integer > depIndices = new HashSet <Integer >();
127+ for (IndexedWord other : nodes ) {
128+ if (other == head ) {
129+ continue ;
130+ }
111131 depIndices .add (other .index ());
112132 }
113133
0 commit comments