1
1
package dataStructures .trie ;
2
2
3
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
7
+
3
8
/**
 * Implementation of a trie (prefix tree) over strings.
 *
 * <p>Strings are treated as case-insensitive: every public method lower-cases its
 * argument (using {@link Locale#ROOT}, so the result does not depend on the JVM's
 * default locale) before touching the structure. Words returned by the query
 * methods are therefore always in lower case.
 *
 * <p>Characters live on the edges between nodes; a node only records whether the
 * path from the root down to it spells a stored word.
 */
public class Trie {
    private final TrieNode root;

    /**
     * Creates an empty trie consisting of a single root node.
     */
    public Trie() {
        root = new TrieNode();
    }

    /**
     * A single trie node. Declared static because it never needs a reference to the
     * enclosing {@code Trie}; the outer class accesses the fields directly.
     */
    private static final class TrieNode {
        /** Outgoing edges, keyed by character (an array of size 26 would also work for a-z only). */
        private final Map<Character, TrieNode> children = new HashMap<>();
        /** Marks whether the path from the root to this node forms a stored word. */
        private boolean isEnd;
    }

    /**
     * Inserts a word into the trie.
     *
     * @param word the word to store; it is lower-cased before insertion
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public void insert(String word) {
        word = normalize(word);
        TrieNode trav = root;
        for (int i = 0; i < word.length(); i++) {
            char curr = word.charAt(i);
            TrieNode next = trav.children.get(curr);
            if (next == null) {
                // recall, the edges represent the characters
                next = new TrieNode();
                trav.children.put(curr, next);
            }
            trav = next;
        }
        trav.isEnd = true;
    }

    /**
     * Searches for a word in the trie.
     *
     * @param word the word to look for; it is lower-cased before the lookup
     * @return true if the exact word is stored, false otherwise (including when it is
     *         only a prefix of stored words)
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public boolean search(String word) {
        TrieNode node = findNode(normalize(word));
        return node != null && node.isEnd;
    }

    /**
     * Deletes a word from the trie by clearing its end-of-word marker. No nodes are
     * removed; use {@link #deleteAndPrune(String)} to also reclaim unused nodes.
     * Does nothing if the word's path is not present.
     *
     * @param word the word to delete; it is lower-cased before the lookup
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public void delete(String word) {
        TrieNode node = findNode(normalize(word));
        if (node != null) {
            node.isEnd = false; // remove word from being tracked
        }
    }

    /**
     * Deletes a word from the trie and prunes the nodes that become redundant, which
     * keeps the trie compact. Does nothing if the word's path is not present.
     *
     * @param word the word to delete; it is lower-cased before the lookup
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public void deleteAndPrune(String word) {
        word = normalize(word);

        // parents.get(i) is the node whose outgoing edge word.charAt(i) lies on the path.
        List<TrieNode> parents = new ArrayList<>(word.length());
        TrieNode trav = root;
        for (int i = 0; i < word.length(); i++) {
            TrieNode next = trav.children.get(word.charAt(i));
            if (next == null) {
                return; // word does not exist in trie
            }
            parents.add(trav);
            trav = next;
        }
        trav.isEnd = false;

        // Walk back towards the root, dropping nodes that no longer track anything.
        for (int i = word.length() - 1; i >= 0; i--) {
            char curr = word.charAt(i);
            TrieNode parent = parents.get(i);
            TrieNode child = parent.children.get(curr);
            if (child.isEnd || !child.children.isEmpty()) {
                // The node ends another word or leads to one, so it (and everything
                // above it) is still needed; stop pruning.
                break;
            }
            parent.children.remove(curr);
        }
    }

    /**
     * Finds all stored words that start with the specified prefix.
     *
     * @param prefix the prefix to match; it is lower-cased before the lookup
     * @return a new list of matching words in lower case, in no particular order;
     *         empty if no stored word has this prefix
     * @throws NullPointerException if {@code prefix} is {@code null}
     */
    public List<String> wordsWithPrefix(String prefix) {
        prefix = normalize(prefix);
        List<String> ret = new ArrayList<>();
        TrieNode start = findNode(prefix);
        if (start != null) {
            collectWords(start, new StringBuilder(prefix), ret);
        }
        return ret;
    }

    /**
     * Finds all words stored in the trie.
     *
     * @return a new list of all stored words in lower case, in no particular order
     */
    public List<String> getAllWords() {
        List<String> ret = new ArrayList<>();
        collectWords(root, new StringBuilder(), ret);
        return ret;
    }

    /**
     * Helper method for testing purposes: reports whether the node reached after
     * following the first {@code pos} characters of {@code str} exists. This makes
     * it possible to observe whether pruning actually removed nodes.
     *
     * @param str the string whose path is followed; it is lower-cased first
     * @param pos the number of leading characters to follow
     * @return true if every one of the first {@code pos} edges exists; false if an
     *         edge is missing or {@code pos} exceeds the length of {@code str}
     * @throws NullPointerException if {@code str} or {@code pos} is {@code null}
     */
    public Boolean checkNodeExistsAtPosition(String str, Integer pos) {
        str = normalize(str);
        if (pos > str.length()) {
            return false; // there are not enough characters to reach such a node
        }
        TrieNode trav = root;
        for (int i = 0; i < pos; i++) {
            trav = trav.children.get(str.charAt(i));
            if (trav == null) {
                return false;
            }
        }
        return true;
    }

    /** Lower-cases the input independently of the default locale. */
    private static String normalize(String s) {
        return s.toLowerCase(Locale.ROOT);
    }

    /** Follows the given (already normalized) path from the root; returns null if an edge is missing. */
    private TrieNode findNode(String path) {
        TrieNode trav = root;
        for (int i = 0; i < path.length() && trav != null; i++) {
            trav = trav.children.get(path.charAt(i));
        }
        return trav;
    }

    /**
     * Depth-first walk that appends to {@code out} every word reachable from
     * {@code node}. {@code path} holds the characters leading to {@code node} and is
     * restored to its original content before the method returns.
     */
    private static void collectWords(TrieNode node, StringBuilder path, List<String> out) {
        if (node.isEnd) {
            out.add(path.toString());
        }
        for (Map.Entry<Character, TrieNode> edge : node.children.entrySet()) {
            path.append(edge.getKey().charValue());
            collectWords(edge.getValue(), path, out);
            path.setLength(path.length() - 1); // backtrack
        }
    }
}
0 commit comments