11package dataStructures .trie ;
22
3+ import java .util .ArrayList ;
4+ import java .util .HashMap ;
5+ import java .util .List ;
6+ import java .util .Map ;
7+
/**
 * Implementation of a trie (prefix tree) over strings.
 *
 * <p>The trie is not case-sensitive: every word or prefix passed to a public
 * method is converted to lower case (using {@code Locale.ROOT}, so the result
 * does not depend on the JVM's default locale) before it is used.
 *
 * <p>All public methods throw {@link NullPointerException} when given a
 * {@code null} string.
 */
public class Trie {
    private final TrieNode root;

    /**
     * Creates an empty trie.
     */
    public Trie() {
        root = new TrieNode();
    }

    /**
     * A node of the trie. Edges (the keys of {@code children}) represent
     * characters; a node itself only records whether the path leading to it
     * spells a stored word.
     */
    private static final class TrieNode {
        // A map is used rather than a fixed array of 26 slots so that any character is accepted.
        private final Map<Character, TrieNode> children = new HashMap<>();
        // Marks whether the path from the root to this node forms a known word.
        private boolean isEnd;
    }

    /**
     * Converts the input to the canonical (lower-case) form stored in the trie.
     *
     * @param text the raw word or prefix
     * @return the lower-cased text
     */
    private static String normalize(String text) {
        // Locale.ROOT keeps the mapping stable regardless of the default locale.
        return text.toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Walks from the root along the characters of an already-normalized key.
     *
     * @param key the normalized key to follow
     * @return the node reached, or {@code null} if the path does not exist
     */
    private TrieNode findNode(String key) {
        TrieNode trav = root;
        for (int i = 0; i < key.length(); i++) {
            trav = trav.children.get(key.charAt(i));
            if (trav == null) {
                return null;
            }
        }
        return trav;
    }

    /**
     * Inserts a word into the trie.
     *
     * @param word the word to insert; case is ignored
     */
    public void insert(String word) {
        String key = normalize(word);
        TrieNode trav = root;
        for (int i = 0; i < key.length(); i++) {
            char curr = key.charAt(i);
            TrieNode next = trav.children.get(curr);
            if (next == null) {
                next = new TrieNode();
                trav.children.put(curr, next);
            }
            trav = next;
        }
        trav.isEnd = true;
    }

    /**
     * Searches for a word in the trie.
     *
     * @param word the word to look for; case is ignored
     * @return true if the word is found, false otherwise
     */
    public boolean search(String word) {
        TrieNode node = findNode(normalize(word));
        return node != null && node.isEnd;
    }

    /**
     * Deletes a word from the trie by clearing its end marker. Nodes are left
     * in place; use {@link #deleteAndPrune(String)} to also remove unused nodes.
     * Does nothing if the word's path is not present.
     *
     * @param word the word to delete; case is ignored
     */
    public void delete(String word) {
        TrieNode node = findNode(normalize(word));
        if (node != null) {
            node.isEnd = false;
        }
    }

    /**
     * Deletes a word from the trie and prunes nodes that no longer lead to any
     * stored word, which keeps the trie compact. Does nothing if the word's
     * path is not present.
     *
     * @param word the word to delete; case is ignored
     */
    public void deleteAndPrune(String word) {
        String key = normalize(word);
        // path.get(i) is the node from which the edge key.charAt(i) is followed.
        List<TrieNode> path = new ArrayList<>(key.length());
        TrieNode trav = root;
        for (int i = 0; i < key.length(); i++) {
            TrieNode next = trav.children.get(key.charAt(i));
            if (next == null) {
                return; // word does not exist in trie
            }
            path.add(trav);
            trav = next;
        }
        trav.isEnd = false;

        // Prune bottom-up, stopping at the first node that is still needed.
        for (int i = key.length() - 1; i >= 0; i--) {
            char curr = key.charAt(i);
            TrieNode parent = path.get(i);
            TrieNode child = parent.children.get(curr);
            if (child.isEnd || !child.children.isEmpty()) {
                // The node still ends a word or leads to one, so it and all of its ancestors stay.
                break;
            }
            parent.children.remove(curr);
        }
    }

    /**
     * Finds all words with the specified prefix.
     *
     * @param prefix the prefix to match; case is ignored
     * @return the stored (lower-case) words starting with the prefix, in no
     *         particular order; empty if there are none
     */
    public List<String> wordsWithPrefix(String prefix) {
        String key = normalize(prefix);
        List<String> ret = new ArrayList<>();
        TrieNode start = findNode(key);
        if (start != null) {
            collectWords(start, new StringBuilder(key), ret);
        }
        return ret;
    }

    /**
     * Finds all words in the trie.
     *
     * @return the stored (lower-case) words, in no particular order
     */
    public List<String> getAllWords() {
        List<String> ret = new ArrayList<>();
        collectWords(root, new StringBuilder(), ret);
        return ret;
    }

    /**
     * Depth-first helper that appends every word reachable from {@code node}
     * to {@code out}.
     *
     * @param node the node to start from
     * @param path the characters spelled from the root to {@code node}; it is
     *             extended during the walk and restored before returning
     * @param out  the list that receives the words
     */
    private static void collectWords(TrieNode node, StringBuilder path, List<String> out) {
        if (node.isEnd) {
            out.add(path.toString());
        }
        for (Map.Entry<Character, TrieNode> edge : node.children.entrySet()) {
            path.append(edge.getKey().charValue());
            collectWords(edge.getValue(), path, out);
            path.setLength(path.length() - 1); // backtrack
        }
    }

    /**
     * Helper method for testing purposes: checks whether the node reached by
     * following the first {@code pos} characters of {@code str} exists.
     *
     * @param str the string whose path is followed; case is ignored
     * @param pos the number of leading characters to follow
     * @return true if every one of those edges exists; false if any is missing
     *         or if {@code pos} exceeds the length of the string
     */
    public Boolean checkNodeExistsAtPosition(String str, Integer pos) {
        String key = normalize(str);
        if (pos > key.length()) {
            return false; // the string is too short to define such a node
        }
        TrieNode trav = root;
        for (int i = 0; i < pos; i++) {
            trav = trav.children.get(key.charAt(i));
            if (trav == null) {
                return false;
            }
        }
        return true;
    }
}
0 commit comments