88<b>
99Purpose: Perform tokenization on input files, such that later stages may read a stream of tokens one by one.<br>
1010Programmer: Gabriel Toban Harris <br>
Date: 2021-07-[27, 28], 2021-07-30
1212</b>
1313*/
1414
@@ -91,6 +91,37 @@ public Returned_Data(final String EXTRA_DATA, final Token FULLY_FORMED_PART)
9191 */
9292 public final static String CONDITION_EXPR_END = ")" ;
9393
94+ /**
95+ * Simple concatenation of chars which are not allowed to be a part of any keyword, ID, or special marker of a part.
96+ */
97+ public final static String RESTRICTED_CHARS = ";=" + SENTINEL_START + SENTINEL_END + CONDITION_CARD_START + CONDITION_CARD_END + CONDITION_SCENARIO_START +
98+ CONDITION_SCENARIO_END + CONDITION_EXPR_START + CONDITION_EXPR_END ;
99+
100+ /**
101+ * Representation of unary operator not.
102+ */
103+ public final static Pattern NOT = Pattern .compile ("\\ s*NOT\\ s*" );
104+
105+ /**
106+ * Representation of binary operator and.
107+ */
108+ public final static Pattern AND = Pattern .compile ("\\ s*AND\\ s*" );
109+
110+ /**
111+ * Representation of binary operator or.
112+ */
113+ public final static Pattern OR = Pattern .compile ("\\ s*OR\\ s*" );
114+
115+ /**
116+ * Representation of binary operator xor.
117+ */
118+ public final static Pattern XOR = Pattern .compile ("\\ s*XOR\\ s*" );
119+
120+ /**
121+ * Simply the the predefined class \s.
122+ */
123+ public final static Pattern WHITE_SPACE_CHAR = Pattern .compile ("\\ s" );
124+
94125 /**
95126 * Pattern indicating the definition of the {@link Lexeme_Types#DECK_START} {@link Token}.
96127 */
@@ -114,8 +145,7 @@ public Returned_Data(final String EXTRA_DATA, final Token FULLY_FORMED_PART)
114145 /**
115146 * Definition of valid char that an the ID lexeme can have.
116147 */
117- public final static Pattern ID_CHAR_SET = Pattern .compile ("[^;=" + SENTINEL_START + SENTINEL_END + CONDITION_CARD_START + CONDITION_CARD_END + CONDITION_SCENARIO_START +
118- CONDITION_SCENARIO_END + CONDITION_EXPR_START + CONDITION_EXPR_END + "]" );
148+ public final static Pattern ID_CHAR_SET = Pattern .compile ("[^" + RESTRICTED_CHARS + "]" );
119149
120150 /**
121151 * Performs tokenization, after which other functions are called internally.
@@ -199,68 +229,66 @@ else if (placeholder.equals("*"))
199229 //defer to ID_CHAR_SET
200230 return new Returned_Data (new Token (Token .Lexeme_Types .ID , LINE_NUMBER , LEXEME .toString ()));
201231 }
202- //TODO:finish
203- //binary operators
204-
205- //display values
206- case "t" :
207- {
208- if (INPUT .hasNext ())
209- {
210- String placeholder = INPUT .next ();
211- LEXEME .append (placeholder );
212- //true keyword
213- if (placeholder .equals ("r" ))
214- {
215-
216-
217- LEXEME .append (placeholder = INPUT .next ());
218-
219- if (placeholder .equals ("u" ))
220- {
221- LEXEME .append (placeholder = INPUT .next ());
222-
223- if (placeholder .equals ("e" ))
224- {
225- placeholder = INPUT .next ();
226-
227- if ()
228- }
229- }
230- }
231-
232- return parse_ID (LINE_NUMBER , LEXEME , INPUT );
233- }
234-
235- return new Returned_Data (new Token (Token .Lexeme_Types .ID , LINE_NUMBER , LEXEME .toString ()));
236- }
237- //guess it is some sort of ID
232+ //parse multichar sequences
238233 default :
239- return gather_ID_chars (LINE_NUMBER , LEXEME , INPUT );
234+ return gather_keyword_chars (LINE_NUMBER , LEXEME , INPUT );
240235 }
241236 }
242237
265293
266294 /**
@@ -289,27 +317,25 @@ private static Returned_Data gather_ID_chars(final long LINE_NUMBER, final Strin
289317 }
290318
291319 /**
292- * Subroutine to create a {@link Lexeme_Types#ID} {@link Token} .
320+ * Determines which {@link Token} should be formed given the lexeme provided .
293321 *
294322 * @param LINE_NUMBER of source file being read
295- * @param LEXEME_START is the lexem formed thus far
296- * @param INPUT source to be read from
323+ * @param REMAINDER which is the extra data that should be feed back into {@link #tokenize(long, String, Scanner)}
324+ * @param COMPLETE_LEXEME which is fully formed
297325 * @return the created {@link Token} wrapped in a {@link Returned_Data}
298326 */
299- private static Returned_Data parse_ID (final long LINE_NUMBER , final StringBuilder LEXEME_START , final Scanner INPUT )
327+ private static Returned_Data parse_potentional_ID (final long LINE_NUMBER , final String REMAINDER , final String COMPLETE_LEXEME )
300328 {
301- String placeholder ;
302-
303- while (INPUT .hasNext ())
304- {
305- placeholder = INPUT .next ();
306-
307- if (ID_CHAR_SET .matcher (placeholder ).matches ())
308- LEXEME_START .append (placeholder );
309- else
310- return new Returned_Data (placeholder , new Token (Token .Lexeme_Types .ID , LINE_NUMBER , LEXEME_START .toString ()));
311- }
312-
313- return new Returned_Data (new Token (Token .Lexeme_Types .ID , LINE_NUMBER , LEXEME_START .toString ()));
329+ //Test lexeme for special sequences, if all fail then is in fact ID.
330+ if (TREE_START .matcher (COMPLETE_LEXEME ).matches ())
331+ return new Returned_Data (REMAINDER , new Token (Token .Lexeme_Types .TREE_START , LINE_NUMBER , COMPLETE_LEXEME ));
332+ else if (DISPLAY_START .matcher (COMPLETE_LEXEME ).matches ())
333+ return new Returned_Data (REMAINDER , new Token (Token .Lexeme_Types .DISPLAY_START , LINE_NUMBER , COMPLETE_LEXEME ));
334+ else if (PROBABILITY_START .matcher (COMPLETE_LEXEME ).matches ())
335+ return new Returned_Data (REMAINDER , new Token (Token .Lexeme_Types .PROBABILITY_START , LINE_NUMBER , COMPLETE_LEXEME ));
336+ else if (DECK_START .matcher (COMPLETE_LEXEME ).matches ())
337+ return new Returned_Data (REMAINDER , new Token (Token .Lexeme_Types .DECK_START , LINE_NUMBER , COMPLETE_LEXEME ));
338+ else
339+ return new Returned_Data (REMAINDER , new Token (Token .Lexeme_Types .ID , LINE_NUMBER , COMPLETE_LEXEME ));
314340 }
315341}
0 commit comments