grc-cohort-21 · maple-johnson · Jan 21, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # ramblebot
 
+THIS IS A TEST CHANGE SETTING UP GIT
+
 A project to exercise Java, JUnit, git, GitHub, and code-reading skills. Students will create a language model to generate text.
 
 ## Expectations

diff --git a/sherlockHolmesTraining.txt b/sherlockHolmesTraining.txt
diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java
@@ -1,11 +1,13 @@
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Scanner;
 
 /**
  * A tokenizer that converts text input to lowercase and splits it 
  * into a list of tokens, where each token is either a word or a period.
  */
-public class LowercaseSentenceTokenizer implements Tokenizer {
+public class LowercaseSentenceTokenizer implements Tokenizer 
+{
   /**
    * Tokenizes the text from the given Scanner. The method should 
    * convert the text to lowercase and split it into words and periods.
@@ -28,9 +30,33 @@ public class LowercaseSentenceTokenizer implements Tokenizer {
    * @param scanner the Scanner to read the input text from
    * @return a list of tokens, where each token is a word or a period
    */
-  public List<String> tokenize(Scanner scanner) {
+  public List<String> tokenize(Scanner scanner)
+  {
     // TODO: Implement this function to convert the scanner's input to a list of words and periods
-    return null;
+    // Lowercase each whitespace-delimited chunk; a trailing '.', '?' or '!' becomes its own token.
+    List<String> tokens = new ArrayList<String>();
+
+    while (scanner.hasNext())
+    {
+      String chunk = scanner.next().toLowerCase();
+      int lastIndex = chunk.length() - 1; // next() never returns "", so lastIndex >= 0
+      char lastChar = chunk.charAt(lastIndex);
+      boolean endsSentence = lastChar == '.' || lastChar == '?' || lastChar == '!';
+      if (!endsSentence)
+      {
+        tokens.add(chunk);
+        continue;
+      }
+      // A chunk that is only punctuation (e.g. a lone ".") has no word part;
+      // skip the empty prefix instead of emitting "" as a token.
+      if (lastIndex > 0)
+      {
+        tokens.add(chunk.substring(0, lastIndex));
+      }
+      tokens.add(chunk.substring(lastIndex));
+    }
+    return tokens;
   }
+
 }
 
diff --git a/src/LowercaseSentenceTokenizerTest.java b/src/LowercaseSentenceTokenizerTest.java
@@ -16,9 +16,18 @@ void testTokenizeWithNoCapitalizationOrPeriod() {
     }
 
     // Wave 2
-    /*
-     * Write your test here!
-     */
+    /** Runs of several spaces between words must not yield empty or extra tokens. */
+    @Test
+    void testTokenizeSpaces()
+    {
+        // Arrange: words separated by one or more spaces.
+        Scanner input = new Scanner("hello     hi hi hi    hello hello");
+        List<String> expected = List.of("hello", "hi", "hi", "hi", "hello", "hello");
+        // Act
+        List<String> actual = new LowercaseSentenceTokenizer().tokenize(input);
+        // Assert
+        assertEquals(expected, actual);
+    }
 
 
     // Wave 3

diff --git a/src/RambleApp.java b/src/RambleApp.java
@@ -1,5 +1,7 @@
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Scanner;
@@ -113,6 +115,18 @@ private void generateText(int numWords, String filename) {
         List<String> context = new ArrayList<>();
         context.add(tokens.get(0));
         System.out.print(context.get(0)); // Print the first word
+
+        //File initialization/writing the first word
+        try 
+        {
+          FileWriter outputFile = new FileWriter("rambleOutput.txt");
+          outputFile.write(context.get(0));
+          outputFile.close();
+        } 
+        catch (IOException e) 
+        {
+          e.getStackTrace();
+        }
 
         for (int i = 1; i < numWords; i++) {
             String nextWord = predictor.predictNextWord(context);
@@ -123,7 +137,38 @@ private void generateText(int numWords, String filename) {
                 System.out.println("If you have implemented it, there's a bug in your code where it's returning null for a prediction.");
                 break;
             }
-            System.out.print(" " + nextWord);
+            // Adjusted output to account for sentence ending punctuation.
+            if (nextWord.equals(".") || nextWord.equals("?") || nextWord.equals("!")) 
+            {
+               System.out.println(nextWord);
+            }
+            else
+            {
+               System.out.print(" " + nextWord);
+            }
+
+            //Appending all additional words to the rambleOutput.txt file
+            try 
+            {
+               // https://stackoverflow.com/questions/2885173/how-do-i-create-a-file-and-write-to-it
+               // Used the FileWriter section of the post by Derek Hill to get how to write to a file without overwriting.
+               FileWriter outputFile = new FileWriter("rambleOutput.txt", true); // Append true to continue writing to a file
+
+               if (nextWord.equals(".") || nextWord.equals("?") || nextWord.equals("!")) 
+               {
+                    outputFile.write(nextWord);
+               }
+               else
+               {
+                    outputFile.write(" " + nextWord);
+               }
+
+               outputFile.close();
+            } 
+            catch (IOException e) 
+            {
+               e.getStackTrace();
+            }
 
             // Update the context with the next word
             context.add(nextWord);

diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java
@@ -9,7 +9,8 @@
  * The model is trained on input text and maps each word to a list of 
  * words that directly follow it in the text.
  */
-public class UnigramWordPredictor implements WordPredictor {
+public class UnigramWordPredictor implements WordPredictor 
+{
   private Map<String, List<String>> neighborMap;
   private Tokenizer tokenizer;
 
@@ -18,7 +19,8 @@ public class UnigramWordPredictor implements WordPredictor {
    * 
    * @param tokenizer the tokenizer used to process the input text
    */
-  public UnigramWordPredictor(Tokenizer tokenizer) {
+  public UnigramWordPredictor(Tokenizer tokenizer) 
+  {
     this.tokenizer = tokenizer;
   }
 
@@ -48,10 +50,41 @@ public UnigramWordPredictor(Tokenizer tokenizer) {
    * 
    * @param scanner the Scanner to read the training text from
    */
-  public void train(Scanner scanner) {
+  public void train(Scanner scanner)
+  {
     List<String> trainingWords = tokenizer.tokenize(scanner);
 
-    // TODO: Convert the trainingWords into neighborMap here
+    // Built locally and assigned to the field only once complete.
+    Map<String, List<String>> followers = new HashMap<String, List<String>>();
+
+    // Pair every token with its successor; the final token has none, so stop one short.
+    int lastPairStart = trainingWords.size() - 1;
+    int index = 0;
+    while (index < lastPairStart)
+    {
+      String current = trainingWords.get(index);
+      String next = trainingWords.get(index + 1);
+
+      // A null successor ends training early.
+      if (next == null)
+      {
+        break;
+      }
+
+      // Fetch this word's follower list, creating it the first time the word is seen.
+      List<String> seenAfter = followers.get(current);
+      if (seenAfter == null)
+      {
+        seenAfter = new ArrayList<String>();
+        followers.put(current, seenAfter);
+      }
+
+      // Duplicates are kept, so a successor that occurs more often is listed more often.
+      seenAfter.add(next);
+      index++;
+    }
+
+    this.neighborMap = followers;
   }
 
   /**
@@ -98,10 +131,17 @@ public void train(Scanner scanner) {
    * @param context a list of words representing the current context
    * @return the predicted next word, or null if no prediction can be made
    */
-  public String predictNextWord(List<String> context) {
-    // TODO: Return a predicted word given the words preceding it
-    // Hint: only the last word in context should be looked at
-    return null;
+  public String predictNextWord(List<String> context)
+  {
+    // No prediction is possible before training or without a word to look up.
+    if (neighborMap == null || context == null || context.isEmpty()) { return null; }
+
+    // Only the last context word matters; it has no entry if it never preceded another token.
+    List<String> candidates = neighborMap.get(context.get(context.size() - 1));
+    if (candidates == null || candidates.isEmpty()) { return null; }
+
+    // Random index in [0, size): Math.random() is in [0, 1), so the cast never reaches size.
+    return candidates.get((int) (Math.random() * candidates.size()));
   }
 
   /**