grc-cohort-21 · abdirashidexe · Jan 21, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 29, 2025
diff --git a/README.md b/README.md
@@ -43,7 +43,7 @@ This is a large, difficult project. Start early, and get help when you need it.
 Sometimes this button takes a little bit to show up when you first open VS Code. If you're not seeing it, make sure you have the Java extension pack installed and it is active.
 1. It should ask you for a filename. Give it the following filename:
     ```
-    wikipediaData.txt
+    keatsTraining.txt
     ```
     Then hit enter.
 1. It should ask you for a number of words. Enter a positive integer and hit enter.
@@ -57,7 +57,7 @@ Sometimes this button takes a little bit to show up when you first open VS Code.
 1. Open the testing side panel by clicking on the beaker on the left of your screen. ![Test Runner Sidebar in VS Code](images/test_runner.png)
 1. Hover over `ramblebot`. A few grey triangles should appear. Click the triangle the furthest to the left.
 1. You should expect to see all the tests fail. This is good! You haven't written your solution yet, so it's expected for them to fail.
-1. Validate that you can push to your repo by making any change to this README, adding, committing, and pushing it.
+1. Validate that you can push to your repo by making any change to this README, adding, committing, and pushing it...
 
 ## Understanding the Project
 

diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java
@@ -1,3 +1,4 @@
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Scanner;
 
@@ -30,7 +31,37 @@ public class LowercaseSentenceTokenizer implements Tokenizer {
    */
   public List<String> tokenize(Scanner scanner) {
     // TODO: Implement this function to convert the scanner's input to a list of words and periods
-    return null;
-  }
-}
+    // Tokens in reading order: lowercased words, plus a separate "." token for every
+    // word that ends with a period.
+    List<String> tokensList = new ArrayList<String>();
+
+    // hasNext() is the guard that pairs with next(): both skip whitespace, so blank
+    // input or a trailing newline can no longer make next() throw NoSuchElementException
+    // (which happened when hasNextLine() was used as the guard).
+    while (scanner.hasNext())
+    {
+      // Lowercase up front so words that end a sentence are normalized too.
+      String word = scanner.next().toLowerCase();
+
+      if (word.endsWith("."))
+      {
+        // Split off only the trailing period; periods inside the word stay with the word.
+        String wordWithoutPeriod = word.substring(0, word.length() - 1);
+
+        // A lone "." has no word part, so avoid emitting an empty-string token for it.
+        if (!wordWithoutPeriod.isEmpty())
+        {
+          tokensList.add(wordWithoutPeriod);
+        }
+        tokensList.add(".");
+      }
+      else
+      {
+        tokensList.add(word);
+      }
+    }
+
+    return tokensList;
+  }
+}
diff --git a/src/LowercaseSentenceTokenizerTest.java b/src/LowercaseSentenceTokenizerTest.java
@@ -17,8 +17,19 @@ void testTokenizeWithNoCapitalizationOrPeriod() {
 
     // Wave 2
     /*
-     * Write your test here!
+     * Write your test here! Everything works as of the initial commit.
      */
+    @Test
+    void testCodeHandlesInputWithManyCase()
+    {
+        // Arrange: the same word in several capitalizations, so the test fails
+        // if the tokenizer does not lowercase its output.
+        LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer();
+        Scanner scanner = new Scanner("Hi hI HI");
+
+        // Act
+        List<String> tokens = tokenizer.tokenize(scanner);
+
+        // Assert
+        assertEquals(List.of("hi", "hi", "hi"), tokens);
+    }
 
 
     // Wave 3

diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java
@@ -3,6 +3,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Scanner;
+import java.util.Random;
 
 /**
  * A class for predicting the next word in a sequence using a unigram model.
@@ -52,6 +53,26 @@ public void train(Scanner scanner) {
     List<String> trainingWords = tokenizer.tokenize(scanner);
 
     // TODO: Convert the trainingWords into neighborMap here
+    // Start from an empty map so each call to train() reflects only the tokens just read.
+    neighborMap = new HashMap<String, List<String>>();
+
+    // Pair every token with the token right after it. The final token has no
+    // successor, so the loop stops one short of the end of the list.
+    for (int i = 0; i < trainingWords.size() - 1; i++)
+    {
+      // computeIfAbsent creates the follower list only the first time a word is seen;
+      // repeated followers are appended again, so the list keeps one entry per occurrence.
+      neighborMap
+          .computeIfAbsent(trainingWords.get(i), word -> new ArrayList<String>())
+          .add(trainingWords.get(i + 1));
+    }
   }
 
   /**
@@ -101,7 +122,34 @@ public void train(Scanner scanner) {
   public String predictNextWord(List<String> context) {
     // TODO: Return a predicted word given the words preceding it
     // Hint: only the last word in context should be looked at
-    return null;
+
+    // With no preceding word there is nothing to base a prediction on.
+    // NOTE(review): null is returned here and below for "no prediction" - confirm
+    // that callers accept null, or substitute whatever fallback the class doc specifies.
+    if (context == null || context.isEmpty())
+    {
+      return null;
+    }
+
+    // Per the hint above, only the last word of the context is consulted.
+    String lastWord = context.get(context.size() - 1);
+    List<String> followers = getNeighborMap().get(lastWord);
+
+    // The word has no entry in the neighbor map (never seen with a successor).
+    if (followers == null || followers.isEmpty())
+    {
+      return null;
+    }
+
+    // Pick one follower uniformly at random by index. If the list holds a word
+    // several times, that word is proportionally more likely to be chosen.
+    Random r = new Random();
+    return followers.get(r.nextInt(followers.size()));
   }
 
   /**

diff --git a/training.txt b/training.txt
@@ -0,0 +1 @@
+Hello world. This is Dr.Smith's hello example.