diff --git a/README.md b/README.md index e57375e..a386b0e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A project to exercise Java, JUnit, git, GitHub, and code-reading skills. Student ### Academic Honesty -THIS IS AN INDIVIDUAL PROJECT. The following is not allowed: +THIS IS AN INDIVIDUAL PROJECT. The following is not allowed: - You MAY NOT copy any code from an AI. - You MAY NOT paste any of the project or your code into an AI. - You MAY NOT copy another student's code. @@ -20,7 +20,7 @@ You may: ### Commits -YOU ARE EXPECTED TO MAKE SMALL, FREQUENT COMMITS. Doing so is good practice and helps me see that it's less likely you pasted in a large part of your solution from elsewhere. +YOU ARE EXPECTED TO MAKE SMALL, FREQUENT COMMITS. Doing so is good practice and helps me see that it's less likely you pasted in a large part of your solution from elsewhere. (Changing Read me for the first commit) ### Timeline This is a large, difficult project. Start early, and get help when you need it. diff --git a/goodMusic.txt b/goodMusic.txt new file mode 100644 index 0000000..78a08a3 --- /dev/null +++ b/goodMusic.txt @@ -0,0 +1,89 @@ +There was a blooming spider +Went up a blooming spout +And down came the rain +And washed the spider out +Out came the sun +And dried up all the rain +But that bloody blooming son of a gun +Went up that spout again +Humpty Dumpty sat on a wall, +Humpty Dumpty had a great fall. +All the king’s horses and all the king’s men +Couldn’t put Humpty together again. +The wheels on the bus go round and round +Round and round, round and round +The wheels on the bus go round and round +All through the town +The wipers on the bus go “Swish, swish, swish, +Swish, swish, swish, swish, swish, swish” +The wipers on the bus go “Swish, swish, swish” +All through the town. +The people on the bus go, “chat, chat, chat, +cha,,chat chat,chat chat ,chat +The people on the bus go, “, chat,chat,chat +All through the town. +The horn on the bus go “Beep, beep, beep +Beep, beep, beep, beep, beep, beep” +The horn on the bus go “Beep, beep, beep” +All through the town. +The wheels on the bus go round and round +Round and round, round and round +The wheels on the bus go round and round +All through the town +The wipers on the bus go “Swish, swish, swish, +Swish, swish, swish, swish, swish, swish” +The wipers on the bus go “Swish, swish, swish” +All through the town. +The people on the bus go, “chat, chat, chat, +cha,,chat chat,chat chat ,chat +The people on the bus go, “, chat,chat,chat +All through the town. +The horn on the bus go “Beep, beep, beep +Beep, beep, beep, beep, beep, beep” +The horn on the bus go “Beep, beep, beep” +All through the town. +As your bright and tiny spark, +Lights the traveller in the dark. +Though I know not what you are, +Twinkle, twinkle, little star. +Twinkle, twinkle, little star. +How I wonder what you are. +Up above the world so high, +Like a diamond in the sky. +Twinkle, twinkle, little star. +How I wonder what you are. +How I wonder what you are. +Old MacDonald had a farm +Ee i ee i o +And on his farm he had some cows +Ee i ee i oh +With a moo-moo here +And a moo-moo there +Here a moo, there a moo +Everywhere a moo-moo +Old MacDonald had a farm +Ee i ee i o +Old MacDonald had a farm +Ee i ee i o +And on his farm he had some chicks +Ee i ee i o +With a cluck-cluck here +And a cluck-cluck there +Here a cluck, there a cluck +Everywhere a cluck-cluck +Old MacDonald had a farm +Ee i ee i o +Old MacDonald had a farm +Ee i ee i o +And on his farm he had some pigs +Ee i ee i o +With an oink-oink here +And an oink-oink there +Here an oink, there an oink +Everywhere an oink-oink +Old MacDonald had a farm +Ee i ee i o +London Bridge is falling down, +Falling down, falling down, +London Bridge is falling down, +My fair lady \ No newline at end of file diff --git a/src/LowercaseSentenceTokenizer.java b/src/LowercaseSentenceTokenizer.java index cc8285d..4afdfec 100644 --- a/src/LowercaseSentenceTokenizer.java +++ b/src/LowercaseSentenceTokenizer.java @@ -1,6 +1,21 @@ +import java.util.ArrayList; import java.util.List; import java.util.Scanner; +/** + * @author Shawn Nguru + * SDEV 301 RambleBot + * 1-21-25 + * + * Problems/Bugs/Issues: + * + * + * Notes/plans: + * Iterate through the String (1) + * split(String regex) a Split method by the spaces and symbols (2) + * Insert the strings into a list of strings (3) + */ + /** * A tokenizer that converts text input to lowercase and splits it * into a list of tokens, where each token is either a word or a period. @@ -29,8 +44,35 @@ public class LowercaseSentenceTokenizer implements Tokenizer { * @return a list of tokens, where each token is a word or a period */ public List tokenize(Scanner scanner) { - // TODO: Implement this function to convert the scanner's input to a list of words and periods - return null; + List textList =new ArrayList(); + + String text = scanner.nextLine(); //put the input text in a string array + + String[] arr = text.split(" "); //split the array based on spaces in the text + + for(int i = 0; i<= arr.length-1; i++) //add the content of the array into the list of strings + { + + + + if(arr[i] == " " || arr[i] == " " || arr[i] == "") //filtering out the extra spaces inside the arrayList + { + + } + else if(arr[i].endsWith(".")) + { + arr[i] = arr[i].replace('.', ' '); + textList.add(arr[i].trim().toLowerCase()); + textList.add("."); + } + else + { + textList.add(arr[i].toLowerCase()); + } + } + + +return textList; } } diff --git a/src/LowercaseSentenceTokenizerTest.java b/src/LowercaseSentenceTokenizerTest.java index 85ac3a2..6088549 100644 --- a/src/LowercaseSentenceTokenizerTest.java +++ b/src/LowercaseSentenceTokenizerTest.java @@ -7,7 +7,8 @@ class LowercaseSentenceTokenizerTest { // Wave 1 @Test - void testTokenizeWithNoCapitalizationOrPeriod() { + void testTokenizeWithNoCapitalizationOrPeriod() + { LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); Scanner scanner = new Scanner("this is a lowercase sentence without a period"); List tokens = tokenizer.tokenize(scanner); @@ -16,10 +17,15 @@ void testTokenizeWithNoCapitalizationOrPeriod() { } // Wave 2 - /* - * Write your test here! - */ - + @Test + void testTokenizeWithMultipleSpaces() + { + LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); + Scanner scanner = new Scanner("hello hi hi hi hello hello"); + List tokens = tokenizer.tokenize(scanner); + + assertEquals(List.of("hello","hi", "hi","hi", "hello","hello"), tokens); + } // Wave 3 @Test diff --git a/src/UnigramWordPredictor.java b/src/UnigramWordPredictor.java index d713250..54e691c 100644 --- a/src/UnigramWordPredictor.java +++ b/src/UnigramWordPredictor.java @@ -1,7 +1,15 @@ +/** + * @author Shawn Nguru + * SDEV 301 RambleBot + * 1-21-25 + * + */ + import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.Scanner; /** @@ -33,6 +41,11 @@ public UnigramWordPredictor(Tokenizer tokenizer) { * If the input text is: "The cat sat. The cat slept. The dog barked." * After tokenizing, the tokens would be: ["the", "cat", "sat", ".", "the", "cat", "slept", ".", "the", "dog", "barked", "."] * + * key: "the" value: ["cat","cat","dog"] + * key: "cat" value: ["sat", "slept"] + * + * + * * The resulting map (neighborMap) would be: * { * "the" -> ["cat", "cat", "dog"], @@ -50,11 +63,49 @@ public UnigramWordPredictor(Tokenizer tokenizer) { */ public void train(Scanner scanner) { List trainingWords = tokenizer.tokenize(scanner); + List valueWords = new ArrayList<>(); + neighborMap = new HashMap<>(); - // TODO: Convert the trainingWords into neighborMap here + String keyText = ""; + for(int i = 0; i < trainingWords.size(); i++) + { + keyText = trainingWords.get(i); + for(int j= 0; j < trainingWords.size()-1; j++) + { + if(keyText.equals(trainingWords.get(j))) + { + valueWords.add(trainingWords.get(j+1)); + } + } + neighborMap.put(trainingWords.get(i), valueWords); + valueWords = new ArrayList(); + } } /** + * + * + the quick fox the slow dog the slow cat +predictor.train() + | + v +this.neighborMap = { + "the": ["quick", "slow", "slow"], + "quick": ["fox"] + "slow": ["dog", "cat"] +} + +--------------- + +predictor.predictNextWord(["I", "saw", "the"]) +followingWords = neighborMap.get("the") -> ["quick", "slow", "slow"] + +rng(3) -> # from [0-2] +randomNumber = rng(3) +followingWords.get(randomNumber) -> ? + * + * * + * * Predicts the next word based on the given context. * The prediction is made by randomly selecting from all words * that follow the last word in the context in the training data. @@ -73,6 +124,8 @@ public void train(Scanner scanner) { * "barked" -> ["."] * } * + * + * * When predicting the next word given a context, the predictor should use * the neighbor map to select a word based on the observed frequencies in * the training data. For example: @@ -98,10 +151,36 @@ public void train(Scanner scanner) { * @param context a list of words representing the current context * @return the predicted next word, or null if no prediction can be made */ - public String predictNextWord(List context) { + public String predictNextWord(List context) + { // TODO: Return a predicted word given the words preceding it - // Hint: only the last word in context should be looked at - return null; + /** + * + */ + + String probableWord = ""; + + String text = context.get(context.size()-1); //getting the last piece of text in the context + + List probaleList = new ArrayList<>(); //having the value of the list equal another arraylist + + for (String key : neighborMap.keySet()) //looping through the map to get the most likely list + { + if(text == key) + { + probaleList = neighborMap.get(key); + } + } + + Random wordProb = new Random(); //instantiating a random number generator + int max = probaleList.size()-1; + + for (int i = 0; i < probaleList.size(); i++) //having the returned probable word return the most likely word from the random number + { + probableWord = probaleList.get(wordProb.nextInt(max - 0 + 1)); + } + + return probableWord; } /** diff --git a/src/UnigramWordPredictorTest.java b/src/UnigramWordPredictorTest.java index 08618a3..b3ad797 100644 --- a/src/UnigramWordPredictorTest.java +++ b/src/UnigramWordPredictorTest.java @@ -56,6 +56,7 @@ void testTrainAndGetNeighborMap() { // Wave 5 /** + * Tests the predictNextWord method using a different example to verify that the correct word * is predicted based on the training data. *