Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# ramblebot

THIS IS A TEST CHANGE SETTING UP GIT

A project to exercise Java, JUnit, git, GitHub, and code-reading skills. Students will create a language model to generate text.

## Expectations
Expand Down
63,779 changes: 63,779 additions & 0 deletions sherlockHolmesTraining.txt

Large diffs are not rendered by default.

32 changes: 29 additions & 3 deletions src/LowercaseSentenceTokenizer.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
* A tokenizer that converts text input to lowercase and splits it
* into a list of tokens, where each token is either a word or a period.
*/
public class LowercaseSentenceTokenizer implements Tokenizer {
public class LowercaseSentenceTokenizer implements Tokenizer
{
/**
* Tokenizes the text from the given Scanner. The method should
* convert the text to lowercase and split it into words and periods.
Expand All @@ -28,9 +30,33 @@ public class LowercaseSentenceTokenizer implements Tokenizer {
* @param scanner the Scanner to read the input text from
* @return a list of tokens, where each token is a word or a period
*/
public List<String> tokenize(Scanner scanner) {
public List<String> tokenize(Scanner scanner)
{
// TODO: Implement this function to convert the scanner's input to a list of words and periods
return null;
List<String> tokens = new ArrayList<String>();

// While there are new items to scan
while (scanner.hasNext())
{
// Add the next item to a String variable
String punctuationCheck = scanner.next().toLowerCase();

// If it ends with punctuation, separate them as two items
if (punctuationCheck.endsWith(".") || punctuationCheck.endsWith("?") || punctuationCheck.endsWith("!"))
{
Comment on lines +45 to +46

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool handling of other punctuation!

tokens.add(punctuationCheck.substring(0, punctuationCheck.length() - 1));
tokens.add(punctuationCheck.substring(punctuationCheck.length() - 1));
}
else // Otherwise, just add the next item
{
tokens.add(punctuationCheck);
}

}

// Return the new List
return tokens;
}

}

15 changes: 12 additions & 3 deletions src/LowercaseSentenceTokenizerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,18 @@ void testTokenizeWithNoCapitalizationOrPeriod() {
}

// Wave 2
/*
* Write your test here!
*/
@Test
void testTokenizeSpaces()
{
    // Words separated by spaces should come back one token per word,
    // in their original order.
    List<String> expected = List.of("hello", "hi", "hi", "hi", "hello", "hello");

    LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer();
    List<String> actual = tokenizer.tokenize(new Scanner("hello hi hi hi hello hello"));

    assertEquals(expected, actual);
}
Comment on lines +19 to +30

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice test!



// Wave 3
Expand Down
47 changes: 46 additions & 1 deletion src/RambleApp.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
Expand Down Expand Up @@ -113,6 +115,18 @@ private void generateText(int numWords, String filename) {
List<String> context = new ArrayList<>();
context.add(tokens.get(0));
System.out.print(context.get(0)); // Print the first word

//File initialization/writing the first word
try
{
FileWriter outputFile = new FileWriter("rambleOutput.txt");
outputFile.write(context.get(0));
outputFile.close();
}
catch (IOException e)
{
e.getStackTrace();
}

for (int i = 1; i < numWords; i++) {
String nextWord = predictor.predictNextWord(context);
Expand All @@ -123,7 +137,38 @@ private void generateText(int numWords, String filename) {
System.out.println("If you have implemented it, there's a bug in your code where it's returning null for a prediction.");
break;
}
System.out.print(" " + nextWord);
// Adjusted output to account for sentence ending punctuation.
if (nextWord.equals(".") || nextWord.equals("?") || nextWord.equals("!"))
{
System.out.println(nextWord);
}
else
{
System.out.print(" " + nextWord);
}

//Appending all additional words to the rambleOutput.txt file
try
{
// https://stackoverflow.com/questions/2885173/how-do-i-create-a-file-and-write-to-it
// Used the FileWriter section of the post by Derek Hill to get how to write to a file without overwriting.
FileWriter outputFile = new FileWriter("rambleOutput.txt", true); // Append true to continue writing to a file

if (nextWord.equals(".") || nextWord.equals("?") || nextWord.equals("!"))
{
outputFile.write(nextWord);
}
else
{
outputFile.write(" " + nextWord);
}

outputFile.close();
Comment on lines +151 to +166

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fun changes!

}
catch (IOException e)
{
e.getStackTrace();
}

// Update the context with the next word
context.add(nextWord);
Expand Down
56 changes: 48 additions & 8 deletions src/UnigramWordPredictor.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
* The model is trained on input text and maps each word to a list of
* words that directly follow it in the text.
*/
public class UnigramWordPredictor implements WordPredictor {
public class UnigramWordPredictor implements WordPredictor
{
private Map<String, List<String>> neighborMap;
private Tokenizer tokenizer;

Expand All @@ -18,7 +19,8 @@ public class UnigramWordPredictor implements WordPredictor {
*
* @param tokenizer the tokenizer used to process the input text
*/
public UnigramWordPredictor(Tokenizer tokenizer)
{
    // Keep the tokenizer; train() uses it to split the training text.
    this.tokenizer = tokenizer;
}

Expand Down Expand Up @@ -48,10 +50,41 @@ public UnigramWordPredictor(Tokenizer tokenizer) {
*
* @param scanner the Scanner to read the training text from
*/
public void train(Scanner scanner) {
public void train(Scanner scanner)
{
List<String> trainingWords = tokenizer.tokenize(scanner);

// TODO: Convert the trainingWords into neighborMap here
// Map
Map<String, List<String>> prepNeighborMap = new HashMap<String, List<String>>();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can use <> here and Java will infer the type


// Go through each token in trainingWords
for (int i = 0; i < trainingWords.size() -1; i++)
{
// Test for null
if (trainingWords.get(i + 1) == null)
{
break;
}
else if (!prepNeighborMap.containsKey(trainingWords.get(i))) // If the token from trainingWords doesn't exist as a key in prepNeighborMap, create one
{
List<String> addList = new ArrayList<String>();
addList.add(trainingWords.get(i + 1)); // Add the next token from trainingWords to a new list

prepNeighborMap.put(trainingWords.get(i), addList);
}
else // If the token from trainingWords exists as a key in prepNeighborMap, add new token to list
{
List<String> addList = prepNeighborMap.get(trainingWords.get(i)); // Create a new list from the existing list based on the key
addList.add(trainingWords.get(i + 1)); // Add new list item

prepNeighborMap.put(trainingWords.get(i), addList);
}

Comment on lines +61 to +82

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice logic!

}

// Put the completed map into neighborMap
this.neighborMap = prepNeighborMap;

}

/**
Expand Down Expand Up @@ -98,10 +131,17 @@ public void train(Scanner scanner) {
* @param context a list of words representing the current context
* @return the predicted next word, or null if no prediction can be made
*/
public String predictNextWord(List<String> context) {
// TODO: Return a predicted word given the words preceding it
// Hint: only the last word in context should be looked at
return null;
public String predictNextWord(List<String> context)
{
// Generating list of next word options
List<String> contextList = neighborMap.get(context.getLast());

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't know about getLast! Looks like it was introduced in a newer version of Java than my tests use, so that's why the tests aren't working on GitHub. Great job though!


// Creating a random num generator based on the number of next word options
int randNum = (int)Math.floor(Math.random() * contextList.size());

// Returning decided next word
return contextList.get(randNum);

}

/**
Expand Down
Loading