-
Notifications
You must be signed in to change notification settings - Fork 27
ramblebot PR #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
ramblebot PR #25
Changes from all commits
475abfb
da3ab07
285ba24
2a692ff
7acbeed
153f378
ccc7364
8efc60e
a38558b
bc9d2eb
d1bba9b
37140a2
7e23621
7651ae1
8018b64
c0c1399
d6507be
1a2d855
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.Scanner; | ||
|
|
||
|
|
@@ -30,7 +31,37 @@ public class LowercaseSentenceTokenizer implements Tokenizer { | |
| */ | ||
| public List<String> tokenize(Scanner scanner) { | ||
| // TODO: Implement this function to convert the scanner's input to a list of words and periods | ||
| return null; | ||
| } | ||
| } | ||
| // 1: Make a list for tokens (String) | ||
| List<String> tokensList = new ArrayList<String>(); | ||
|
|
||
| // 2: Grab each word in the text and put into list | ||
| while (scanner.hasNextLine()) | ||
| { | ||
| // if word ends with a period, split it between the period and the letter before it - | ||
|
|
||
| /*if (scanner.next().endsWith(".")) | ||
| { | ||
| String[] wordArray = scanner.next().split(""); | ||
| if (wordArray[wordArray.length-1] == ".") | ||
| { | ||
| tokensList.add(scanner.next().toLowerCase()); | ||
| } | ||
| } | ||
| */ | ||
| String word = scanner.next(); | ||
|
|
||
| if (word.endsWith(".")) | ||
| { | ||
| String wordWithoutPeriod = word.replace(".", ""); | ||
| tokensList.add(wordWithoutPeriod); | ||
| tokensList.add("."); | ||
|
Comment on lines
+53
to
+57
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice logic! |
||
| } | ||
| else | ||
| { | ||
| tokensList.add(word.toLowerCase()); | ||
| } | ||
| } | ||
|
|
||
| return tokensList; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,19 @@ void testTokenizeWithNoCapitalizationOrPeriod() { | |
|
|
||
| // Wave 2 | ||
| /* | ||
| * Write your test here! | ||
| * Write your test here! everything works for initial commit | ||
| */ | ||
| @Test | ||
| void testCodeHandlesInputWithManyCase() | ||
| { | ||
| // Arrange | ||
| LowercaseSentenceTokenizer tokenizer = new LowercaseSentenceTokenizer(); | ||
| Scanner scanner = new Scanner("hi hi hi"); | ||
| List<String> tokens = tokenizer.tokenize(scanner); | ||
|
|
||
| // Act & Assert | ||
| assertEquals(List.of("hi", "hi", "hi"), tokens); | ||
| } | ||
|
Comment on lines
+22
to
+32
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't quite test what we were looking for. We wanted to see whether your code could handle multiple spaces in a row, e.g. "hi    hi   hi". |
||
|
|
||
|
|
||
| // Wave 3 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Scanner; | ||
| import java.util.Random; | ||
|
|
||
| /** | ||
| * A class for predicting the next word in a sequence using a unigram model. | ||
|
|
@@ -52,6 +53,26 @@ public void train(Scanner scanner) { | |
| List<String> trainingWords = tokenizer.tokenize(scanner); | ||
|
|
||
| // TODO: Convert the trainingWords into neighborMap here | ||
| neighborMap = new HashMap<String, List<String>>(); | ||
|
|
||
| for (int i=0; i < trainingWords.size()-1; i++) | ||
| { | ||
|
|
||
| List<String> wordFollowUpList = new ArrayList<String>(); | ||
|
|
||
| if (!neighborMap.containsKey(trainingWords.get(i))) | ||
| { | ||
| wordFollowUpList.add(trainingWords.get(i+1)); | ||
| neighborMap.put(trainingWords.get(i), wordFollowUpList); | ||
| } | ||
| else | ||
| { | ||
| List<String> currentWordsList = neighborMap.get(trainingWords.get(i)); | ||
| currentWordsList.add(trainingWords.get(i+1)); | ||
| neighborMap.put(trainingWords.get(i), currentWordsList); | ||
| } | ||
|
Comment on lines
+63
to
+73
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice logic! |
||
| } | ||
| System.out.println(neighborMap); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -101,7 +122,34 @@ public void train(Scanner scanner) { | |
| public String predictNextWord(List<String> context) { | ||
| // TODO: Return a predicted word given the words preceding it | ||
| // Hint: only the last word in context should be looked at | ||
| return null; | ||
|
|
||
| // if number of words to generate is 1, "upon" will be generated | ||
| String startingWord = ""; | ||
| List<String> temp = new ArrayList<String>(); | ||
| temp.addAll(getNeighborMap().keySet()); | ||
| startingWord = temp.get(0); | ||
| System.out.println("TEMP: " + temp); | ||
| System.out.println("STARTING WORD: " + startingWord); | ||
|
Comment on lines
+127
to
+132
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like where you're headed with this, but I think there might be some misunderstanding about what context represents. Always feel free to come by tutoring or office hours if you're unsure! |
||
|
|
||
| context = getNeighborMap().get(startingWord); // build off the fact that index0 is always gonna be first | ||
| System.out.println("CURRENT CONTEXT: " + context); | ||
|
|
||
| int max = context.size()-1; | ||
| int min = 0; | ||
| Random r = new Random(); | ||
| int randomNum = r.nextInt(max-min+1) + min; | ||
|
|
||
| String randomWord = ""; | ||
|
|
||
| randomWord = context.get(randomNum); | ||
| context = getNeighborMap().get(randomWord); | ||
|
|
||
| System.out.println(" | next word: " + randomWord); | ||
| System.out.println(" | next context: " + context); | ||
| System.out.println(" | size of context: " + context.size()); | ||
|
|
||
| return randomWord; | ||
| //return randomWord; // "upon" is still generated with 1, even when null. | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Hello world. This is Dr.Smith's hello example. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remember to delete unneeded comments once you're done with them