Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
data/
target/
.DS_Store
.classpath
.project
.settings/
.vscode/
src/.DS_Store
10 changes: 6 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-core</artifactId>
<version>0.9</version>
</dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-mr</artifactId>
<version>0.13.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ This repo contains several common big data exercises.

## Setup

1. Install the JDK 7.0
1. Install the JDK 17
2. [Download & Install Maven](http://maven.apache.org/download.cgi)
3. Add the `movies.txt.gz` file to the `data/` folder at the root of the repository.


## How to run tests
Expand Down
160 changes: 160 additions & 0 deletions src/main/java/nearsoft/academy/MovieRecommender.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package nearsoft.academy;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.apache.log4j.BasicConfigurator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * User-based movie recommender built on top of Apache Mahout.
 *
 * <p>On construction the gzip-compressed review dump is scanned once. Every
 * user and product string id is mapped to a dense integer id, and each
 * (user, product, score) triple is written to {@link #CSV_PATH}. Mahout then
 * loads that CSV to build the recommender.
 *
 * <p>Initialisation failures are reported on {@code System.err} and leave the
 * recommender unset; in that case {@link #getRecommendationsForUser(String)}
 * returns {@code null}.
 */
public class MovieRecommender {
    /** Location of the intermediate CSV file handed to Mahout's FileDataModel. */
    static final String CSV_PATH = "data/dataset.csv";

    private static final String USER_KEY = "review/userId:";
    private static final String PRODUCT_KEY = "product/productId:";
    private static final String SCORE_KEY = "review/score:";

    /** Number of fields (user, product, score) that make up one CSV row. */
    private static final int FIELDS_PER_RECORD = 3;

    /** Maximum number of recommendations returned per user. */
    private static final int MAX_RECOMMENDATIONS = 3;

    int totalUsers = 0;
    int totalProducts = 0;
    int totalReviews = 0;

    private final Map<String, Integer> users = new HashMap<String, Integer>();
    private final Map<String, Integer> products = new HashMap<String, Integer>();
    private final Map<Long, String> productsReverse = new HashMap<Long, String>();
    private UserBasedRecommender recommender;

    /**
     * Parses the dataset and builds the recommender.
     *
     * @param datasetPath path to the gzip-compressed review file
     */
    public MovieRecommender(String datasetPath) {
        BasicConfigurator.configure();
        try {
            // Close the gzip reader as soon as the scan is done, even on failure.
            try (BufferedReader file = this.readGZFile(datasetPath)) {
                processFileData(file);
            }
            DataModel model = new FileDataModel(new File(CSV_PATH));
            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
            UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
            recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
        } catch (Exception e) {
            // The constructor has never thrown; keep that contract but report the
            // exception type and the dataset so the failure is diagnosable.
            System.err.println("Could not initialise MovieRecommender from '" + datasetPath + "': " + e);
        }
    }

    /**
     * Opens a gzip-compressed text file for line-by-line reading.
     *
     * @param datasetPath path to the gzip file
     * @return a buffered reader over the decompressed content; the caller must close it
     * @throws IOException if the file cannot be opened or is not valid gzip
     */
    private BufferedReader readGZFile(String datasetPath) throws IOException {
        FileInputStream raw = new FileInputStream(datasetPath);
        try {
            return new BufferedReader(new InputStreamReader(new GZIPInputStream(raw)));
        } catch (IOException | RuntimeException e) {
            // GZIPInputStream reads the header eagerly; do not leak the file handle
            // when that fails.
            try {
                raw.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Scans the review dump, assigns numeric ids to users and products, counts
     * reviews and writes one {@code userId,productId,score} row per review to
     * {@link #CSV_PATH}.
     *
     * @param file reader over the decompressed dataset; not closed by this method
     * @throws IOException if reading the dataset or writing the CSV fails
     */
    private void processFileData(BufferedReader file) throws IOException {
        File csvFile = new File(CSV_PATH);
        File parent = csvFile.getParentFile();
        if (parent != null) {
            // FileWriter does not create missing directories.
            parent.mkdirs();
        }

        String user = null;
        String product = null;
        String review = null;
        int dataCount = 0;

        // FileWriter truncates an existing file, so no explicit delete is needed.
        try (FileWriter fileWriter = new FileWriter(csvFile)) {
            String line;
            while ((line = file.readLine()) != null) {
                String[] split = line.split(" ");
                if (split.length < 2) {
                    // Blank line, lone space, or a key without a value: nothing to read.
                    continue;
                }
                switch (split[0]) {
                    case USER_KEY:
                        user = split[1];
                        addUserCount(user);
                        break;
                    case PRODUCT_KEY:
                        product = split[1];
                        addProductCount(product);
                        break;
                    case SCORE_KEY:
                        review = split[1];
                        this.totalReviews++;
                        break;
                    default:
                        // Any other field (summary, text, ...) is irrelevant here.
                        continue;
                }
                dataCount++;
                // Emit the row as soon as it is complete so the final record of
                // the file is written too, instead of waiting for a next line.
                if (dataCount == FIELDS_PER_RECORD) {
                    dataCount = 0;
                    fileWriter.write(this.users.get(user) + "," + this.products.get(product) + "," + review + "\n");
                }
            }
        }
    }

    /** Registers {@code user} under the next free numeric id if it is new. */
    private void addUserCount(String user) {
        if (!this.users.containsKey(user)) {
            this.users.put(user, this.totalUsers);
            this.totalUsers++;
        }
    }

    /** Registers {@code product} under the next free numeric id (both directions) if it is new. */
    private void addProductCount(String product) {
        if (!this.products.containsKey(product)) {
            this.products.put(product, this.totalProducts);
            this.productsReverse.put(Long.valueOf(this.totalProducts), product);
            this.totalProducts++;
        }
    }

    /** @return the number of score lines read from the dataset */
    public int getTotalReviews() {
        return this.totalReviews;
    }

    /** @return the number of distinct product ids seen */
    public int getTotalProducts() {
        return this.products.size();
    }

    /** @return the number of distinct user ids seen */
    public int getTotalUsers() {
        return this.users.size();
    }

    /**
     * Recommends up to three products for the given user.
     *
     * @param user the user id exactly as it appears in the dataset
     * @return the recommended product ids, or {@code null} when the user is
     *         unknown, initialisation failed, or the recommender raised an error
     */
    public List<String> getRecommendationsForUser(String user) {
        if (recommender == null) {
            System.err.println("Recommender is not initialised; no recommendations for user '" + user + "'");
            return null;
        }
        Integer userId = this.users.get(user);
        if (userId == null) {
            System.err.println("Unknown user '" + user + "'");
            return null;
        }

        List<RecommendedItem> rec;
        try {
            rec = recommender.recommend(userId, MAX_RECOMMENDATIONS);
        } catch (Exception e) {
            System.err.println("Recommendation failed for user '" + user + "': " + e);
            return null;
        }

        List<String> res = new ArrayList<String>(rec.size());
        for (RecommendedItem recommendation : rec) {
            // Translate Mahout's numeric item id back to the original product id.
            res.add(this.productsReverse.get(recommendation.getItemID()));
        }
        return res;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.junit.Test;

import nearsoft.academy.MovieRecommender;

import java.io.IOException;
import java.util.List;

Expand All @@ -15,7 +17,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
MovieRecommender recommender = new MovieRecommender("data/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand Down