Skip to content

Commit 364ce60

Browse files
committed
Merge branch 'bi' of github.com:ldbc/ldbc_snb_datagen into bi
2 parents 247049b + c240c50 commit 364ce60

File tree

8 files changed

+374
-20
lines changed

8 files changed

+374
-20
lines changed

src/main/java/ldbc/snb/datagen/hadoop/HadoopPersonGenerator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public void map(LongWritable key, Text value, Context context)
3838
try {
3939
this.keySetter = (HadoopFileKeyChanger.KeySetter) Class.forName(conf.get("postKeySetterName")).newInstance();
4040
} catch(Exception e) {
41-
System.out.println(e.getMessage());
41+
System.err.println("Error when setting key setter");
42+
System.err.println(e.getMessage());
4243
}
4344

4445
int threadId = Integer.parseInt(value.toString());
Lines changed: 217 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,229 @@
11
package ldbc.snb.datagen.test;
22

3+
import ldbc.snb.datagen.test.csv.*;
4+
import org.codehaus.groovy.vmplugin.v5.JUnit4Utils;
5+
import org.junit.Before;
6+
import org.junit.BeforeClass;
37
import org.junit.Test;
8+
import static org.junit.Assert.*;
9+
10+
import java.io.File;
11+
import java.util.ArrayList;
12+
import java.util.List;
13+
import java.util.Set;
414

515
/**
616
* Created by aprat on 18/12/15.
717
*/
818
public class LDBCDatagenTest {
919

20+
final String dir = "./test_data/social_network";
21+
22+
@BeforeClass
23+
public static void generateData() {
24+
ProcessBuilder pb = new ProcessBuilder("java", "-cp", "ldbc_snb_datagen.jar","org.apache.hadoop.util.RunJar","./ldbc_snb_datagen.jar","./test_params.ini");
25+
pb.directory(new File("./"));
26+
try {
27+
Process p = pb.start();
28+
p.waitFor();
29+
30+
}catch(Exception e) {
31+
System.err.println(e.getMessage());
32+
}
33+
}
34+
35+
@Test
36+
public void personTest() {
37+
testIdUniqueness(dir+"/person_0_0.csv", 0);
38+
}
39+
40+
@Test
41+
public void postTest() {
42+
testIdUniqueness(dir+"/post_0_0.csv", 0);
43+
}
44+
45+
@Test
46+
public void forumTest() {
47+
testIdUniqueness(dir+"/forum_0_0.csv", 0);
48+
}
49+
50+
@Test
51+
public void commentTest() {
52+
testIdUniqueness(dir+"/comment_0_0.csv", 0);
53+
}
54+
55+
@Test
56+
public void organisationTest() {
57+
testIdUniqueness(dir+"/organisation_0_0.csv", 0);
58+
}
59+
60+
@Test
61+
public void placeTest() {
62+
testIdUniqueness(dir+"/place_0_0.csv", 0);
63+
}
64+
65+
@Test
66+
public void tagTest() {
67+
testIdUniqueness(dir+"/tag_0_0.csv", 0);
68+
}
69+
1070
@Test
11-
public void dummyTest() {
12-
System.out.println("TEST PASSED");
71+
public void tagclassTest() {
72+
testIdUniqueness(dir+"/tagclass_0_0.csv", 0);
1373
}
74+
75+
@Test
76+
public void personKnowsPersonTest() {
77+
testPairUniquenessPlusExistance(dir+"/person_knows_person_0_0.csv",0,1,dir+"/person_0_0.csv",0);
78+
}
79+
80+
@Test
81+
public void organisationIsLocatedInPlaceTest() {
82+
testPairUniquenessPlusExistance(dir+"/organisation_isLocatedIn_place_0_0.csv",0,1,dir+"/organisation_0_0.csv",0,dir+"/place_0_0.csv",0);
83+
}
84+
85+
@Test
86+
public void placeIsPartOfPlaceTest() {
87+
testPairUniquenessPlusExistance(dir+"/place_isPartOf_place_0_0.csv",0,1,dir+"/place_0_0.csv",0);
88+
}
89+
90+
@Test
91+
public void tagClassIsSubclassOfTest() {
92+
testPairUniquenessPlusExistance(dir+"/tagclass_isSubclassOf_tagclass_0_0.csv",0,1,dir+"/tagclass_0_0.csv",0);
93+
}
94+
95+
@Test
96+
public void tagHasTypeTagclassCheck() {
97+
testPairUniquenessPlusExistance(dir+"/tag_hasType_tagclass_0_0.csv",0,1,dir+"/tag_0_0.csv",0,dir+"/tagclass_0_0.csv",0);
98+
}
99+
100+
@Test
101+
public void personStudyAtOrganisationCheck() {
102+
testPairUniquenessPlusExistance(dir+"/person_studyAt_organisation_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/organisation_0_0.csv",0);
103+
}
104+
105+
@Test
106+
public void personWorkAtOrganisationCheck() {
107+
testPairUniquenessPlusExistance(dir+"/person_workAt_organisation_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/organisation_0_0.csv",0);
108+
}
109+
110+
@Test
111+
public void personHasInterestTagCheck() {
112+
testPairUniquenessPlusExistance(dir+"/person_hasInterest_tag_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/tag_0_0.csv",0);
113+
}
114+
115+
@Test
116+
public void personIsLocatedInPlaceCheck() {
117+
testPairUniquenessPlusExistance(dir+"/person_isLocatedIn_place_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/place_0_0.csv",0);
118+
}
119+
120+
@Test
121+
public void forumHasTagCheck() {
122+
testPairUniquenessPlusExistance(dir+"/forum_hasTag_tag_0_0.csv",0,1,dir+"/forum_0_0.csv",0,dir+"/tag_0_0.csv",0);
123+
}
124+
125+
@Test
126+
public void forumHasModeratorPersonCheck() {
127+
testPairUniquenessPlusExistance(dir+"/forum_hasModerator_person_0_0.csv",0,1,dir+"/forum_0_0.csv",0,dir+"/person_0_0.csv",0);
128+
}
129+
130+
@Test
131+
public void forumHasMemberPersonCheck() {
132+
testPairUniquenessPlusExistance(dir+"/forum_hasMember_person_0_0.csv",0,1,dir+"/forum_0_0.csv",0,dir+"/person_0_0.csv",0);
133+
}
134+
135+
@Test
136+
public void forumContainerOfPostCheck() {
137+
testPairUniquenessPlusExistance(dir+"/forum_containerOf_post_0_0.csv",0,1,dir+"/forum_0_0.csv",0,dir+"/post_0_0.csv",0);
138+
}
139+
140+
@Test
141+
public void commentHasCreatorPersonCheck() {
142+
testPairUniquenessPlusExistance(dir+"/comment_hasCreator_person_0_0.csv",0,1,dir+"/comment_0_0.csv",0,dir+"/person_0_0.csv",0);
143+
}
144+
145+
@Test
146+
public void commentHasTagTagCheck() {
147+
testPairUniquenessPlusExistance(dir+"/comment_hasTag_tag_0_0.csv",0,1,dir+"/comment_0_0.csv",0,dir+"/tag_0_0.csv",0);
148+
}
149+
150+
@Test
151+
public void commentIsLocatedInPlaceCheck() {
152+
testPairUniquenessPlusExistance(dir+"/comment_isLocatedIn_place_0_0.csv",0,1,dir+"/comment_0_0.csv",0,dir+"/place_0_0.csv",0);
153+
}
154+
155+
@Test
156+
public void commentReplyOfCommentCheck() {
157+
testPairUniquenessPlusExistance(dir+"/comment_replyOf_comment_0_0.csv",0,1,dir+"/comment_0_0.csv",0,dir+"/comment_0_0.csv",0);
158+
}
159+
160+
@Test
161+
public void commentReplyOfPostCheck() {
162+
testPairUniquenessPlusExistance(dir+"/comment_replyOf_post_0_0.csv",0,1,dir+"/comment_0_0.csv",0,dir+"/post_0_0.csv",0);
163+
}
164+
165+
@Test
166+
public void postHasCreatorPersonCheck() {
167+
testPairUniquenessPlusExistance(dir+"/post_hasCreator_person_0_0.csv",0,1,dir+"/post_0_0.csv",0,dir+"/person_0_0.csv",0);
168+
}
169+
170+
@Test
171+
public void postIsLocatedInPlaceCheck() {
172+
testPairUniquenessPlusExistance(dir+"/post_isLocatedIn_place_0_0.csv",0,1,dir+"/post_0_0.csv",0,dir+"/place_0_0.csv",0);
173+
}
174+
175+
@Test
176+
public void personLikesCommentCheck() {
177+
testPairUniquenessPlusExistance(dir+"/person_likes_comment_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/comment_0_0.csv",0);
178+
}
179+
180+
@Test
181+
public void personLikesPostCheck() {
182+
testPairUniquenessPlusExistance(dir+"/person_likes_post_0_0.csv",0,1,dir+"/person_0_0.csv",0,dir+"/post_0_0.csv",0);
183+
}
184+
185+
public void testIdUniqueness(String fileName, int column) {
186+
FileChecker fileChecker = new FileChecker(fileName);
187+
UniquenessCheck check = new UniquenessCheck(0);
188+
fileChecker.addCheck(check);
189+
if(!fileChecker.run(1)) assertEquals("ERROR PASSING TEST ID UNIQUENESS FOR FILE "+fileName,true, false);
190+
}
191+
192+
public void testPairUniquenessPlusExistance(String relationFileName, int columnA, int columnB, String entityFileNameA, int entityColumnA, String entityFileNameB, int entityColumnB) {
193+
Set<String> organisations = Column.readColumnAsSet(entityFileNameA,entityColumnA);
194+
Set<String> places = Column.readColumnAsSet(entityFileNameB,entityColumnB);
195+
FileChecker fileChecker = new FileChecker(relationFileName);
196+
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck(0,1);
197+
fileChecker.addCheck(pairUniquenessCheck);
198+
List<Set<String>> entityARefColumns = new ArrayList<Set<String>>();
199+
entityARefColumns.add(organisations);
200+
List<Set<String>> entityBRefColumns = new ArrayList<Set<String>>();
201+
entityBRefColumns.add(places);
202+
List<Integer> organisationIndices = new ArrayList<Integer>();
203+
organisationIndices.add(0);
204+
List<Integer> placeIndices = new ArrayList<Integer>();
205+
placeIndices.add(1);
206+
ExistsCheck existsEntityACheck = new ExistsCheck(organisationIndices, entityARefColumns);
207+
ExistsCheck existsEntityBCheck = new ExistsCheck(placeIndices, entityBRefColumns);
208+
fileChecker.addCheck(existsEntityACheck);
209+
fileChecker.addCheck(existsEntityBCheck);
210+
assertEquals("ERROR PASSING ORGANISATION_ISLOCATEDIN_PLACE TEST",true, fileChecker.run(1));
211+
212+
}
213+
214+
public void testPairUniquenessPlusExistance(String relationFileName, int columnA, int columnB, String entityFileName, int entityColumn) {
215+
Set<String> persons = Column.readColumnAsSet(entityFileName,entityColumn);
216+
FileChecker fileChecker = new FileChecker(relationFileName);
217+
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck(columnA,columnB);
218+
fileChecker.addCheck(pairUniquenessCheck);
219+
List<Set<String>> refcolumns = new ArrayList<Set<String>>();
220+
refcolumns.add(persons);
221+
List<Integer> columnIndices = new ArrayList<Integer>();
222+
columnIndices.add(0);
223+
columnIndices.add(1);
224+
ExistsCheck existsCheck = new ExistsCheck(columnIndices, refcolumns);
225+
fileChecker.addCheck(existsCheck);
226+
assertEquals("ERROR PASSING "+relationFileName+" TEST",true, fileChecker.run(1));
227+
}
228+
14229
}
Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,31 @@
11
package ldbc.snb.datagen.test.csv;
22

3+
import java.util.ArrayList;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
38
/**
49
* Created by aprat on 18/12/15.
510
*/
6-
public class Check {
11+
public abstract class Check {

    /** Name returned by {@link #getCheckName()}. */
    protected String checkName;

    /** Column indices returned by {@link #getColumns()}. */
    protected List<Integer> columns;

    /**
     * Creates a check with the given name and column indices.
     *
     * @param name    name of the check
     * @param columns indices of the columns this check is applied to
     */
    public Check(String name, List<Integer> columns) {
        checkName = name;
        this.columns = columns;
    }

    /**
     * Evaluates this check against a list of values.
     *
     * @param values the values to evaluate
     * @return the outcome decided by the concrete subclass
     */
    public abstract boolean check(List<String> values);

    /** @return the name given at construction time */
    public String getCheckName() {
        return this.checkName;
    }

    /** @return the column index list given at construction time (the same instance, not a copy) */
    public List<Integer> getColumns() {
        return this.columns;
    }
}
31+
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,30 @@
11
package ldbc.snb.datagen.test.csv;
22

3+
import java.io.File;
4+
import java.util.HashSet;
5+
import java.util.Set;
6+
37
/**
48
* Created by aprat on 18/12/15.
59
*/
610
public class Column {
11+
12+
public static Set<String> readColumnAsSet(String fileName, int index) {
13+
14+
Set<String> ret = new HashSet<String>();
15+
try {
16+
File file = new File(fileName);
17+
CsvFileReader csvReader = new CsvFileReader(file);
18+
while (csvReader.hasNext()) {
19+
String[] line = csvReader.next();
20+
ret.add(line[index]);
21+
}
22+
}catch(Exception e) {
23+
System.err.println("Error when reading file "+fileName);
24+
System.err.println(e.getMessage());
25+
}
26+
return ret;
27+
}
28+
29+
730
}
Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,32 @@
11
package ldbc.snb.datagen.test.csv;
22

3+
import java.util.List;
4+
import java.util.Set;
5+
36
/**
47
* Created by aprat on 21/12/15.
58
*/
6-
public class ExistsCheck {
9+
public class ExistsCheck extends Check {
10+
11+
protected List<Set<String>> refColumns = null;
12+
13+
public ExistsCheck(List<Integer> indexes, List<Set<String>> refColumns) {
14+
super("Exists Check", indexes);
15+
this.refColumns = refColumns;
16+
}
17+
18+
@Override
19+
public boolean check(List<String> values) {
20+
for(String val : values) {
21+
boolean found = false;
22+
for( Set<String> column : refColumns) {
23+
if(column.contains(val)) {
24+
found = true;
25+
break;
26+
}
27+
}
28+
if(!found) return false;
29+
}
30+
return true;
31+
}
732
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,56 @@
11
package ldbc.snb.datagen.test.csv;
22

3+
import java.io.File;
4+
import java.util.ArrayList;
5+
import java.util.List;
6+
37
/**
48
* Created by aprat on 18/12/15.
59
*/
610
public class FileChecker {
11+
12+
private String fileName = null;
13+
private List<Check> checks = null;
14+
15+
public FileChecker(String fileName) {
16+
this.fileName = fileName;
17+
this.checks = new ArrayList<Check>();
18+
}
19+
20+
public void addCheck( Check check) {
21+
checks.add(check);
22+
}
23+
24+
public boolean run(int startLine) {
25+
File file = new File(fileName);
26+
try {
27+
CsvFileReader csvReader = new CsvFileReader(file);
28+
int lineCount = 1;
29+
while(csvReader.hasNext()) {
30+
String[] line = csvReader.next();
31+
if(startLine <= lineCount-1) {
32+
for (Check c : checks) {
33+
List<String> row = new ArrayList<String>();
34+
for (Integer index : c.getColumns()) {
35+
row.add(line[index]);
36+
}
37+
if (!c.check(row)) {
38+
System.err.print("Found error at file " + fileName + " at line " + lineCount);
39+
System.err.print(" when applying " + c.getCheckName()+" on columns ");
40+
for(Integer index : c.getColumns()) {
41+
System.err.print(index+" ");
42+
}
43+
System.err.println();
44+
return false;
45+
}
46+
}
47+
}
48+
lineCount++;
49+
}
50+
}catch(Exception e) {
51+
System.err.println(e.getMessage());
52+
return false;
53+
}
54+
return true;
55+
}
756
}

0 commit comments

Comments
 (0)