Skip to content

Commit e726fe3

Browse files
committed
Merge branch 'bi' of github.com:ldbc/ldbc_snb_datagen into bi
2 parents c2560f3 + 6f17b84 commit e726fe3

File tree

11 files changed

+295
-51
lines changed

11 files changed

+295
-51
lines changed

src/test/java/ldbc/snb/datagen/test/LDBCDatagenTest.java

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public static void generateData() {
2626
try {
2727
Process p = pb.start();
2828
p.waitFor();
29-
29+
3030
}catch(Exception e) {
3131
System.err.println(e.getMessage());
3232
}
@@ -190,38 +190,43 @@ public void testIdUniqueness(String fileName, int column) {
190190
}
191191

192192
public void testPairUniquenessPlusExistance(String relationFileName, int columnA, int columnB, String entityFileNameA, int entityColumnA, String entityFileNameB, int entityColumnB) {
193-
Set<String> organisations = Column.readColumnAsSet(entityFileNameA,entityColumnA);
194-
Set<String> places = Column.readColumnAsSet(entityFileNameB,entityColumnB);
193+
LongParser parser = new LongParser();
194+
ColumnSet<Long> entitiesA = new ColumnSet<Long>(parser,new File(entityFileNameA),entityColumnA,1);
195+
entitiesA.initialize();
196+
ColumnSet<Long> entitiesB = new ColumnSet<Long>(parser,new File(entityFileNameB),entityColumnB,1);
197+
entitiesB.initialize();
195198
FileChecker fileChecker = new FileChecker(relationFileName);
196-
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck(0,1);
199+
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck<Long,Long>(parser,parser,columnA,columnB);
197200
fileChecker.addCheck(pairUniquenessCheck);
198-
List<Set<String>> entityARefColumns = new ArrayList<Set<String>>();
199-
entityARefColumns.add(organisations);
200-
List<Set<String>> entityBRefColumns = new ArrayList<Set<String>>();
201-
entityBRefColumns.add(places);
201+
List<ColumnSet<Long>> entityARefColumns = new ArrayList<ColumnSet<Long>>();
202+
entityARefColumns.add(entitiesA);
203+
List<ColumnSet<Long>> entityBRefColumns = new ArrayList<ColumnSet<Long>>();
204+
entityBRefColumns.add(entitiesB);
202205
List<Integer> organisationIndices = new ArrayList<Integer>();
203-
organisationIndices.add(0);
206+
organisationIndices.add(columnA);
204207
List<Integer> placeIndices = new ArrayList<Integer>();
205-
placeIndices.add(1);
206-
ExistsCheck existsEntityACheck = new ExistsCheck(organisationIndices, entityARefColumns);
207-
ExistsCheck existsEntityBCheck = new ExistsCheck(placeIndices, entityBRefColumns);
208+
placeIndices.add(columnB);
209+
ExistsCheck<Long> existsEntityACheck = new ExistsCheck<Long>(parser,organisationIndices, entityARefColumns);
210+
ExistsCheck<Long> existsEntityBCheck = new ExistsCheck<Long>(parser,placeIndices, entityBRefColumns);
208211
fileChecker.addCheck(existsEntityACheck);
209212
fileChecker.addCheck(existsEntityBCheck);
210213
assertEquals("ERROR PASSING ORGANISATION_ISLOCATEDIN_PLACE TEST",true, fileChecker.run(1));
211214

212215
}
213216

214217
public void testPairUniquenessPlusExistance(String relationFileName, int columnA, int columnB, String entityFileName, int entityColumn) {
215-
Set<String> persons = Column.readColumnAsSet(entityFileName,entityColumn);
218+
LongParser parser = new LongParser();
219+
ColumnSet<Long> entities = new ColumnSet<Long>(parser,new File(entityFileName),entityColumn,1);
220+
entities.initialize();
216221
FileChecker fileChecker = new FileChecker(relationFileName);
217-
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck(columnA,columnB);
222+
PairUniquenessCheck pairUniquenessCheck = new PairUniquenessCheck<Long,Long>(parser,parser,columnA,columnB);
218223
fileChecker.addCheck(pairUniquenessCheck);
219-
List<Set<String>> refcolumns = new ArrayList<Set<String>>();
220-
refcolumns.add(persons);
224+
List<ColumnSet<Long>> refcolumns = new ArrayList<ColumnSet<Long>>();
225+
refcolumns.add(entities);
221226
List<Integer> columnIndices = new ArrayList<Integer>();
222-
columnIndices.add(0);
223-
columnIndices.add(1);
224-
ExistsCheck existsCheck = new ExistsCheck(columnIndices, refcolumns);
227+
columnIndices.add(columnA);
228+
columnIndices.add(columnB);
229+
ExistsCheck existsCheck = new ExistsCheck<Long>(parser,columnIndices, refcolumns);
225230
fileChecker.addCheck(existsCheck);
226231
assertEquals("ERROR PASSING "+relationFileName+" TEST",true, fileChecker.run(1));
227232
}
Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,26 @@
11
package ldbc.snb.datagen.test.csv;
22

33
import java.io.File;
4-
import java.util.HashSet;
5-
import java.util.Set;
4+
import java.util.Iterator;
65

76
/**
87
* Created by aprat on 18/12/15.
98
*/
10-
public class Column {
9+
public abstract class Column <T> {
1110

12-
public static Set<String> readColumnAsSet(String fileName, int index) {
11+
protected Parser<T> parser = null;
12+
protected File file = null;
13+
protected int index = 0;
14+
protected int startIndex = 0;
1315

14-
Set<String> ret = new HashSet<String>();
15-
try {
16-
File file = new File(fileName);
17-
CsvFileReader csvReader = new CsvFileReader(file);
18-
while (csvReader.hasNext()) {
19-
String[] line = csvReader.next();
20-
ret.add(line[index]);
21-
}
22-
}catch(Exception e) {
23-
System.err.println("Error when reading file "+fileName);
24-
System.err.println(e.getMessage());
25-
}
26-
return ret;
16+
Column( Parser<T> parser, File file, int index, int startIndex ) {
17+
this.parser = parser;
18+
this.file = file;
19+
this.index = index;
20+
this.startIndex = startIndex;
2721
}
2822

2923

24+
public abstract Iterator<T> iterator();
25+
3026
}
Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,44 @@
11
package ldbc.snb.datagen.test.csv;
22

3+
import java.io.File;
4+
import java.util.HashSet;
5+
import java.util.Iterator;
6+
import java.util.Set;
7+
38
/**
49
* Created by aprat on 22/12/15.
510
*/
6-
public class ColumnSet {
11+
public class ColumnSet<T> extends Column<T> {
12+
13+
protected Set<T> data = null;
14+
15+
public ColumnSet(Parser<T> parser, File file, int index, int startIndex) {
16+
super(parser, file, index, startIndex);
17+
data = new HashSet<T>();
18+
try {
19+
CsvFileReader csvReader = new CsvFileReader(file);
20+
int count = 0;
21+
while(csvReader.hasNext()) {
22+
String[] line = csvReader.next();
23+
if(count >= startIndex ) {
24+
data.add(this.parser.parse(line[index]));
25+
}
26+
count++;
27+
}
28+
} catch(Exception e) {
29+
System.err.println("Error while reading file");
30+
System.err.println(e.getMessage());
31+
}
32+
}
33+
34+
public void initialize() {
35+
}
36+
37+
public boolean contains(T element) {
38+
return data.contains(element);
39+
}
40+
41+
public Iterator<T> iterator() {
42+
return data.iterator();
43+
}
744
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package ldbc.snb.datagen.test.csv;
2+
3+
import org.apache.commons.collections.iterators.CollatingIterator;
4+
5+
import java.io.File;
6+
import java.util.Iterator;
7+
8+
/**
9+
* Created by aprat on 22/12/15.
10+
*/
11+
public class ColumnStream<T> extends Column<T>{
12+
13+
protected File file = null;
14+
protected int index = 0;
15+
16+
public ColumnStream(Parser<T> parser, File file, int index, int startIndex) {
17+
super(parser, file, index, startIndex);
18+
}
19+
20+
public static class ColumnStreamIterator<T> implements Iterator<T> {
21+
22+
protected CsvFileReader reader = null;
23+
protected int index = 0;
24+
protected ColumnStream<T> columnStream = null;
25+
26+
public ColumnStreamIterator( ColumnStream<T> columnStream, CsvFileReader reader, int index){
27+
this.reader = reader;
28+
this.index = index;
29+
this.columnStream = columnStream;
30+
}
31+
32+
public void advance(int startIndex) {
33+
for(int i = 0; i < startIndex; ++i) {
34+
next();
35+
}
36+
}
37+
38+
public boolean hasNext() {
39+
return reader.hasNext();
40+
}
41+
42+
public T next() {
43+
String [] line = reader.next();
44+
return columnStream.parser.parse(line[index]);
45+
}
46+
47+
public void remove() {
48+
49+
}
50+
}
51+
52+
public Iterator<T> iterator( ) {
53+
try {
54+
CsvFileReader reader = new CsvFileReader(file);
55+
ColumnStreamIterator<T> iter = new ColumnStreamIterator<T>(this,reader,index);
56+
iter.advance(startIndex);
57+
return iter;
58+
} catch(Exception e) {
59+
System.err.println("Error opening csv reader");
60+
}
61+
return null;
62+
}
63+
}

src/test/java/ldbc/snb/datagen/test/csv/ExistsCheck.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,23 @@
66
/**
77
* Created by aprat on 21/12/15.
88
*/
9-
public class ExistsCheck extends Check {
9+
public class ExistsCheck<T> extends Check {
1010

11-
protected List<Set<String>> refColumns = null;
11+
protected List<ColumnSet<T>> refColumns = null;
12+
protected Parser<T> parser = null;
1213

13-
public ExistsCheck(List<Integer> indexes, List<Set<String>> refColumns) {
14+
public ExistsCheck(Parser<T> parser, List<Integer> indexes, List<ColumnSet<T>> refColumns) {
1415
super("Exists Check", indexes);
1516
this.refColumns = refColumns;
17+
this.parser = parser;
1618
}
1719

1820
@Override
1921
public boolean check(List<String> values) {
2022
for(String val : values) {
2123
boolean found = false;
22-
for( Set<String> column : refColumns) {
23-
if(column.contains(val)) {
24+
for( ColumnSet<T> column : refColumns) {
25+
if(column.contains(parser.parse(val))) {
2426
found = true;
2527
break;
2628
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package ldbc.snb.datagen.test.csv;
2+
3+
/**
4+
* Created by aprat on 23/12/15.
5+
*/
6+
public class LongCheck extends NumericCheck<Long> {
7+
8+
public LongCheck(Parser<Long> parser, String name, Integer column, NumericCheckType type, Long val1, Long val2) {
9+
super(parser, name, column, type, val1, val2);
10+
}
11+
12+
@Override
13+
public boolean greater(Long val1, Long val2) {
14+
return val1 > val2;
15+
}
16+
17+
@Override
18+
public boolean greaterEqual(Long val1, Long val2) {
19+
return val1 >= val2;
20+
}
21+
22+
@Override
23+
public boolean less(Long val1, Long val2) {
24+
return val1 < val2;
25+
}
26+
27+
@Override
28+
public boolean lessEqual(Long val1, Long val2) {
29+
return val1 <= val2;
30+
}
31+
32+
@Override
33+
public boolean equals(Long val1, Long val2) {
34+
return val1 == val2;
35+
}
36+
37+
@Override
38+
public boolean nonEquals(Long val1, Long val2) {
39+
return val1 != val2;
40+
}
41+
42+
@Override
43+
public boolean between(Long val1, Long val2, Long val3) {
44+
return (val1 >= val2 && val1 < val3);
45+
}
46+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package ldbc.snb.datagen.test.csv;
2+
3+
/**
4+
* Created by aprat on 23/12/15.
5+
*/
6+
public class LongParser extends Parser<Long> {
7+
8+
@Override
9+
public Long parse(String s) {
10+
return Long.parseLong(s);
11+
}
12+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package ldbc.snb.datagen.test.csv;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
6+
/**
7+
* Created by aprat on 23/12/15.
8+
*/
9+
public abstract class NumericCheck<T extends Number > extends Check {
10+
11+
enum NumericCheckType {
12+
G,
13+
GE,
14+
L,
15+
LE,
16+
E,
17+
NE,
18+
BETWEEN
19+
}
20+
21+
protected NumericCheckType type;
22+
protected T val1;
23+
protected T val2;
24+
protected Parser<T> parser;
25+
26+
public NumericCheck(Parser<T> parser, String name, Integer column, NumericCheckType type, T val1, T val2 ) {
27+
super(name, new ArrayList<Integer>());
28+
this.getColumns().add(column);
29+
this.type = type;
30+
this.val1 = val1;
31+
this.val2 = val2;
32+
this.parser = parser;
33+
}
34+
35+
@Override
36+
public boolean check(List<String> values) {
37+
switch(type) {
38+
case G:
39+
return greater(parser.parse(values.get(0)),val1);
40+
case GE:
41+
return greaterEqual(parser.parse(values.get(0)),val1);
42+
case L:
43+
return less(parser.parse(values.get(0)),val1);
44+
case LE:
45+
return lessEqual(parser.parse(values.get(0)),val1);
46+
case E:
47+
return equals(parser.parse(values.get(0)),val1);
48+
case NE:
49+
return nonEquals(parser.parse(values.get(0)),val1);
50+
case BETWEEN:
51+
return between(parser.parse(values.get(0)),val1, val2);
52+
default:
53+
return false;
54+
}
55+
}
56+
57+
public abstract boolean greater(T val1, T val2);
58+
59+
public abstract boolean greaterEqual(T val1, T val2);
60+
61+
public abstract boolean less(T val1, T val2);
62+
63+
public abstract boolean lessEqual(T val1, T val2);
64+
65+
public abstract boolean equals(T val1, T val2);
66+
67+
public abstract boolean nonEquals(T val1, T val2);
68+
69+
public abstract boolean between(T val1, T val2, T val3);
70+
71+
}

0 commit comments

Comments
 (0)