forked from InseeFr/Trevas
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCSVDataset.java
More file actions
90 lines (79 loc) · 2.98 KB
/
CSVDataset.java
File metadata and controls
90 lines (79 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package fr.insee.vtl.csv;
import fr.insee.vtl.model.Dataset;
import org.supercsv.cellprocessor.Optional;
import org.supercsv.cellprocessor.ParseBool;
import org.supercsv.cellprocessor.ParseDouble;
import org.supercsv.cellprocessor.ParseLong;
import org.supercsv.cellprocessor.ift.CellProcessor;
import org.supercsv.io.CsvMapReader;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.io.Reader;
import java.time.Instant;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * A {@link Dataset} whose data points are read from CSV text.
 *
 * <p>The CSV is parsed with Super CSV using
 * {@link CsvPreference#EXCEL_NORTH_EUROPE_PREFERENCE}. The first line must be a
 * header naming exactly the columns declared in the data structure; the order
 * of the columns in the file is free. Rows are read on the first call to
 * {@link #getDataPoints()} and cached afterwards.
 */
public class CSVDataset implements Dataset {

    private final DataStructure structure;
    private final CsvMapReader csvReader;
    /** Column names in the order they appear in the CSV header. */
    private final String[] header;
    /** Cached data points; {@code null} until the CSV has been read successfully. */
    private List<DataPoint> data;

    /**
     * Creates a dataset backed by the given CSV and validates its header
     * against the structure.
     *
     * @param structure the structure describing the name and type of each column
     * @param csv       the CSV content, starting with a header line
     * @throws IOException              if the header cannot be read
     * @throws IllegalArgumentException if the CSV has no header line
     * @throws RuntimeException         if the header contains a column unknown to the
     *                                  structure, or lacks a column of the structure
     */
    public CSVDataset(DataStructure structure, Reader csv) throws IOException {
        this.structure = structure;
        this.csvReader = new CsvMapReader(csv, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE);

        String[] columns = this.csvReader.getHeader(true);
        // getHeader returns null when the input is already at end of file.
        if (columns == null) {
            throw new IllegalArgumentException("CSV is empty: no header row");
        }

        // Every CSV column must be declared in the structure. An empty header
        // cell is read as null and is rejected here as well.
        List<String> present = new ArrayList<>(columns.length);
        for (String column : columns) {
            if (column == null || !this.structure.keySet().contains(column)) {
                throw new RuntimeException("unknown column in CSV: " + column);
            }
            present.add(column);
        }

        // ... and every structure column must be present in the CSV.
        if (!present.containsAll(this.structure.keySet())) {
            throw new RuntimeException("missing columns in CSV");
        }

        this.header = columns;
    }

    /**
     * Builds one cell processor per CSV column, in header order, so that each
     * processor lines up with the column it has to parse.
     */
    private CellProcessor[] getProcessors() {
        CellProcessor[] processors = new CellProcessor[this.header.length];
        for (int i = 0; i < this.header.length; i++) {
            // Find a valid processor for each type.
            processors[i] = getProcessor(this.structure.get(this.header[i]).getType());
        }
        return processors;
    }

    /**
     * Returns the cell processor converting raw CSV text to the given type.
     *
     * @throws RuntimeException              for {@link Instant} and {@link LocalDate},
     *                                       which are not implemented yet
     * @throws UnsupportedOperationException for any other unsupported type
     */
    private CellProcessor getProcessor(Class<?> type) {
        if (String.class.equals(type)) {
            return new Optional();
        } else if (Long.class.equals(type)) {
            return new ParseLong();
        } else if (Double.class.equals(type)) {
            return new ParseDouble();
        } else if (Boolean.class.equals(type)) {
            return new ParseBool();
        } else if (Instant.class.equals(type)) {
            throw new RuntimeException("TODO");
        } else if (LocalDate.class.equals(type)) {
            throw new RuntimeException("TODO");
        } else {
            throw new UnsupportedOperationException("unsupported type " + type);
        }
    }

    /**
     * Returns the names used as keys for the values of each row: the CSV
     * header itself, so values are bound to the column they were read from.
     */
    private String[] getNameMapping() {
        return this.header.clone();
    }

    /**
     * Returns the data points of the CSV, reading them on the first call.
     *
     * <p>The underlying reader is closed once the rows have been consumed or
     * reading has failed. The result is cached only when the whole CSV was read
     * successfully, so a failed read never leaves a partial list behind.
     *
     * @return the list of data points, one per CSV row
     * @throws RuntimeException if reading the CSV fails with an {@link IOException}
     */
    @Override
    public List<DataPoint> getDataPoints() {
        if (this.data == null) {
            List<DataPoint> rows = new ArrayList<>();
            try (CsvMapReader reader = this.csvReader) {
                String[] nameMapping = getNameMapping();
                CellProcessor[] processors = getProcessors();
                Map<String, Object> row;
                while ((row = reader.read(nameMapping, processors)) != null) {
                    rows.add(new DataPoint(this.structure, row));
                }
            } catch (IOException e) {
                // TODO: Improve.
                throw new RuntimeException(e);
            }
            // Publish only after every row was read and converted.
            this.data = rows;
        }
        return this.data;
    }

    /**
     * Returns the structure this dataset was created with.
     */
    @Override
    public DataStructure getDataStructure() {
        return this.structure;
    }
}