Skip to content

Commit 7fbfa0d

Browse files
author
Maxim Moinat
committed
bins the number of records per topic and device for quality check
1 parent cea4c16 commit 7fbfa0d

File tree

2 files changed

+128
-28
lines changed

2 files changed

+128
-28
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package org.radarcns;
2+
3+
import org.apache.commons.collections.MapIterator;
4+
import org.apache.commons.collections.keyvalue.MultiKey;
5+
import org.apache.commons.collections.map.MultiKeyMap;
6+
7+
import java.io.*;
8+
9+
10+
public class Frequency {
11+
12+
private MultiKeyMap bins = new MultiKeyMap();
13+
private String binFilePath;
14+
15+
public void setBinFilePath(String binFilePath) {
16+
this.binFilePath = binFilePath;
17+
}
18+
19+
public MultiKeyMap getBins() {
20+
return bins;
21+
}
22+
23+
public void addToBin(String topicName, String id, String timestamp, Integer countToAdd) {
24+
Integer count = (Integer) bins.get(topicName, id, timestamp);
25+
if (count == null) {
26+
bins.put(topicName, id, timestamp, countToAdd);
27+
} else {
28+
bins.put(topicName, id, timestamp, count + countToAdd);
29+
}
30+
}
31+
32+
public void addToBin(String topicName, String id, Double time, Integer countToAdd) {
33+
// Hour resolution
34+
String hourlyTimestamp = restructureAvroRecords.createHourTimestamp(time);
35+
36+
addToBin(topicName, id, hourlyTimestamp, countToAdd);
37+
}
38+
39+
public void addToBin(String topicName, String id, Double time) {
40+
addToBin(topicName, id, time, 1);
41+
}
42+
43+
public void printBins() {
44+
MapIterator mapIterator = bins.mapIterator();
45+
46+
while (mapIterator.hasNext()) {
47+
MultiKey key = (MultiKey) mapIterator.next();
48+
Integer value = (Integer) mapIterator.getValue();
49+
System.out.printf("%s|%s|%s - %d\n", key.getKey(0), key.getKey(1), key.getKey(2), value);
50+
}
51+
}
52+
53+
public void writeBins() {
54+
// Read bins from file and add to current bins
55+
// Creates new bins if not existing yet
56+
addBinsFromFile();
57+
58+
// Write all bins to csv
59+
MapIterator mapIterator = bins.mapIterator();
60+
try(FileWriter fw = new FileWriter(binFilePath, false);
61+
BufferedWriter bw = new BufferedWriter(fw);
62+
PrintWriter out = new PrintWriter(bw))
63+
{
64+
String header = String.join(",","topic","device","timestamp","count");
65+
out.println(header);
66+
67+
while (mapIterator.hasNext()) {
68+
MultiKey key = (MultiKey) mapIterator.next();
69+
Integer value = (Integer) mapIterator.getValue();
70+
String data = String.join(",", key.getKey(0).toString(), key.getKey(1).toString(), key.getKey(2).toString(), value.toString());
71+
out.println(data);
72+
}
73+
} catch (IOException e) {
74+
// TODO: exception handling
75+
e.printStackTrace();
76+
}
77+
78+
// Reset the map
79+
bins = new MultiKeyMap();
80+
}
81+
82+
private void addBinsFromFile() {
83+
try (FileReader fr = new FileReader(binFilePath);
84+
BufferedReader br = new BufferedReader(fr))
85+
{
86+
// Read in all lines as multikeymap (key, key, key, value)
87+
String line;
88+
br.readLine(); // Skip header
89+
while ( (line = br.readLine()) != null ) {
90+
String[] columns = line.split(",");
91+
this.addToBin(columns[0], columns[1], columns[2], Integer.valueOf(columns[3]));
92+
}
93+
} catch (IOException e) {
94+
System.out.println("Could not read the file with bins. Creating new file when writing.");
95+
} catch (ArrayIndexOutOfBoundsException e) {
96+
System.out.println("Unable to parse the contents of the bins file. Skipping reading.");
97+
}
98+
}
99+
}

src/main/java/org/radarcns/restructureAvroRecords.java

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,37 @@ public class restructureAvroRecords {
2020

2121
private final String OUTPUT_FILE_EXTENSION = "json";
2222
private final String OFFSETS_FILE_NAME = "offsets.csv";
23+
private final String BINS_FILE_NAME = "bins.csv";
2324

2425
private String outputPath = ".";
2526
private String offsetsPath = outputPath + "/" + OFFSETS_FILE_NAME;
2627
private Set<String> seenFiles = new HashSet<>();
28+
private Frequency bins = new Frequency();
2729

2830
private Configuration conf = new Configuration();
29-
private final SimpleDateFormat dateFormatFileName = new SimpleDateFormat("yyyyMMdd_HH");
31+
private final static SimpleDateFormat dateFormatFileName = new SimpleDateFormat("yyyyMMdd_HH");
3032

3133
private int processedFileCount;
3234

3335
public static void main(String [] args) throws Exception {
3436

35-
// restructureAvroRecords restr = new restructureAvroRecords(args[0], args[2]);
36-
// restr.start(args[1]);
37+
restructureAvroRecords restr = new restructureAvroRecords(args[0], args[2]);
38+
long time1 = System.currentTimeMillis();
39+
restr.start(args[1]);
40+
System.out.printf("Time taken: %.2f seconds\n",(System.currentTimeMillis() - time1)/1000d);
3741

38-
restructureAvroRecords restr = new restructureAvroRecords("webhdfs://radar-test.thehyve.net:50070", "output3/");
39-
restr.start("/topicAndroidPhoneNew/");
4042

41-
// restructureAvroRecords.processTopic("/topicE4/android_empatica_e4_inter_beat_interval/partition=0/");
42-
// restructureAvroRecords.processAvroFile(new Path("/topicE4/android_empatica_e4_inter_beat_interval/partition=0/android_empatica_e4_inter_beat_interval+0+0000031485+0000031488.avro") );
43-
// restructureAvroRecords.processAvroFile(new Path("/testE4Time/android_phone_acceleration/partition=0/android_phone_acceleration+0+0000590000+0000599999.avro"),"wazaa" );
43+
// restructureAvroRecords restr = new restructureAvroRecords("webhdfs://radar-test.thehyve.net:50070", "output4/");
44+
// restr.start("/topicE4/");
45+
46+
// restr.processTopic(new Path("/topicE4/android_empatica_e4_temperature/"));
47+
// restr.processAvroFile(new Path("/testE4Time/android_phone_acceleration/partition=0/android_phone_acceleration+0+0000590000+0000599999.avro"),"wazaa" );
4448
}
4549

4650
public restructureAvroRecords(String inputPath, String outputPath) {
4751
this.setInputWebHdfsURL(inputPath);
4852
this.setOutputPath(outputPath);
53+
bins.setBinFilePath(outputPath + "/" + BINS_FILE_NAME);
4954
}
5055

5156
public void setInputWebHdfsURL(String fileSystemURL) {
@@ -81,10 +86,11 @@ public void start(String directoryName) throws IOException {
8186
processTopic(filePath);
8287
}
8388
}
89+
8490
System.out.printf("%d files processed\n", processedFileCount);
8591
}
8692

87-
public void processTopic(Path topicPath) throws IOException {
93+
private void processTopic(Path topicPath) throws IOException {
8894
// Get files in this topic directory
8995
FileSystem fs = FileSystem.get(conf);
9096
RemoteIterator<LocatedFileStatus> files = fs.listFiles(topicPath, true); // TODO: all partitions or just 'partition=0'?
@@ -94,15 +100,12 @@ public void processTopic(Path topicPath) throws IOException {
94100
while (files.hasNext()) {
95101
LocatedFileStatus locatedFileStatus = files.next();
96102

97-
System.out.println(locatedFileStatus.getPath());
98-
99103
if (locatedFileStatus.isFile())
100104
this.processAvroFile( locatedFileStatus.getPath(), topicName );
101-
102105
}
103106
}
104107

105-
public void processAvroFile(Path filePath, String topicName) throws IOException {
108+
private void processAvroFile(Path filePath, String topicName) throws IOException {
106109
String fileName = filePath.getName();
107110

108111
// Skip if extension is not .avro
@@ -116,9 +119,10 @@ public void processAvroFile(Path filePath, String topicName) throws IOException
116119
return;
117120
}
118121

122+
System.out.println(filePath);
123+
// Read and parse avro file
119124
FsInput input = new FsInput(filePath, conf);
120125
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
121-
122126
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(input, datumReader);
123127

124128
GenericRecord record = null;
@@ -130,15 +134,17 @@ record = dataFileReader.next(record);
130134
}
131135

132136
this.writeSeenOffsets(fileName);
137+
bins.writeBins();
133138
processedFileCount++;
134139
}
135140

136-
public void writeRecord(GenericRecord record, String topicName) throws IOException {
141+
private void writeRecord(GenericRecord record, String topicName) throws IOException {
137142
GenericRecord keyField = (GenericRecord) record.get("keyField");
138143
GenericRecord valueField = (GenericRecord) record.get("valueField");
139144

140145
// Make a timestamped filename YYYYMMDD_HH00.json
141-
String outputFileName = this.createFilePathFromTimestamp( (Double) valueField.get("time"));
146+
String hourlyTimestamp = createHourTimestamp( (Double) valueField.get("time"));
147+
String outputFileName = hourlyTimestamp + "00." + OUTPUT_FILE_EXTENSION;
142148

143149
// Clean user id and create final output pathname
144150
String userId = keyField.get("userId").toString().replaceAll("\\W+", "");
@@ -147,21 +153,18 @@ public void writeRecord(GenericRecord record, String topicName) throws IOExcepti
147153
// Write data
148154
String data = record.toString(); // TODO: check whether this indeed always creates valid JSON
149155
this.appendToFile(dirName, outputFileName, data);
150-
}
151156

152-
public String createFilePathFromTimestamp(Double time) {
153-
// Send all output to the Appendable object sb
154-
StringBuilder sb = new StringBuilder();
155-
Formatter formatter = new Formatter(sb, Locale.US);
157+
// Count data
158+
bins.addToBin(topicName, keyField.get("sourceId").toString(), (Double) valueField.get("time"));
159+
}
156160

157-
// In millis
161+
public static String createHourTimestamp(Double time) {
162+
// Convert from seconds to millis, create a date and apply dateFormat
158163
Date date = new Date( time.longValue() * 1000 );
159-
160-
formatter.format("%s00.%s", dateFormatFileName.format(date), OUTPUT_FILE_EXTENSION);
161-
return sb.toString();
164+
return dateFormatFileName.format(date);
162165
}
163166

164-
public void appendToFile(String directoryName, String fileName, String data) {
167+
private void appendToFile(String directoryName, String fileName, String data) {
165168
File directory = new File(directoryName);
166169
if (! directory.exists()){
167170
if (directory.mkdirs())
@@ -216,7 +219,6 @@ private void writeSeenOffsets(String fileName) {
216219
}
217220

218221
private void readSeenOffsets() {
219-
220222
try (FileReader fr = new FileReader(offsetsPath);
221223
BufferedReader br = new BufferedReader(fr))
222224
{
@@ -231,6 +233,5 @@ private void readSeenOffsets() {
231233
// TODO
232234
e.printStackTrace();
233235
}
234-
235236
}
236237
}

0 commit comments

Comments
 (0)