-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathGKProgram.java
More file actions
98 lines (84 loc) · 3.68 KB
/
GKProgram.java
File metadata and controls
98 lines (84 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;
/**
 * Command-line benchmark driver for the {@code GK} quantile sketch.
 *
 * <p>Reads a dataset file and a query file (one number per line each), feeds every
 * dataset value into a {@code GK} instance, asks the sketch for the rank of every
 * query value, and reports size and timing figures on standard output. The rank
 * answers are written to the output file, one per line.
 */
public class GKProgram {
    // Not referenced anywhere in this file; kept because code outside this file
    // may still refer to it (package-private visibility).
    static double[] arr;

    // Values parsed from the dataset file, in file order.
    static List<Double> data;

    /**
     * Runs the benchmark.
     *
     * <p>Standard output receives exactly three lines: the number of stored tuples
     * times 24 (bytes), the elapsed nanoseconds for feeding and finalizing, and the
     * elapsed nanoseconds for answering all queries.
     *
     * @param args {@code <dataset_file> <query_file> <k> <output_file>}; a fifth
     *             argument is accepted by the length check but is not read
     * @throws NumberFormatException if {@code <k>} is not a valid integer (it is
     *                               parsed before the error-reporting try block)
     */
    public static void main(String[] args) {
        // NOTE: mergeability test not implemented here --- no merge op. in our implementation of GK
        if (args.length < 4 || args.length > 5) {
            System.out.println("Usage: java GKProgram <dataset_file> <query_file> <k> <output_file>");
            return;
        }

        String datasetFile = args[0];
        String queryFile = args[1];
        int k = Integer.parseInt(args[2]);
        String outputFile = args[3];

        try {
            ////////////// load data and queries /////////////////
            data = new ArrayList<>();

            // Read the dataset file, one number per line. A malformed line is NOT
            // skipped here: the NumberFormatException propagates to the outer catch
            // and aborts the run.
            try (BufferedReader datasetReader = new BufferedReader(new FileReader(datasetFile))) {
                String line;
                while ((line = datasetReader.readLine()) != null) {
                    data.add(Double.parseDouble(line));
                }
            }

            // Read the query file, one number per line. Unlike the dataset, malformed
            // lines are reported on stderr and skipped.
            List<Double> queries = new ArrayList<>();
            try (BufferedReader queryReader = new BufferedReader(new FileReader(queryFile))) {
                String line;
                while ((line = queryReader.readLine()) != null) {
                    try {
                        queries.add(Double.parseDouble(line));
                    } catch (NumberFormatException e) {
                        System.err.println("Skipping invalid float value: " + line);
                    }
                }
            } catch (IOException e) {
                // Best effort: the run continues with whatever queries were read so far.
                e.printStackTrace();
            }

            ////////////// measure time from here /////////////////
            long startTime = System.nanoTime();

            // Create the GK sketch with an accuracy parameter derived from k.
            GK gk = new GK(3.0/(2*k)); // FIXME: which epsilon to choose?
            for (int i = 0; i < data.size(); i++) {
                gk.feed(data.get(i));
            }
            // Called once after all values are fed and before any query.
            gk.finalize();
            long afterUpdatesTime = System.nanoTime();

            List<Long> results = new ArrayList<>();
            for (int i = 0; i < queries.size(); i++) {
                results.add((gk.query_for_rank(queries.get(i))));
            }
            long afterQueriesTime = System.nanoTime();

            int tuples = gk.num_entries_stored();

            // Print the size of the serialized sketch in bytes
            System.out.printf("%d%n", tuples * 24); // three 64-bit numbers per tuple in GK
            System.out.printf("%d%n", afterUpdatesTime - startTime);
            System.out.printf("%d%n", afterQueriesTime - afterUpdatesTime);

            // Write one rank answer per line, in query order.
            try (PrintWriter outputWriter = new PrintWriter(new FileWriter(outputFile))) {
                for (int i = 0; i < results.size(); i++) {
                    outputWriter.printf("%d%n", results.get(i));
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        } catch (Exception e) {
            // The message is passed as an argument, never spliced into the format
            // string: a '%' inside the message would otherwise be parsed as a format
            // specifier and make printf itself throw, masking the original error.
            System.err.printf("GKSketchProgram exception %n%s", e.getMessage());
            e.printStackTrace();
        }
    }
}