Skip to content

Commit 8b5b0dd

Browse files
committed
If the file is already sorted and unique, do not try to sort/uniq it again
1 parent 4fbb269 commit 8b5b0dd

File tree

1 file changed

+43
-5
lines changed

1 file changed

+43
-5
lines changed

src/main/java/org/radarcns/util/RecordConverterFactory.java

Lines changed: 43 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -60,12 +60,21 @@ default void sortUnique(Path path) throws IOException {
6060
List<String> sortedLines = new ArrayList<>((int)(Files.size(path) / 100));
6161
Path tempOut = Files.createTempFile("tempfile", ".tmp");
6262
String header;
63+
boolean withHeader = hasHeader();
6364
if (path.getFileName().endsWith(".gz")) {
6465
try (InputStream fileIn = Files.newInputStream(path);
6566
GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
6667
Reader inReader = new InputStreamReader(gzipIn);
6768
BufferedReader reader = new BufferedReader(inReader)) {
68-
header = readFile(reader, sortedLines, hasHeader());
69+
if (testSortedAndUnique(reader, withHeader)) {
70+
return;
71+
}
72+
}
73+
try (InputStream fileIn = Files.newInputStream(path);
74+
GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
75+
Reader inReader = new InputStreamReader(gzipIn);
76+
BufferedReader reader = new BufferedReader(inReader)) {
77+
header = readFile(reader, sortedLines, withHeader);
6978
}
7079
try (OutputStream fileOut = Files.newOutputStream(tempOut);
7180
GZIPOutputStream gzipOut = new GZIPOutputStream(fileOut);
@@ -74,7 +83,12 @@ default void sortUnique(Path path) throws IOException {
7483
}
7584
} else {
7685
try (BufferedReader reader = Files.newBufferedReader(path)) {
77-
header = readFile(reader, sortedLines, hasHeader());
86+
if (testSortedAndUnique(reader, withHeader)) {
87+
return;
88+
}
89+
}
90+
try (BufferedReader reader = Files.newBufferedReader(path)) {
91+
header = readFile(reader, sortedLines, withHeader);
7892
}
7993
try (BufferedWriter writer = Files.newBufferedWriter(tempOut)) {
8094
writeFile(writer, header, sortedLines);
@@ -92,9 +106,6 @@ static String readFile(BufferedReader reader, Collection<String> lines, boolean
92106
String line = reader.readLine();
93107
String header;
94108
if (withHeader) {
95-
if (line == null) {
96-
throw new IOException("CSV file does not have header");
97-
}
98109
header = line;
99110
line = reader.readLine();
100111
} else {
@@ -125,4 +136,31 @@ static void writeFile(Writer writer, String header, List<String> lines) throws I
125136
previousLine = line;
126137
}
127138
}
139+
140+
static boolean testSortedAndUnique(BufferedReader reader, boolean withHeader) throws IOException {
141+
String line = reader.readLine();
142+
if (withHeader) {
143+
if (line == null) {
144+
throw new IOException("header expected but not found");
145+
}
146+
line = reader.readLine();
147+
}
148+
149+
// no lines -> sorted & unique
150+
if (line == null) {
151+
return true;
152+
}
153+
154+
String previousLine = line;
155+
line = reader.readLine();
156+
157+
while (line != null) {
158+
if (line.compareTo(previousLine) <= 0) {
159+
return false;
160+
}
161+
previousLine = line;
162+
line = reader.readLine();
163+
}
164+
return true;
165+
}
128166
}

0 commit comments

Comments
 (0)