@@ -60,12 +60,21 @@ default void sortUnique(Path path) throws IOException {
6060 List <String > sortedLines = new ArrayList <>((int )(Files .size (path ) / 100 ));
6161 Path tempOut = Files .createTempFile ("tempfile" , ".tmp" );
6262 String header ;
63+ boolean withHeader = hasHeader ();
6364 if (path .getFileName ().endsWith (".gz" )) {
6465 try (InputStream fileIn = Files .newInputStream (path );
6566 GZIPInputStream gzipIn = new GZIPInputStream (fileIn );
6667 Reader inReader = new InputStreamReader (gzipIn );
6768 BufferedReader reader = new BufferedReader (inReader )) {
68- header = readFile (reader , sortedLines , hasHeader ());
69+ if (testSortedAndUnique (reader , withHeader )) {
70+ return ;
71+ }
72+ }
73+ try (InputStream fileIn = Files .newInputStream (path );
74+ GZIPInputStream gzipIn = new GZIPInputStream (fileIn );
75+ Reader inReader = new InputStreamReader (gzipIn );
76+ BufferedReader reader = new BufferedReader (inReader )) {
77+ header = readFile (reader , sortedLines , withHeader );
6978 }
7079 try (OutputStream fileOut = Files .newOutputStream (tempOut );
7180 GZIPOutputStream gzipOut = new GZIPOutputStream (fileOut );
@@ -74,7 +83,12 @@ default void sortUnique(Path path) throws IOException {
7483 }
7584 } else {
7685 try (BufferedReader reader = Files .newBufferedReader (path )) {
77- header = readFile (reader , sortedLines , hasHeader ());
86+ if (testSortedAndUnique (reader , withHeader )) {
87+ return ;
88+ }
89+ }
90+ try (BufferedReader reader = Files .newBufferedReader (path )) {
91+ header = readFile (reader , sortedLines , withHeader );
7892 }
7993 try (BufferedWriter writer = Files .newBufferedWriter (tempOut )) {
8094 writeFile (writer , header , sortedLines );
@@ -92,9 +106,6 @@ static String readFile(BufferedReader reader, Collection<String> lines, boolean
92106 String line = reader .readLine ();
93107 String header ;
94108 if (withHeader ) {
95- if (line == null ) {
96- throw new IOException ("CSV file does not have header" );
97- }
98109 header = line ;
99110 line = reader .readLine ();
100111 } else {
@@ -125,4 +136,31 @@ static void writeFile(Writer writer, String header, List<String> lines) throws I
125136 previousLine = line ;
126137 }
127138 }
139+
140+ static boolean testSortedAndUnique (BufferedReader reader , boolean withHeader ) throws IOException {
141+ String line = reader .readLine ();
142+ if (withHeader ) {
143+ if (line == null ) {
144+ throw new IOException ("header expected but not found" );
145+ }
146+ line = reader .readLine ();
147+ }
148+
149+ // no lines -> sorted & unique
150+ if (line == null ) {
151+ return true ;
152+ }
153+
154+ String previousLine = line ;
155+ line = reader .readLine ();
156+
157+ while (line != null ) {
158+ if (line .compareTo (previousLine ) <= 0 ) {
159+ return false ;
160+ }
161+ previousLine = line ;
162+ line = reader .readLine ();
163+ }
164+ return true ;
165+ }
128166}
0 commit comments