Sort members

garydgregory · garydgregory · commit 76981db68af5 · 2025-01-02T15:14:13.000-05:00
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -181,6 +181,18 @@ public Builder setCharacterOffset(final long characterOffset) {
             return asThis();
         }
 
+        /**
+         * Sets whether to enable byte tracking for the parser.
+         *
+         * @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
+         * @return this instance.
+         * @since 1.13.0
+         */
+        public Builder setEnableByteTracking(final boolean enableByteTracking) {
+            this.enableByteTracking = enableByteTracking;
+            return asThis();
+        }
+
         /**
          * Sets the CSV format. A copy of the given format is kept.
          *
@@ -203,18 +215,6 @@ public Builder setRecordNumber(final long recordNumber) {
             return asThis();
         }
 
-        /**
-         * Sets whether to enable byte tracking for the parser.
-         *
-         * @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
-         * @return this instance.
-         * @since 1.13.0
-         */
-        public Builder setEnableByteTracking(final boolean enableByteTracking) {
-            this.enableByteTracking = enableByteTracking;
-            return asThis();
-        }
-
     }
 
     final class CSVRecordIterator implements Iterator<CSVRecord> {
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -141,16 +141,6 @@ public String get(final String name) {
         }
     }
 
-    /**
-     * Returns the start position of this record as a character position in the source stream. This may or may not
-     * correspond to the byte position depending on the character set.
-     *
-     * @return the position of this record in the source stream.
-     */
-    public long getCharacterPosition() {
-        return characterPosition;
-    }
-
     /**
      * Returns the starting position of this record in the source stream, measured in bytes.
      *
@@ -161,6 +151,16 @@ public long getBytePosition() {
         return bytePosition;
     }
 
+    /**
+     * Returns the start position of this record as a character position in the source stream. This may or may not
+     * correspond to the byte position depending on the character set.
+     *
+     * @return the position of this record in the source stream.
+     */
+    public long getCharacterPosition() {
+        return characterPosition;
+    }
+
     /**
      * Returns the comment for this record, if any.
      * Note that comments are attached to the following record.
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -98,6 +98,60 @@ public void close() throws IOException {
         super.close();
     }
 
+    /**
+     * Gets the number of bytes read by the reader.
+     *
+     * @return the number of bytes read by the reader
+     */
+    long getBytesRead() {
+        return this.bytesRead;
+    }
+
+    /**
+     * Gets the byte length of the given character based on the original Unicode
+     * specification, which defined characters as fixed-width 16-bit entities.
+     * <p>
+     * The Unicode characters are divided into two main ranges:
+     * <ul>
+     *   <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
+     *     <ul>
+     *       <li>Represented using a single 16-bit {@code char}.</li>
+     *       <li>Includes all characters whose UTF-8 encoding is 1, 2, or 3 bytes long.</li>
+     *     </ul>
+     *   </li>
+     *   <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
+     *     <ul>
+     *       <li>Represented as a pair of {@code char}s:</li>
+     *       <li>The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).</li>
+     *       <li>The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).</li>
+     *       <li>Always encoded as 4 bytes in UTF-8 (one encoding for the whole surrogate pair).</li>
+     *     </ul>
+     *   </li>
+     * </ul>
+     *
+     * @param current the current character to process.
+     * @return the byte length of the character.
+     * @throws CharacterCodingException if the character cannot be encoded.
+     */
+    private int getEncodedCharLength(int current) throws CharacterCodingException {
+        final char cChar = (char) current;
+        final char lChar = (char) lastChar;
+        if (!Character.isSurrogate(cChar)) {
+            return encoder.encode(
+                CharBuffer.wrap(new char[] {cChar})).limit();
+        } else {
+            if (Character.isHighSurrogate(cChar)) {
+                // Move on to the next char (low surrogate)
+                return 0;
+            } else if (Character.isSurrogatePair(lChar, cChar)) {
+                return encoder.encode(
+                    CharBuffer.wrap(new char[] {lChar, cChar})).limit();
+            } else {
+                throw new CharacterCodingException();
+            }
+        }
+    }
+
     /**
      * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
      * any of the read methods. This will not include a character read using the {@link #peek()} method. If no
@@ -156,51 +210,6 @@ public int read() throws IOException {
         return lastChar;
     }
 
-    /**
-     * Gets the byte length of the given character based on the the original Unicode
-     * specification, which defined characters as fixed-width 16-bit entities.
-     * <p>
-     * The Unicode characters are divided into two main ranges:
-     * <ul>
-     *   <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
-     *     <ul>
-     *       <li>Represented using a single 16-bit {@code char}.</li>
-     *       <li>Includes UTF-8 encodings of 1-byte, 2-byte, and some 3-byte characters.</li>
-     *     </ul>
-     *   </li>
-     *   <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
-     *     <ul>
-     *       <li>Represented as a pair of {@code char}s:</li>
-     *       <li>The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).</li>
-     *       <li>The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).</li>
-     *       <li>Includes UTF-8 encodings of some 3-byte characters and all 4-byte characters.</li>
-     *     </ul>
-     *   </li>
-     * </ul>
-     *
-     * @param current the current character to process.
-     * @return the byte length of the character.
-     * @throws CharacterCodingException if the character cannot be encoded.
-     */
-    private int getEncodedCharLength(int current) throws CharacterCodingException {
-        final char cChar = (char) current;
-        final char lChar = (char) lastChar;
-        if (!Character.isSurrogate(cChar)) {
-            return encoder.encode(
-                CharBuffer.wrap(new char[] {cChar})).limit();
-        } else {
-            if (Character.isHighSurrogate(cChar)) {
-                // Move on to the next char (low surrogate)
-                return 0;
-            } else if (Character.isSurrogatePair(lChar, cChar)) {
-                return encoder.encode(
-                    CharBuffer.wrap(new char[] {lChar, cChar})).limit();
-            } else {
-                throw new CharacterCodingException();
-            }
-        }
-    }
-
     @Override
     public int read(final char[] buf, final int offset, final int length) throws IOException {
         if (length == 0) {
@@ -269,13 +278,4 @@ public void reset() throws IOException {
         super.reset();
     }
 
-    /**
-     * Gets the number of bytes read by the reader.
-     *
-     * @return the number of bytes read by the read
-     */
-    long getBytesRead() {
-        return this.bytesRead;
-    }
-
 }
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -97,21 +97,21 @@ public void close() throws IOException {
     }
 
     /**
-     * Returns the current character position
+     * Gets the number of bytes read
      *
-     * @return the current character position
+     * @return the number of bytes read
      */
-    long getCharacterPosition() {
-        return reader.getPosition();
+    long getBytesRead() {
+        return reader.getBytesRead();
     }
 
     /**
-     * Gets the number of bytes read
+     * Returns the current character position
      *
-     * @return the number of bytes read
+     * @return the current character position
      */
-    long getBytesRead() {
-        return reader.getBytesRead();
+    long getCharacterPosition() {
+        return reader.getPosition();
     }
 
     /**
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -703,76 +703,6 @@ public void testGetHeaderComment_NoComment3() throws IOException {
         }
     }
 
-    @Test
-    public void testGetRecordThreeBytesRead() throws Exception {
-        final String code = "id,date,val5,val4\n" +
-            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
-            "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
-            "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
-        final CSVFormat format = CSVFormat.Builder.create()
-            .setDelimiter(',')
-            .setQuote('\'')
-            .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
-            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
-
-            assertEquals(0, parser.getRecordNumber());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(1, record.getRecordNumber());
-            assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(2, record.getRecordNumber());
-            assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(3, record.getRecordNumber());
-            assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 95);
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(4, record.getRecordNumber());
-            assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 154);
-        }
-    }
-
-    @Test
-    public void testGetRecordFourBytesRead() throws Exception {
-        final String code = "id,a,b,c\n" +
-            "1,😊,🤔,😂\n" +
-            "2,😊,🤔,😂\n" +
-            "3,😊,🤔,😂\n";
-        final CSVFormat format = CSVFormat.Builder.create()
-            .setDelimiter(',')
-            .setQuote('\'')
-            .get();
-        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
-            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
-
-            assertEquals(0, parser.getRecordNumber());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(1, record.getRecordNumber());
-            assertEquals(code.indexOf('i'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), record.getCharacterPosition());
-
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(2, record.getRecordNumber());
-            assertEquals(code.indexOf('1'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), record.getCharacterPosition());
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(3, record.getRecordNumber());
-            assertEquals(code.indexOf('2'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 26);
-            assertNotNull(record = parser.nextRecord());
-            assertEquals(4, record.getRecordNumber());
-            assertEquals(code.indexOf('3'), record.getCharacterPosition());
-            assertEquals(record.getBytePosition(), 43);
-        }
-    }
-
     @Test
     public void testGetHeaderMap() throws Exception {
         try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
@@ -878,6 +808,40 @@ public void testGetOneLineOneParser() throws IOException {
         }
     }
 
+    @Test
+    public void testGetRecordFourBytesRead() throws Exception {
+        final String code = "id,a,b,c\n" +
+            "1,😊,🤔,😂\n" +
+            "2,😊,🤔,😂\n" +
+            "3,😊,🤔,😂\n";
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 26);
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 43);
+        }
+    }
+
     @Test
     public void testGetRecordNumberWithCR() throws Exception {
         validateRecordNumbers(String.valueOf(CR));
@@ -923,6 +887,42 @@ public void testGetRecordsFromBrokenInputStream() throws IOException {
 
     }
 
+    @Test
+    public void testGetRecordThreeBytesRead() throws Exception {
+        final String code = "id,date,val5,val4\n" +
+            "11111111111111,'4017-09-01',きちんと節分近くには咲いてる～,v4\n" +
+            "22222222222222,'4017-01-01',おはよう私の友人～,v4\n" +
+            "33333333333333,'4017-01-01',きる自然の力ってすごいな～,v4\n";
+        final CSVFormat format = CSVFormat.Builder.create()
+            .setDelimiter(',')
+            .setQuote('\'')
+            .get();
+        try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
+            CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
+
+            assertEquals(0, parser.getRecordNumber());
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(1, record.getRecordNumber());
+            assertEquals(code.indexOf('i'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(2, record.getRecordNumber());
+            assertEquals(code.indexOf('1'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), record.getCharacterPosition());
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(3, record.getRecordNumber());
+            assertEquals(code.indexOf('2'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 95);
+
+            assertNotNull(record = parser.nextRecord());
+            assertEquals(4, record.getRecordNumber());
+            assertEquals(code.indexOf('3'), record.getCharacterPosition());
+            assertEquals(record.getBytePosition(), 154);
+        }
+    }
+
     @Test
     public void testGetRecordWithMultiLineValues() throws Exception {
         try (CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",

Original file line number	Diff line number	Diff line change
`@@ -97,21 +97,21 @@ public void close() throws IOException {`
`97`	`97`	`}`
`98`	`98`
`99`	`99`	`/**`
`100`		`- * Returns the current character position`
	`100`	`+ * Gets the number of bytes read`
`101`	`101`	`*`
`102`		`- * @return the current character position`
	`102`	`+ * @return the number of bytes read`
`103`	`103`	`*/`
`104`		`- long getCharacterPosition() {`
`105`		`- return reader.getPosition();`
	`104`	`+ long getBytesRead() {`
	`105`	`+ return reader.getBytesRead();`
`106`	`106`	`}`
`107`	`107`
`108`	`108`	`/**`
`109`		`- * Gets the number of bytes read`
	`109`	`+ * Returns the current character position`
`110`	`110`	`*`
`111`		`- * @return the number of bytes read`
	`111`	`+ * @return the current character position`
`112`	`112`	`*/`
`113`		`- long getBytesRead() {`
`114`		`- return reader.getBytesRead();`
	`113`	`+ long getCharacterPosition() {`
	`114`	`+ return reader.getPosition();`
`115`	`115`	`}`
`116`	`116`
`117`	`117`	`/**`