Skip to content

Commit 76981db

Browse files
committed
Sort members
1 parent 5e5512f commit 76981db

File tree

5 files changed

+154
-154
lines changed

5 files changed

+154
-154
lines changed

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ public Builder setCharacterOffset(final long characterOffset) {
181181
return asThis();
182182
}
183183

184+
/**
185+
* Sets whether to enable byte tracking for the parser.
186+
*
187+
* @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
188+
* @return this instance.
189+
* @since 1.13.0
190+
*/
191+
public Builder setEnableByteTracking(final boolean enableByteTracking) {
192+
this.enableByteTracking = enableByteTracking;
193+
return asThis();
194+
}
195+
184196
/**
185197
* Sets the CSV format. A copy of the given format is kept.
186198
*
@@ -203,18 +215,6 @@ public Builder setRecordNumber(final long recordNumber) {
203215
return asThis();
204216
}
205217

206-
/**
207-
* Sets whether to enable byte tracking for the parser.
208-
*
209-
* @param enableByteTracking {@code true} to enable byte tracking; {@code false} to disable it.
210-
* @return this instance.
211-
* @since 1.13.0
212-
*/
213-
public Builder setEnableByteTracking(final boolean enableByteTracking) {
214-
this.enableByteTracking = enableByteTracking;
215-
return asThis();
216-
}
217-
218218
}
219219

220220
final class CSVRecordIterator implements Iterator<CSVRecord> {

src/main/java/org/apache/commons/csv/CSVRecord.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -141,16 +141,6 @@ public String get(final String name) {
141141
}
142142
}
143143

144-
/**
145-
* Returns the start position of this record as a character position in the source stream. This may or may not
146-
* correspond to the byte position depending on the character set.
147-
*
148-
* @return the position of this record in the source stream.
149-
*/
150-
public long getCharacterPosition() {
151-
return characterPosition;
152-
}
153-
154144
/**
155145
* Returns the starting position of this record in the source stream, measured in bytes.
156146
*
@@ -161,6 +151,16 @@ public long getBytePosition() {
161151
return bytePosition;
162152
}
163153

154+
/**
155+
* Returns the start position of this record as a character position in the source stream. This may or may not
156+
* correspond to the byte position depending on the character set.
157+
*
158+
* @return the position of this record in the source stream.
159+
*/
160+
public long getCharacterPosition() {
161+
return characterPosition;
162+
}
163+
164164
/**
165165
* Returns the comment for this record, if any.
166166
* Note that comments are attached to the following record.

src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,60 @@ public void close() throws IOException {
9898
super.close();
9999
}
100100

101+
/**
102+
* Gets the number of bytes read by the reader.
103+
*
104+
* @return the number of bytes read by the reader.
105+
*/
106+
long getBytesRead() {
107+
return this.bytesRead;
108+
}
109+
110+
/**
111+
* Gets the byte length of the given character based on the original Unicode
112+
* specification, which defined characters as fixed-width 16-bit entities.
113+
* <p>
114+
* The Unicode characters are divided into two main ranges:
115+
* <ul>
116+
* <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
117+
* <ul>
118+
* <li>Represented using a single 16-bit {@code char}.</li>
119+
* <li>Includes UTF-8 encodings of all 1-byte, 2-byte, and 3-byte characters.</li>
120+
* </ul>
121+
* </li>
122+
* <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
123+
* <ul>
124+
* <li>Represented as a pair of {@code char}s:</li>
125+
* <li>The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).</li>
126+
* <li>The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).</li>
127+
* <li>Includes UTF-8 encodings of all 4-byte characters.</li>
128+
* </ul>
129+
* </li>
130+
* </ul>
131+
*
132+
* @param current the current character to process.
133+
* @return the byte length of the character.
134+
* @throws CharacterCodingException if the character cannot be encoded.
135+
*/
136+
private int getEncodedCharLength(int current) throws CharacterCodingException {
137+
final char cChar = (char) current;
138+
final char lChar = (char) lastChar;
139+
if (!Character.isSurrogate(cChar)) {
140+
return encoder.encode(
141+
CharBuffer.wrap(new char[] {cChar})).limit();
142+
} else {
143+
if (Character.isHighSurrogate(cChar)) {
144+
// Move on to the next char (low surrogate)
145+
return 0;
146+
} else if (Character.isSurrogatePair(lChar, cChar)) {
147+
return encoder.encode(
148+
CharBuffer.wrap(new char[] {lChar, cChar})).limit();
149+
} else {
150+
throw new CharacterCodingException();
151+
}
152+
}
153+
}
154+
101155
/**
102156
* Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
103157
* any of the read methods. This will not include a character read using the {@link #peek()} method. If no
@@ -156,51 +210,6 @@ public int read() throws IOException {
156210
return lastChar;
157211
}
158212

159-
/**
160-
* Gets the byte length of the given character based on the original Unicode
161-
* specification, which defined characters as fixed-width 16-bit entities.
162-
* <p>
163-
* The Unicode characters are divided into two main ranges:
164-
* <ul>
165-
* <li><b>U+0000 to U+FFFF (Basic Multilingual Plane, BMP):</b>
166-
* <ul>
167-
* <li>Represented using a single 16-bit {@code char}.</li>
168-
* <li>Includes UTF-8 encodings of all 1-byte, 2-byte, and 3-byte characters.</li>
169-
* </ul>
170-
* </li>
171-
* <li><b>U+10000 to U+10FFFF (Supplementary Characters):</b>
172-
* <ul>
173-
* <li>Represented as a pair of {@code char}s:</li>
174-
* <li>The first {@code char} is from the high-surrogates range (\uD800-\uDBFF).</li>
175-
* <li>The second {@code char} is from the low-surrogates range (\uDC00-\uDFFF).</li>
176-
* <li>Includes UTF-8 encodings of all 4-byte characters.</li>
177-
* </ul>
178-
* </li>
179-
* </ul>
180-
*
181-
* @param current the current character to process.
182-
* @return the byte length of the character.
183-
* @throws CharacterCodingException if the character cannot be encoded.
184-
*/
185-
private int getEncodedCharLength(int current) throws CharacterCodingException {
186-
final char cChar = (char) current;
187-
final char lChar = (char) lastChar;
188-
if (!Character.isSurrogate(cChar)) {
189-
return encoder.encode(
190-
CharBuffer.wrap(new char[] {cChar})).limit();
191-
} else {
192-
if (Character.isHighSurrogate(cChar)) {
193-
// Move on to the next char (low surrogate)
194-
return 0;
195-
} else if (Character.isSurrogatePair(lChar, cChar)) {
196-
return encoder.encode(
197-
CharBuffer.wrap(new char[] {lChar, cChar})).limit();
198-
} else {
199-
throw new CharacterCodingException();
200-
}
201-
}
202-
}
203-
204213
@Override
205214
public int read(final char[] buf, final int offset, final int length) throws IOException {
206215
if (length == 0) {
@@ -269,13 +278,4 @@ public void reset() throws IOException {
269278
super.reset();
270279
}
271280

272-
/**
273-
* Gets the number of bytes read by the reader.
274-
*
275-
* @return the number of bytes read by the reader.
276-
*/
277-
long getBytesRead() {
278-
return this.bytesRead;
279-
}
280-
281281
}

src/main/java/org/apache/commons/csv/Lexer.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,21 +97,21 @@ public void close() throws IOException {
9797
}
9898

9999
/**
100-
* Returns the current character position
100+
* Gets the number of bytes read
101101
*
102-
* @return the current character position
102+
* @return the number of bytes read
103103
*/
104-
long getCharacterPosition() {
105-
return reader.getPosition();
104+
long getBytesRead() {
105+
return reader.getBytesRead();
106106
}
107107

108108
/**
109-
* Gets the number of bytes read
109+
* Returns the current character position
110110
*
111-
* @return the number of bytes read
111+
* @return the current character position
112112
*/
113-
long getBytesRead() {
114-
return reader.getBytesRead();
113+
long getCharacterPosition() {
114+
return reader.getPosition();
115115
}
116116

117117
/**

src/test/java/org/apache/commons/csv/CSVParserTest.java

Lines changed: 70 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -703,76 +703,6 @@ public void testGetHeaderComment_NoComment3() throws IOException {
703703
}
704704
}
705705

706-
@Test
707-
public void testGetRecordThreeBytesRead() throws Exception {
708-
final String code = "id,date,val5,val4\n" +
709-
"11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n" +
710-
"22222222222222,'4017-01-01',おはよう私の友人~,v4\n" +
711-
"33333333333333,'4017-01-01',きる自然の力ってすごいな~,v4\n";
712-
final CSVFormat format = CSVFormat.Builder.create()
713-
.setDelimiter(',')
714-
.setQuote('\'')
715-
.get();
716-
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
717-
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
718-
719-
assertEquals(0, parser.getRecordNumber());
720-
assertNotNull(record = parser.nextRecord());
721-
assertEquals(1, record.getRecordNumber());
722-
assertEquals(code.indexOf('i'), record.getCharacterPosition());
723-
assertEquals(record.getBytePosition(), record.getCharacterPosition());
724-
725-
assertNotNull(record = parser.nextRecord());
726-
assertEquals(2, record.getRecordNumber());
727-
assertEquals(code.indexOf('1'), record.getCharacterPosition());
728-
assertEquals(record.getBytePosition(), record.getCharacterPosition());
729-
730-
assertNotNull(record = parser.nextRecord());
731-
assertEquals(3, record.getRecordNumber());
732-
assertEquals(code.indexOf('2'), record.getCharacterPosition());
733-
assertEquals(record.getBytePosition(), 95);
734-
735-
assertNotNull(record = parser.nextRecord());
736-
assertEquals(4, record.getRecordNumber());
737-
assertEquals(code.indexOf('3'), record.getCharacterPosition());
738-
assertEquals(record.getBytePosition(), 154);
739-
}
740-
}
741-
742-
@Test
743-
public void testGetRecordFourBytesRead() throws Exception {
744-
final String code = "id,a,b,c\n" +
745-
"1,😊,🤔,😂\n" +
746-
"2,😊,🤔,😂\n" +
747-
"3,😊,🤔,😂\n";
748-
final CSVFormat format = CSVFormat.Builder.create()
749-
.setDelimiter(',')
750-
.setQuote('\'')
751-
.get();
752-
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
753-
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
754-
755-
assertEquals(0, parser.getRecordNumber());
756-
assertNotNull(record = parser.nextRecord());
757-
assertEquals(1, record.getRecordNumber());
758-
assertEquals(code.indexOf('i'), record.getCharacterPosition());
759-
assertEquals(record.getBytePosition(), record.getCharacterPosition());
760-
761-
assertNotNull(record = parser.nextRecord());
762-
assertEquals(2, record.getRecordNumber());
763-
assertEquals(code.indexOf('1'), record.getCharacterPosition());
764-
assertEquals(record.getBytePosition(), record.getCharacterPosition());
765-
assertNotNull(record = parser.nextRecord());
766-
assertEquals(3, record.getRecordNumber());
767-
assertEquals(code.indexOf('2'), record.getCharacterPosition());
768-
assertEquals(record.getBytePosition(), 26);
769-
assertNotNull(record = parser.nextRecord());
770-
assertEquals(4, record.getRecordNumber());
771-
assertEquals(code.indexOf('3'), record.getCharacterPosition());
772-
assertEquals(record.getBytePosition(), 43);
773-
}
774-
}
775-
776706
@Test
777707
public void testGetHeaderMap() throws Exception {
778708
try (CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
@@ -878,6 +808,40 @@ public void testGetOneLineOneParser() throws IOException {
878808
}
879809
}
880810

811+
@Test
812+
public void testGetRecordFourBytesRead() throws Exception {
813+
final String code = "id,a,b,c\n" +
814+
"1,😊,🤔,😂\n" +
815+
"2,😊,🤔,😂\n" +
816+
"3,😊,🤔,😂\n";
817+
final CSVFormat format = CSVFormat.Builder.create()
818+
.setDelimiter(',')
819+
.setQuote('\'')
820+
.get();
821+
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
822+
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
823+
824+
assertEquals(0, parser.getRecordNumber());
825+
assertNotNull(record = parser.nextRecord());
826+
assertEquals(1, record.getRecordNumber());
827+
assertEquals(code.indexOf('i'), record.getCharacterPosition());
828+
assertEquals(record.getBytePosition(), record.getCharacterPosition());
829+
830+
assertNotNull(record = parser.nextRecord());
831+
assertEquals(2, record.getRecordNumber());
832+
assertEquals(code.indexOf('1'), record.getCharacterPosition());
833+
assertEquals(record.getBytePosition(), record.getCharacterPosition());
834+
assertNotNull(record = parser.nextRecord());
835+
assertEquals(3, record.getRecordNumber());
836+
assertEquals(code.indexOf('2'), record.getCharacterPosition());
837+
assertEquals(record.getBytePosition(), 26);
838+
assertNotNull(record = parser.nextRecord());
839+
assertEquals(4, record.getRecordNumber());
840+
assertEquals(code.indexOf('3'), record.getCharacterPosition());
841+
assertEquals(record.getBytePosition(), 43);
842+
}
843+
}
844+
881845
@Test
882846
public void testGetRecordNumberWithCR() throws Exception {
883847
validateRecordNumbers(String.valueOf(CR));
@@ -923,6 +887,42 @@ public void testGetRecordsFromBrokenInputStream() throws IOException {
923887

924888
}
925889

890+
@Test
891+
public void testGetRecordThreeBytesRead() throws Exception {
892+
final String code = "id,date,val5,val4\n" +
893+
"11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n" +
894+
"22222222222222,'4017-01-01',おはよう私の友人~,v4\n" +
895+
"33333333333333,'4017-01-01',きる自然の力ってすごいな~,v4\n";
896+
final CSVFormat format = CSVFormat.Builder.create()
897+
.setDelimiter(',')
898+
.setQuote('\'')
899+
.get();
900+
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
901+
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
902+
903+
assertEquals(0, parser.getRecordNumber());
904+
assertNotNull(record = parser.nextRecord());
905+
assertEquals(1, record.getRecordNumber());
906+
assertEquals(code.indexOf('i'), record.getCharacterPosition());
907+
assertEquals(record.getBytePosition(), record.getCharacterPosition());
908+
909+
assertNotNull(record = parser.nextRecord());
910+
assertEquals(2, record.getRecordNumber());
911+
assertEquals(code.indexOf('1'), record.getCharacterPosition());
912+
assertEquals(record.getBytePosition(), record.getCharacterPosition());
913+
914+
assertNotNull(record = parser.nextRecord());
915+
assertEquals(3, record.getRecordNumber());
916+
assertEquals(code.indexOf('2'), record.getCharacterPosition());
917+
assertEquals(record.getBytePosition(), 95);
918+
919+
assertNotNull(record = parser.nextRecord());
920+
assertEquals(4, record.getRecordNumber());
921+
assertEquals(code.indexOf('3'), record.getCharacterPosition());
922+
assertEquals(record.getBytePosition(), 154);
923+
}
924+
}
925+
926926
@Test
927927
public void testGetRecordWithMultiLineValues() throws Exception {
928928
try (CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",

0 commit comments

Comments
 (0)