@@ -307,6 +307,52 @@ public static Builder builder() {
307307 private FilePart currentFilePart ;
308308 private boolean trailingNewlineOfFileSkipped ;
309309
/**
 * Constructs a new instance from the settings held by the given builder.
 * <p>
 * The block size is taken from the builder's buffer size, the charset from the builder's charset, and the file
 * to read from the builder's path. The encoding is validated before the file is opened, so an unsupported
 * encoding never opens a channel.
 * </p>
 *
 * @param builder supplies the buffer size, the charset, and the path of the file to read.
 * @throws UnsupportedEncodingException if the charset is UTF-16 without an explicit byte order, or is an
 *         encoding this class does not support.
 * @throws IOException if an I/O error occurs while opening the file or preparing the last block.
 */
private ReversedLinesFileReader(final Builder builder) throws IOException {
    this.blockSize = builder.getBufferSize();
    this.charset = Charsets.toCharset(builder.getCharset());
    // check & prepare encoding
    this.byteDecrement = byteDecrementFor(this.charset);
    // NOTE: The new line sequences are matched in the order given, so it is
    // important that \r\n is BEFORE \n
    this.newLineSequences = new byte[][] {
        StandardLineSeparator.CRLF.getBytes(this.charset),
        StandardLineSeparator.LF.getBytes(this.charset),
        StandardLineSeparator.CR.getBytes(this.charset)
    };
    this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
    // Open file
    this.channel = Files.newByteChannel(builder.getPath(), StandardOpenOption.READ);
    try {
        this.totalByteLength = channel.size();
        int lastBlockLength = (int) (this.totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            // The file ends with a partially filled block.
            this.totalBlockCount = this.totalByteLength / blockSize + 1;
        } else {
            this.totalBlockCount = this.totalByteLength / blockSize;
            if (this.totalByteLength > 0) {
                // Non-empty file whose length is an exact multiple of the block size: the last block is full.
                lastBlockLength = blockSize;
            }
        }
        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
    } catch (final IOException | RuntimeException e) {
        // The caller never receives a reference to this instance when construction fails,
        // so the channel opened above must be released here to avoid leaking it.
        try {
            this.channel.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

/**
 * Determines the byte decrement to use for the given charset.
 *
 * @param charset the charset of the file to read.
 * @return 1 for single-byte encodings, UTF-8 and the listed legacy multi-byte code pages; 2 for UTF-16BE and
 *         UTF-16LE.
 * @throws UnsupportedEncodingException if the charset is UTF-16 without an explicit byte order, or is any other
 *         encoding.
 */
private static int byteDecrementFor(final Charset charset) throws UnsupportedEncodingException {
    final CharsetEncoder charsetEncoder = charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f || StandardCharsets.UTF_8.equals(charset)) {
        // all one byte encodings are no problem
        return 1;
    }
    // Same as for UTF-8
    // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
    final String[] legacyCharsetNames = {
        "Shift_JIS",
        "windows-31j", // Windows code page 932 (Japanese)
        "x-windows-949", // Windows code page 949 (Korean)
        "gbk", // Windows code page 936 (Simplified Chinese)
        "x-windows-950" // Windows code page 950 (Traditional Chinese)
    };
    for (final String legacyCharsetName : legacyCharsetNames) {
        // These charsets are optional in a Java runtime: only look one up when it is available, so that a
        // missing code page cannot fail the check of an unrelated charset.
        if (Charset.isSupported(legacyCharsetName) && Charset.forName(legacyCharsetName).equals(charset)) {
            return 1;
        }
    }
    if (StandardCharsets.UTF_16BE.equals(charset) || StandardCharsets.UTF_16LE.equals(charset)) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte
        // sequences,
        // however byte order has to be specified
        return 2;
    }
    if (StandardCharsets.UTF_16.equals(charset)) {
        throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    }
    throw new UnsupportedEncodingException("Encoding " + charset + " is not supported yet (feel free to submit a patch)");
}
355+
310356 /**
311357 * Constructs a ReversedLinesFileReader with default block size of 4KB and the virtual machine's {@link Charset#defaultCharset() default charset}.
312358 *
@@ -397,59 +443,7 @@ public ReversedLinesFileReader(final Path file, final Charset charset) throws IO
397443 */
398444 @ Deprecated
public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
    // All set-up (encoding validation, new line sequences, opening the file) lives in the
    // builder-based constructor; this constructor only forwards its three arguments to it.
    this(builder()
            .setPath(file)
            .setBufferSize(blockSize)
            .setCharset(charset));
}
454448
455449 /**
0 commit comments