46
46
*/
47
47
public abstract class CompressedAnalyzer extends FileAnalyzer {
48
48
49
+ private static final int CHUNK_SIZE = 8 * 1024 ;
50
+
49
51
protected Genre g ;
50
52
51
53
@ Override
@@ -93,10 +95,17 @@ private boolean meetsHugeTextThreshold(StreamSource compressedSrc) throws IOExce
93
95
return false ;
94
96
}
95
97
96
- byte [] buf = new byte [8 * 1024 ];
97
- int bytesRead = 0 ;
98
- int n ;
99
98
try (InputStream in = compressedSrc .getStream ()) {
99
+ // Try skip first.
100
+ SkipResult result = meetsHugeTextThresholdBySkip (in , hugeTextThresholdBytes );
101
+ if (result .didMeet ) {
102
+ return true ;
103
+ }
104
+
105
+ // Even if some skipped, only read==-1 is a true indicator of EOF.
106
+ long bytesRead = result .bytesSkipped ;
107
+ byte [] buf = new byte [CHUNK_SIZE ];
108
+ long n ;
100
109
while ((n = in .read (buf , 0 , buf .length )) != -1 ) {
101
110
bytesRead += n ;
102
111
if (bytesRead >= hugeTextThresholdBytes ) {
@@ -106,4 +115,30 @@ private boolean meetsHugeTextThreshold(StreamSource compressedSrc) throws IOExce
106
115
}
107
116
return false ;
108
117
}
118
+
119
+ private SkipResult meetsHugeTextThresholdBySkip (InputStream in , int hugeTextThresholdBytes ) {
120
+ long bytesSkipped = 0 ;
121
+ long n ;
122
+ try {
123
+ while ((n = in .skip (CHUNK_SIZE )) > 0 ) {
124
+ bytesSkipped += n ;
125
+ if (bytesSkipped >= hugeTextThresholdBytes ) {
126
+ return new SkipResult (bytesSkipped , true );
127
+ }
128
+ }
129
+ } catch (IOException ignored ) {
130
+ // Ignore and assume not capable of skip.
131
+ }
132
+ return new SkipResult (bytesSkipped , false );
133
+ }
134
+
135
+ private static class SkipResult {
136
+ final long bytesSkipped ;
137
+ final boolean didMeet ;
138
+
139
+ SkipResult (long bytesSkipped , boolean didMeet ) {
140
+ this .bytesSkipped = bytesSkipped ;
141
+ this .didMeet = didMeet ;
142
+ }
143
+ }
109
144
}
0 commit comments