@@ -20,6 +20,18 @@ var BASE_PAIRS = [
2020 'G' // 3=11
2121] ;
2222
/**
 * The following chunk sizes are optimized against the human reference
 * genome (hg19.2bit). Assuming that pileup.js is mostly being used for
 * the human genome, increasing the following numbers might cause
 * unnecessary network traffic and might also break our unit tests that
 * make use of mapped 2bit files.
 */
var FIRST_HEADER_CHUNKSIZE = 16 * 1024,  // 16 KB
    FIRST_SEQUENCE_CHUNKSIZE = (4 * 1024) - 1,  // ~4 KB
    MAX_CHUNKSIZE = 1024 * 1024;  // 1 MB
34+
2335type FileIndexEntry = {
2436 name : string ;
2537 offset: number ;
@@ -125,6 +137,66 @@ function markUnknownDNA(basePairs: Array<string>, dnaStartIndex: number, sequenc
125137}
126138
127139
/**
 * An umbrella error type to describe issues with parsing an
 * incomplete chunk of data with JBinary's read. If this is being
 * raised, we either need to ask for more data (a bigger chunk) or
 * report to the user that there might be a problem with the 2bit
 * file, specifically with its header.
 *
 * @param message Description of the failure; typically the underlying
 *     parse error. Defaults to "" when omitted.
 */
function IncompleteChunkError(message) {
  this.name = "IncompleteChunkError";
  this.message = (message || "");
}
// Inherit from Error via a fresh prototype object. Assigning
// Error.prototype directly would alias the shared prototype: every plain
// Error would then satisfy `instanceof IncompleteChunkError`, and any
// mutation of this prototype would leak onto all Errors.
IncompleteChunkError.prototype = Object.create(Error.prototype);
IncompleteChunkError.prototype.constructor = IncompleteChunkError;
152+
153+ /**
154+ * Wraps a parsing attempt, captures errors related to
155+ * incomplete data and re-throws a specialized error:
156+ * IncompleteChunkError. Otherwise, whatever other error
157+ * is being raised gets escalated.
158+ */
159+ function parseWithException ( parseFunc : Function ) {
160+ try {
161+ return parseFunc ( ) ;
162+ } catch ( error ) {
163+ // Chrome-like browsers: RangeError; phantomjs: DOMException
164+ if ( error . name == "RangeError" || error . name == "INDEX_SIZE_ERR" ) {
165+ console . log ( `Error name: ${ error . name } ` ) ;
166+ throw new IncompleteChunkError ( error ) ;
167+ } else {
168+ throw error ;
169+ }
170+ }
171+ }
172+
173+ /**
174+ * Try getting a bigger chunk of the remote file
175+ * until the Incomplete Chunk Error is resolved. This is useful when we need to
176+ * parse the header, but when we don't know the size of the header up front.
177+ * If the intial request returns an incomplete header and hence the
178+ * parsing fails, we next try doubling the requested size.
179+ * The size of the request is capped with `untilSize` so that
180+ * we don't start asking for MBs of data for no use.
181+ * Instead we we throw an error if we reach the cap,
182+ * potentially meaning a corrupted 2bit file.
183+ */
184+ function retryRemoteGet ( remoteFile : RemoteFile , start : number , size : number , untilSize : number , promisedFunc : Function ) {
185+ return remoteFile . getBytes ( start , size ) . then ( promisedFunc ) . catch ( error => {
186+ if ( error . name == "IncompleteChunkError" ) {
187+ // Do not attempt to download more than `untilSize`
188+ if ( size > untilSize ) {
189+ throw `Couldn't parse the header ` +
190+ `from the first ${ size } bytes of the file. ` +
191+ `Corrupted 2bit file?` ;
192+ }
193+ return retryRemoteGet ( remoteFile , start , size * 2 , untilSize , promisedFunc ) ;
194+ } else {
195+ throw error ;
196+ }
197+ } ) ;
198+ }
199+
128200class TwoBit {
129201 remoteFile : RemoteFile ;
130202 header : Q . Promise < TwoBitHeader > ;
@@ -133,10 +205,15 @@ class TwoBit {
133205 this . remoteFile = remoteFile ;
134206 var deferredHeader = Q . defer ( ) ;
135207 this . header = deferredHeader . promise ;
136-
137- // TODO: if 16k is insufficient, fetch the right amount.
138- this . remoteFile . getBytes ( 0 , 16 * 1024 ) . then ( function ( buffer ) {
139- var header = parseHeader ( buffer ) ;
208+ retryRemoteGet (
209+ this . remoteFile ,
210+ 0 , // Beginning of the file
211+ FIRST_HEADER_CHUNKSIZE ,
212+ MAX_CHUNKSIZE ,
213+ buffer => {
214+ var header = parseWithException ( ( ) => {
215+ return parseHeader ( buffer ) ;
216+ } ) ;
140217 deferredHeader . resolve ( header ) ;
141218 } ) . done ( ) ;
142219 }
@@ -178,9 +255,17 @@ class TwoBit {
178255 }
179256 var seq = maybeSeq ; // for flow, see facebook/flow#266
180257
181- // TODO: if 4k is insufficient, fetch the right amount.
182- return this . remoteFile . getBytes ( seq . offset , 4095 ) . then (
183- buf => parseSequenceRecord ( buf , seq . offset ) ) ;
258+ return retryRemoteGet (
259+ this . remoteFile ,
260+ seq . offset ,
261+ FIRST_SEQUENCE_CHUNKSIZE ,
262+ MAX_CHUNKSIZE ,
263+ buffer => {
264+ return parseWithException ( ( ) => {
265+ return parseSequenceRecord ( buffer , seq . offset ) ;
266+ } ) ;
267+ }
268+ ) ;
184269 } ) ;
185270 }
186271}
0 commit comments