@@ -10,13 +10,56 @@ internal static partial class FirstPassParser
1010{
// UTF-8 bytes of the "startxref" keyword. The length of this span sizes the
// circular buffer used by the read-backwards fallback search further below.
1111 private static ReadOnlySpan < byte > StartXRefBytes => "startxref"u8 ;
1212
// Maximum number of bytes, counted from the end of the input, that are fetched
// in one read for the initial in-memory search for the startxref keyword.
// Inputs shorter than this are fetched in full (the fetch offset is clamped to 0).
13+ public const long EndOfFileBufferSize = 1024 ;
14+
1315 public static StartXRefLocation GetFirstCrossReferenceOffset (
1416 IInputBytes bytes ,
1517 ISeekableTokenScanner scanner ,
1618 ILog log )
1719 {
20+ // We used to read backward through the file, but this is quite expensive for streams that directly wrap OS files.
21+ // Instead we fetch the last 1024 bytes of the file and do an in-memory search as a cheap first attempt. This is significantly faster
22+ // in practice when no in-process caching of the file is involved.
23+ //
24+ // If that fails (in practice it should never) we fall back to the old method of reading backwards.
1825 var fileLength = bytes . Length ;
26+ {
27+ var fetchFrom = Math . Max ( bytes . Length - EndOfFileBufferSize , 0L ) ;
28+
29+ bytes . Seek ( fetchFrom ) ;
30+
31+ Span < byte > byteBuffer = new byte [ bytes . Length - fetchFrom ] ; // TODO: Maybe use PoolArray?
32+
33+ int n = bytes . Read ( byteBuffer ) ;
34+
35+ if ( n == byteBuffer . Length )
36+ {
37+ int lx = byteBuffer . LastIndexOf ( "startxref"u8 ) ;
38+
39+ if ( lx < 0 )
40+ {
41+ // See old code. We also try a mangled version
42+ lx = byteBuffer . LastIndexOf ( "startref"u8 ) ;
43+ }
44+
45+ if ( lx >= 0 )
46+ {
47+ scanner . Seek ( fetchFrom + lx ) ;
48+
49+ if ( scanner . TryReadToken ( out OperatorToken startXrefOp ) && ( startXrefOp . Data == "startxref" || startXrefOp . Data == "startref" ) )
50+ {
51+ var pos = GetNumericTokenFollowingCurrent ( scanner ) ;
52+
53+ log . Debug ( $ "Found startxref at { pos } ") ;
54+
55+ return new StartXRefLocation ( fetchFrom + lx , pos ) ;
56+ }
57+ }
58+
59+ }
60+ }
1961
62+ // Now fall through to the old code
2063 var buffer = new CircularByteBuffer ( StartXRefBytes . Length ) ;
2164
2265 // Start from the end of the file
0 commit comments