1616package com .splunk ;
1717
1818import java .io .*;
19- import java .nio .ByteBuffer ;
20- import java .util .Arrays ;
21- import java .util .concurrent .Callable ;
2219
2320/**
2421 * Takes an InputStream containing a UTF-8 encoded XML document containing one or more
3128 * it is filtering.
3229 */
3330class InsertRootElementFilterInputStream extends FilterInputStream {
31+ private static final int REREAD_BUFFER_SIZE = 512 ;
32+ private static byte [] resultsTagBytes ;
3433 private final ByteArrayInputStream suffix = new ByteArrayInputStream ("</doc>" .getBytes ("UTF-8" ));
34+ private ByteArrayInputStream beforeResultsBuffer ;
3535 private boolean wrotePrefix ;
3636
3737 private byte [] oneByte = new byte [1 ];
3838
static {
    // Encode the tag name once at class-load time so later comparisons
    // against the stream do not have to re-encode it.
    byte[] encodedTag;
    try {
        encodedTag = "results".getBytes("UTF-8");
    } catch (UnsupportedEncodingException unsupported) {
        // Not expected in practice: every Java platform is required to support UTF-8.
        throw new RuntimeException(unsupported);
    }
    resultsTagBytes = encodedTag;
}
47+
3948 InsertRootElementFilterInputStream (InputStream in ) throws IOException {
4049 // Wrap in with a pushback stream so we can write our modified version back
4150 // onto the beginning of it.
42- super (new PushbackInputStream (in , 512 ));
51+ super (new PushbackInputStream (in , REREAD_BUFFER_SIZE ));
52+
4353 PushbackInputStream pin = (PushbackInputStream )this .in ;
4454
4555 // Read bytes until we reach '>', then push everything we read, followed by "<doc>",
4656 // back onto the stream. If we run out of input before we reach '>', then don't
4757 // modify the stream.
4858 ByteArrayOutputStream beforeResultsChars = new ByteArrayOutputStream ();
49- ByteArrayOutputStream atResultsChars = new ByteArrayOutputStream ( );
59+ beforeResultsBuffer = new ByteArrayInputStream ( new byte [ 0 ] );
5060
5161 int ch ;
5262 while (true ) {
@@ -57,32 +67,19 @@ class InsertRootElementFilterInputStream extends FilterInputStream {
5767 pin .unread (beforeResultsChars .toByteArray ());
5868 return ;
5969 } else if (ch == (int )'<' ) {
60- // Try extending
61- atResultsChars .reset ();
62- int ech ;
63- boolean matched = true ;
64- for (byte b : "results" .getBytes ("UTF-8" )) {
65- ech = this .in .read ();
66- atResultsChars .write (ech );
67- if (ech != b ) {
68- // Extension failed. Put the bytes back on and search again.
69- pin .unread (atResultsChars .toByteArray ());
70- matched = false ;
71- break ;
72- }
73- }
70+ boolean resultsTag = isResultsTag (pin );
7471
75- if (matched ) {
72+ if (resultsTag ) {
7673 // If we reach here, the extension succeeded, so we insert <doc>, unread everything,
7774 // and return.
7875
7976 // Unread the match.
80- pin .unread (atResultsChars . toByteArray () );
77+ pin .unread (InsertRootElementFilterInputStream . resultsTagBytes );
8178 // Unread the opening '<' that led to our extension
8279 pin .unread (ch );
83- // Add a '<doc>' element to our read charactes and unread them.
80+ // Add a '<doc>' element to our read characters
8481 beforeResultsChars .write ("<doc>" .getBytes ("UTF-8" ));
85- pin . unread (beforeResultsChars .toByteArray ());
82+ beforeResultsBuffer = new ByteArrayInputStream (beforeResultsChars .toByteArray ());
8683 wrotePrefix = true ;
8784 return ;
8885 } else {
@@ -96,9 +93,38 @@ class InsertRootElementFilterInputStream extends FilterInputStream {
9693 }
9794 }
9895
96+ private boolean isResultsTag (PushbackInputStream pin ) throws IOException {
97+ // Try extending
98+ ByteArrayOutputStream atResultsChars = new ByteArrayOutputStream ();
99+ int ech ;
100+ boolean resultsTag = true ;
101+ for (byte b : resultsTagBytes ) {
102+ ech = this .in .read ();
103+ atResultsChars .write (ech );
104+ if (ech != b ) {
105+ // Extension failed. Put the bytes back on and search again.
106+ pin .unread (atResultsChars .toByteArray ());
107+ resultsTag = false ;
108+ break ;
109+ }
110+ }
111+ return resultsTag ;
112+ }
113+
99114 @ Override
100115 public int read (byte [] buffer , int offset , int length ) throws IOException {
101- int result = in .read (buffer , offset , length );
116+ // first we read from the buffer before the first results xml tag
117+ int result = 0 ;
118+ int availableFromBuffer = beforeResultsBuffer .available ();
119+ if (offset < availableFromBuffer ) {
120+ result = beforeResultsBuffer .read (buffer , offset , length );
121+ if (length <= result ) {
122+ return result ;
123+ }
124+ }
125+
126+ // then we read from the original input stream
127+ result += in .read (buffer , offset +result , length -result );
102128 if (result == -1 && wrotePrefix ) {
103129 // No more bytes to read from in, and we have written '<doc>' earlier in the stream
104130 return suffix .read (buffer , offset , length );
0 commit comments