@@ -20,6 +20,7 @@ import (
2020 "github.com/cockroachdb/cockroach/pkg/storage/enginepb"
2121 "github.com/cockroachdb/cockroach/pkg/storage/mvccencoding"
2222 "github.com/cockroachdb/cockroach/pkg/storage/mvcceval"
23+ "github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
2324 "github.com/cockroachdb/cockroach/pkg/util/log"
2425 "github.com/cockroachdb/errors"
2526)
@@ -151,7 +152,17 @@ type txnWriteBuffer struct {
151152 // that have been aborted by a conflicting transaction. As read-your-own-write
152153 // semantics are upheld by the client, not the server, for transactions that
153154 // use buffered writes, we can skip the AbortSpan check on the server.
155+ //
156+ // Whether writes are buffered is tracked by two state variables: `enabled`
157+ // and `flushed`. Writes are only buffered if enabled && !flushed.
158+ //
159+ // `enabled` tracks whether buffering has been enabled/disabled externally via
160+ // txn.SetBufferedWritesEnabled or because we are operating on a leaf
161+ // transaction.
154162 enabled bool
163+ //
164+ // `flushed` tracks whether the buffer has been previously flushed.
165+ flushed bool
155166
156167 // flushOnNextBatch, if set, indicates that write buffering has just been
157168 // disabled, and the interceptor should flush any buffered writes when it
@@ -188,7 +199,7 @@ func (twb *txnWriteBuffer) SendLocked(
188199 return twb .flushBufferAndSendBatch (ctx , ba )
189200 }
190201
191- if ! twb .enabled {
202+ if ! twb .shouldBuffer () {
192203 return twb .wrapped .SendLocked (ctx , ba )
193204 }
194205
@@ -202,6 +213,7 @@ func (twb *txnWriteBuffer) SendLocked(
202213 }
203214
204215 if _ , ok := ba .GetArg (kvpb .DeleteRange ); ok {
216+ log .VEventf (ctx , 2 , "DeleteRangeRequest forcing flush of write buffer" )
205217 // DeleteRange requests can delete an arbitrary number of keys over a
206218 // given keyspan. We won't know the exact scope of the delete until
207219 // we've scanned the keyspan, which must happen on the server. We've got
@@ -231,8 +243,13 @@ func (twb *txnWriteBuffer) SendLocked(
231243 // Check if buffering writes from the supplied batch will run us over
232244 // budget. If it will, we shouldn't buffer writes from the current batch,
233245 // and flush the buffer.
234- if twb .estimateSize (ba )+ twb .bufferSize > bufferedWritesMaxBufferSize .Get (& twb .st .SV ) {
246+ maxSize := bufferedWritesMaxBufferSize .Get (& twb .st .SV )
247+ bufSize := twb .estimateSize (ba ) + twb .bufferSize
248+ if bufSize > maxSize {
235249 // TODO(arul): add some metrics for this case.
250+ log .VEventf (ctx , 2 , "flushing buffer because buffer size (%s) exceeds max size (%s)" ,
251+ humanizeutil .IBytes (bufSize ),
252+ humanizeutil .IBytes (maxSize ))
236253 return twb .flushBufferAndSendBatch (ctx , ba )
237254 }
238255
@@ -325,12 +342,17 @@ func (twb *txnWriteBuffer) adjustError(
325342 if ts [0 ].stripped {
326343 numStripped ++
327344 } else {
328- // TODO(arul): If the error index points to a request that we've
329- // transformed, returning this back to the client is weird -- the
330- // client doesn't know we're making transformations. We should
331- // probably just log a warning and clear out the error index for such
332- // cases.
333- log .Fatal (ctx , "unhandled" )
345+ // This is a transformed request (for example a LockingGet that was
346+ // sent instead of a Del). In this case, the error might be a bit
347+ // confusing to the client since the request that had an error isn't
348+ // exactly the request the user sent.
349+ //
350+ // For now, we handle this by logging and removing the error index.
351+ if baIdx == pErr .Index .Index {
352+ log .Warningf (ctx , "error index %d is part of a transformed request" , pErr .Index .Index )
353+ pErr .Index = nil
354+ return pErr
355+ }
334356 }
335357 ts = ts [1 :]
336358 continue
@@ -444,7 +466,14 @@ func (twb *txnWriteBuffer) importLeafFinalState(context.Context, *roachpb.LeafTx
444466}
445467
446468// epochBumpedLocked implements the txnInterceptor interface.
447- func (twb * txnWriteBuffer ) epochBumpedLocked () {} // removed side of the diff: previously a no-op
469+ func (twb * txnWriteBuffer ) epochBumpedLocked () { // added side: now clears the buffer via resetBuffer
470+ twb .resetBuffer () // NOTE(review): the flushed flag is not cleared here although the flush path logs that buffering is disabled for this epoch; confirm intent
471+ }
472+
473+ func (twb * txnWriteBuffer ) resetBuffer () { // resetBuffer empties the write buffer and resets its size accounting
474+ twb .buffer .Reset () // drop everything currently held in the buffer
475+ twb .bufferSize = 0 // nothing is buffered any more, so the tracked size goes back to zero
476+ }
448477
449478// createSavepointLocked is part of the txnInterceptor interface.
450479func (twb * txnWriteBuffer ) createSavepointLocked (context.Context , * savepoint ) {} // no-op: the body is empty and both arguments are ignored
@@ -1132,34 +1161,28 @@ func (twb *txnWriteBuffer) flushBufferAndSendBatch(
11321161 defer func () {
11331162 assertTrue (twb .buffer .Len () == 0 , "buffer should be empty after flush" )
11341163 assertTrue (twb .bufferSize == 0 , "buffer size should be 0 after flush" )
1164+ assertTrue (twb .flushed , "flushed should be true after flush" )
11351165 }()
11361166
1167+ // Once we've flushed the buffer, we disable write buffering going forward. We
1168+ // do this even if the buffer is empty since once we've called this function,
1169+ // our buffer no longer represents all of the writes in the transaction.
1170+ log .VEventf (ctx , 2 , "disabling write buffering for this epoch" )
1171+ twb .flushed = true
1172+
11371173 numBuffered := twb .buffer .Len ()
11381174 if numBuffered == 0 {
11391175 return twb .wrapped .SendLocked (ctx , ba ) // nothing to flush
11401176 }
11411177
1142- // Once we've flushed the buffer, we disable write buffering going forward.
1143- twb .enabled = false
1144-
11451178 // Flush all buffered writes by pre-pending them to the requests being sent
11461179 // in the batch.
1147- // First, collect the requests we'll need to flush.
1148- toFlushBufferedWrites := make ([]bufferedWrite , 0 , twb .buffer .Len ())
1149-
1180+ reqs := make ([]kvpb.RequestUnion , 0 , numBuffered + len (ba .Requests ))
11501181 it := twb .buffer .MakeIter ()
11511182 for it .First (); it .Valid (); it .Next () {
1152- toFlushBufferedWrites = append (toFlushBufferedWrites , * it .Cur ())
1153- }
1154-
1155- reqs := make ([]kvpb.RequestUnion , 0 , numBuffered + len (ba .Requests ))
1156-
1157- // Next, remove the buffered writes from the buffer and collect them into
1158- // requests.
1159- for _ , bw := range toFlushBufferedWrites {
1160- reqs = append (reqs , bw .toRequest ())
1161- twb .removeFromBuffer (& bw )
1183+ reqs = append (reqs , it .Cur ().toRequest ())
11621184 }
1185+ twb .resetBuffer ()
11631186
11641187 // Layers below us expect that writes inside a batch are in sequence number
11651188 // order but the iterator above returns data in key order. Here we re-sort it
@@ -1194,6 +1217,12 @@ func (twb *txnWriteBuffer) hasBufferedWrites() bool {
11941217 return twb .buffer .Len () > 0
11951218}
11961219
1220+ // shouldBuffer returns true if SendLocked() should attempt to buffer parts of
1221+ // the batch. That is the case only while enabled is set and flushed is not.
1222+ func (twb * txnWriteBuffer ) shouldBuffer () bool {
1223+ return twb .enabled && ! twb .flushed
1224+ }
1225+
11971226// testingBufferedWritesAsSlice returns all buffered writes, in key order, as a
11981227// slice.
11991228func (twb * txnWriteBuffer ) testingBufferedWritesAsSlice () []bufferedWrite {
0 commit comments