4747import org .elasticsearch .xcontent .ToXContentObject ;
4848import org .elasticsearch .xcontent .XContentBuilder ;
4949
50+ import java .io .FileNotFoundException ;
5051import java .io .IOException ;
52+ import java .nio .file .NoSuchFileException ;
5153import java .util .ArrayList ;
5254import java .util .Collection ;
5355import java .util .Collections ;
7072 * version of the blob, but again must not yield partial data). Usually, however, we write once and only read after the write completes, and
7173 * in this case we insist that the read succeeds.
7274 *
75+ * The writer may also attempt to copy the blob, either just before the write completes (which may fail with not found)
76+ * or after (which should not fail). The writer may overwrite the source while the copy is in progress. If a copy is attempted and
77+ * succeeds, readers will read the copy instead of the original, and as above they should see a complete copy.
78+ * If the source is overwritten while the copy is in progress, readers may see either the original blob or the new one but no
79+ * mixture or partial result.
7380 *
7481 * <pre>
7582 *
8390 * | Write blob with random content | |
8491 * |-----------------------------------→| |
8592 * | | |
93+ * | Copy blob during write (rarely) | |
94+ * |-----------------------------------→| |
95+ * | | |
96+ * | Copy complete | |
97+ * |←-----------------------------------| |
98+ * | | |
8699 * | Read range during write (rarely) | |
87100 * |----------------------------------------------------------------------------→|
88101 * | | |
106119 * |-| Read phase | | |
107120 * | |------------| | |
108121 * | | |
122+ * | Copy blob (rarely) | |
123+ * |-----------------------------------→| |
124+ * | | |
125+ * | TODO: Overwrite source (rarely) | |
126+ * |-----------------------------------→| |
127+ * | | |
128+ * | Overwrite complete | |
129+ * |←-----------------------------------| |
130+ * | | |
131+ * | Copy complete | |
132+ * |←-----------------------------------| |
133+ * | | |
109134 * | Read range [a,b) | |
110135 * |----------------------------------------------------------------------------→|
111136 * | | |
@@ -199,6 +224,9 @@ private static class BlobAnalysis {
199224 private final boolean checksumWholeBlob ;
200225 private final long checksumStart ;
201226 private final long checksumEnd ;
227+ // If a copy is requested, do exactly one so that the number of blobs created is controlled by RepositoryAnalyzeAction.
228+ // An early copy exercises copying before the write completes; a late copy exercises copying after it completes (the happy path).
229+ private final boolean doEarlyCopy ;
202230 private final List <DiscoveryNode > earlyReadNodes ;
203231 private final List <DiscoveryNode > readNodes ;
204232 private final GroupedActionListener <NodeResponse > readNodesListener ;
@@ -230,6 +258,7 @@ private static class BlobAnalysis {
230258 checksumStart = randomLongBetween (0L , request .targetLength );
231259 checksumEnd = randomLongBetween (checksumStart + 1 , request .targetLength + 1 );
232260 }
261+ doEarlyCopy = random .nextBoolean ();
233262
234263 final ArrayList <DiscoveryNode > nodes = new ArrayList <>(request .nodes ); // copy for shuffling purposes
235264 if (request .readEarly ) {
@@ -368,11 +397,38 @@ public StreamInput streamInput() throws IOException {
368397 }
369398
370399 private void onLastReadForInitialWrite () {
400+ var readBlobName = request .blobName ;
401+ if (request .doCopy && doEarlyCopy ) {
402+ try {
403+ final var copyName = request .blobName + "_copy" ;
404+ blobContainer .copyBlob (
405+ OperationPurpose .REPOSITORY_ANALYSIS ,
406+ blobContainer ,
407+ request .blobName ,
408+ copyName ,
409+ request .targetLength
410+ );
411+ readBlobName = copyName ;
412+ } catch (UnsupportedOperationException uoe ) {
413+ // not all repositories support copy
414+ } catch (NoSuchFileException | FileNotFoundException ignored ) {
415+ // assume this is due to copy starting before the source was finished
416+ logger .trace ("copy FNF before write completed: {}" , request .blobName );
417+ } catch (IOException e ) {
418+ if (request .getAbortWrite () == false ) {
419+ throw new RepositoryVerificationException (
420+ request .getRepositoryName (),
421+ "failed to copy blob before write: [" + request .blobName + "]" ,
422+ e
423+ );
424+ }
425+ }
426+ }
371427 if (earlyReadNodes .isEmpty () == false ) {
372428 if (logger .isTraceEnabled ()) {
373429 logger .trace ("sending read request to [{}] for [{}] before write complete" , earlyReadNodes , request .getDescription ());
374430 }
375- readOnNodes (earlyReadNodes , true );
431+ readOnNodes (earlyReadNodes , readBlobName , true );
376432 }
377433 if (request .getAbortWrite ()) {
378434 throw new BlobWriteAbortedException ();
@@ -383,10 +439,37 @@ private void doReadAfterWrite() {
383439 if (logger .isTraceEnabled ()) {
384440 logger .trace ("sending read request to [{}] for [{}] after write complete" , readNodes , request .getDescription ());
385441 }
386- readOnNodes (readNodes , false );
442+ var readBlobName = request .blobName ;
443+ if (request .doCopy && (doEarlyCopy == false ) && (request .getAbortWrite () == false )) {
444+ try {
445+ final var copyName = request .blobName + "_copy" ;
446+ blobContainer .copyBlob (
447+ OperationPurpose .REPOSITORY_ANALYSIS ,
448+ blobContainer ,
449+ request .blobName ,
450+ copyName ,
451+ request .targetLength
452+ );
453+ readBlobName = copyName ;
454+ } catch (UnsupportedOperationException uoe ) {
455+ // not all repositories support copy
456+ } catch (IOException e ) {
457+ for (int i = 0 ; i < readNodes .size (); i ++) {
458+ readNodesListener .onFailure (
459+ new RepositoryVerificationException (
460+ request .getRepositoryName (),
461+ "failed to copy blob after write: [" + request .blobName + "]" ,
462+ e
463+ )
464+ );
465+ }
466+ return ;
467+ }
468+ }
469+ readOnNodes (readNodes , readBlobName , false );
387470 }
388471
389- private void readOnNodes (List <DiscoveryNode > nodes , boolean beforeWriteComplete ) {
472+ private void readOnNodes (List <DiscoveryNode > nodes , String blobName , boolean beforeWriteComplete ) {
390473 for (DiscoveryNode node : nodes ) {
391474 if (task .isCancelled ()) {
392475 // record dummy response since we're already on the path to failure
@@ -396,7 +479,7 @@ private void readOnNodes(List<DiscoveryNode> nodes, boolean beforeWriteComplete)
396479 } else {
397480 // no need for extra synchronization after checking if we were cancelled a couple of lines ago -- we haven't notified
398481 // the outer listener yet so any bans on the children are still in place
399- final GetBlobChecksumAction .Request blobChecksumRequest = getBlobChecksumRequest ();
482+ final GetBlobChecksumAction .Request blobChecksumRequest = getBlobChecksumRequest (blobName );
400483 transportService .sendChildRequest (
401484 node ,
402485 GetBlobChecksumAction .NAME ,
@@ -432,11 +515,11 @@ public void onFailure(Exception e) {
432515 }
433516 }
434517
435- private GetBlobChecksumAction .Request getBlobChecksumRequest () {
518+ private GetBlobChecksumAction .Request getBlobChecksumRequest (String blobName ) {
436519 return new GetBlobChecksumAction .Request (
437520 request .getRepositoryName (),
438521 request .getBlobPath (),
439- request . getBlobName () ,
522+ blobName ,
440523 checksumStart ,
441524 checksumWholeBlob ? 0L : checksumEnd
442525 );
@@ -650,6 +733,7 @@ static class Request extends ActionRequest {
650733 private final boolean readEarly ;
651734 private final boolean writeAndOverwrite ;
652735 private final boolean abortWrite ;
736+ private final boolean doCopy ;
653737
654738 Request (
655739 String repositoryName ,
@@ -662,7 +746,8 @@ static class Request extends ActionRequest {
662746 int earlyReadNodeCount ,
663747 boolean readEarly ,
664748 boolean writeAndOverwrite ,
665- boolean abortWrite
749+ boolean abortWrite ,
750+ boolean doCopy
666751 ) {
667752 assert 0 < targetLength ;
668753 assert targetLength <= MAX_ATOMIC_WRITE_SIZE || (readEarly == false && writeAndOverwrite == false ) : "oversized atomic write" ;
@@ -678,6 +763,7 @@ static class Request extends ActionRequest {
678763 this .readEarly = readEarly ;
679764 this .writeAndOverwrite = writeAndOverwrite ;
680765 this .abortWrite = abortWrite ;
766+ this .doCopy = doCopy ;
681767 }
682768
683769 Request (StreamInput in ) throws IOException {
@@ -693,6 +779,8 @@ static class Request extends ActionRequest {
693779 readEarly = in .readBoolean ();
694780 writeAndOverwrite = in .readBoolean ();
695781 abortWrite = in .readBoolean ();
782+ // TODO(BWC): read unconditionally; presumably needs a transport-version guard for older nodes - confirm
783+ doCopy = in .readBoolean ();
696784 }
697785
698786 @ Override
@@ -709,6 +797,8 @@ public void writeTo(StreamOutput out) throws IOException {
709797 out .writeBoolean (readEarly );
710798 out .writeBoolean (writeAndOverwrite );
711799 out .writeBoolean (abortWrite );
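800+ // TODO(BWC): written unconditionally; presumably needs a transport-version guard for older nodes - confirm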
801+ out .writeBoolean (doCopy );
712802 }
713803
714804 @ Override
0 commit comments