31
31
import org .apache .flink .runtime .checkpoint .filemerging .FileMergingSnapshotManager .SpaceStat ;
32
32
import org .apache .flink .runtime .checkpoint .filemerging .FileMergingSnapshotManager .SubtaskKey ;
33
33
import org .apache .flink .runtime .clusterframework .types .ResourceID ;
34
+ import org .apache .flink .runtime .jobgraph .JobVertexID ;
34
35
import org .apache .flink .runtime .jobgraph .OperatorID ;
35
36
import org .apache .flink .runtime .metrics .groups .UnregisteredMetricGroups ;
36
37
import org .apache .flink .runtime .state .CheckpointedStateScope ;
@@ -73,7 +74,7 @@ public abstract class FileMergingSnapshotManagerTestBase {
73
74
74
75
final JobID jobID = new JobID ();
75
76
76
- final OperatorID operatorID = new OperatorID (289347923L , 75893479L );
77
+ final JobVertexID vertexID = new JobVertexID (289347923L , 75893479L );
77
78
78
79
SubtaskKey subtaskKey1 ;
79
80
SubtaskKey subtaskKey2 ;
@@ -89,9 +90,9 @@ public abstract class FileMergingSnapshotManagerTestBase {
89
90
@ BeforeEach
90
91
public void setup (@ TempDir java .nio .file .Path tempFolder ) {
91
92
subtaskKey1 =
92
- new SubtaskKey (jobID , operatorID , new TaskInfoImpl ("TestingTask" , 128 , 0 , 128 , 3 ));
93
+ new SubtaskKey (jobID , vertexID , new TaskInfoImpl ("TestingTask" , 128 , 0 , 128 , 3 ));
93
94
subtaskKey2 =
94
- new SubtaskKey (jobID , operatorID , new TaskInfoImpl ("TestingTask" , 128 , 1 , 128 , 3 ));
95
+ new SubtaskKey (jobID , vertexID , new TaskInfoImpl ("TestingTask" , 128 , 1 , 128 , 3 ));
95
96
96
97
checkpointBaseDir = new Path (tempFolder .toString (), jobID .toHexString ());
97
98
sharedStateDir = new Path (checkpointBaseDir , CHECKPOINT_SHARED_STATE_DIR );
@@ -492,12 +493,21 @@ public void testRestore() throws Exception {
492
493
(FileMergingSnapshotManagerBase )
493
494
createFileMergingSnapshotManager (checkpointBaseDir );
494
495
CloseableRegistry closeableRegistry = new CloseableRegistry ()) {
495
-
496
+ fmsm . registerSubtaskForSharedStates ( subtaskKey1 );
496
497
fmsm .notifyCheckpointStart (subtaskKey1 , checkpointId );
497
498
498
499
Map <OperatorID , OperatorSubtaskState > subtaskStatesByOperatorID = new HashMap <>();
500
+ // Here, we simulate a task with 2 operators. Each operator has two keyed state handles and
501
+ // two operator state handles. The second operator's id is the same as the vertexID.
502
+ // first operator
503
+ subtaskStatesByOperatorID .put (
504
+ new OperatorID (777L , 75893479L ),
505
+ buildOperatorSubtaskState (checkpointId , fmsm , closeableRegistry ));
506
+
507
+ // second operator
499
508
subtaskStatesByOperatorID .put (
500
- operatorID , buildOperatorSubtaskState (checkpointId , fmsm , closeableRegistry ));
509
+ OperatorID .fromJobVertexID (vertexID ),
510
+ buildOperatorSubtaskState (checkpointId , fmsm , closeableRegistry ));
501
511
taskStateSnapshot = new TaskStateSnapshot (subtaskStatesByOperatorID );
502
512
oldSpaceStat = fmsm .spaceStat ;
503
513
@@ -510,6 +520,7 @@ public void testRestore() throws Exception {
510
520
try (FileMergingSnapshotManagerBase fmsm =
511
521
(FileMergingSnapshotManagerBase )
512
522
createFileMergingSnapshotManager (checkpointBaseDir )) {
523
+ fmsm .registerSubtaskForSharedStates (subtaskKey1 );
513
524
TaskInfo taskInfo =
514
525
new TaskInfoImpl (
515
526
"test restore" ,
@@ -521,19 +532,15 @@ public void testRestore() throws Exception {
521
532
taskStateSnapshot .getSubtaskStateMappings ()) {
522
533
SubtaskFileMergingManagerRestoreOperation restoreOperation =
523
534
new SubtaskFileMergingManagerRestoreOperation (
524
- checkpointId ,
525
- fmsm ,
526
- jobID ,
527
- taskInfo ,
528
- entry .getKey (),
529
- entry .getValue ());
535
+ checkpointId , fmsm , jobID , taskInfo , vertexID , entry .getValue ());
530
536
restoreOperation .restore ();
531
537
}
532
538
TreeMap <Long , Set <LogicalFile >> stateFiles = fmsm .getUploadedStates ();
533
539
assertThat (stateFiles .size ()).isEqualTo (1 );
534
540
Set <LogicalFile > restoreFileSet = stateFiles .get (checkpointId );
535
541
assertThat (restoreFileSet ).isNotNull ();
536
- assertThat (restoreFileSet .size ()).isEqualTo (4 );
542
+ // 2 operators * (2 keyed state + 2 operator state)
543
+ assertThat (restoreFileSet .size ()).isEqualTo (8 );
537
544
assertThat (fmsm .spaceStat ).isEqualTo (oldSpaceStat );
538
545
for (LogicalFile file : restoreFileSet ) {
539
546
assertThat (fmsm .getLogicalFile (file .getFileId ())).isEqualTo (file );
@@ -662,29 +669,44 @@ private OperatorSubtaskState buildOperatorSubtaskState(
662
669
Collections .singletonList (
663
670
IncrementalKeyedStateHandle .HandleAndLocalPath .of (
664
671
buildOneSegmentFileHandle (
665
- checkpointId , fmsm , closeableRegistry ),
672
+ checkpointId ,
673
+ fmsm ,
674
+ CheckpointedStateScope .SHARED ,
675
+ closeableRegistry ),
666
676
"localPath" )),
667
677
Collections .emptyList (),
668
678
null );
669
679
670
680
KeyGroupsStateHandle keyedStateHandle2 =
671
681
new KeyGroupsStateHandle (
672
682
new KeyGroupRangeOffsets (0 , 8 ),
673
- buildOneSegmentFileHandle (checkpointId , fmsm , closeableRegistry ));
683
+ buildOneSegmentFileHandle (
684
+ checkpointId ,
685
+ fmsm ,
686
+ CheckpointedStateScope .EXCLUSIVE ,
687
+ closeableRegistry ));
674
688
675
689
OperatorStateHandle operatorStateHandle1 =
676
690
new FileMergingOperatorStreamStateHandle (
677
691
null ,
678
692
null ,
679
693
Collections .emptyMap (),
680
- buildOneSegmentFileHandle (checkpointId , fmsm , closeableRegistry ));
694
+ buildOneSegmentFileHandle (
695
+ checkpointId ,
696
+ fmsm ,
697
+ CheckpointedStateScope .EXCLUSIVE ,
698
+ closeableRegistry ));
681
699
682
700
OperatorStateHandle operatorStateHandle2 =
683
701
new FileMergingOperatorStreamStateHandle (
684
702
null ,
685
703
null ,
686
704
Collections .emptyMap (),
687
- buildOneSegmentFileHandle (checkpointId , fmsm , closeableRegistry ));
705
+ buildOneSegmentFileHandle (
706
+ checkpointId ,
707
+ fmsm ,
708
+ CheckpointedStateScope .EXCLUSIVE ,
709
+ closeableRegistry ));
688
710
689
711
return OperatorSubtaskState .builder ()
690
712
.setManagedKeyedState (keyedStateHandle1 )
@@ -695,10 +717,13 @@ private OperatorSubtaskState buildOperatorSubtaskState(
695
717
}
696
718
697
719
/**
 * Test helper that obtains a {@link FileMergingCheckpointStateOutputStream} from
 * {@code writeCheckpointAndGetStream} and returns the handle produced by closing it.
 *
 * <p>In this diff the signature gains a {@code scope} parameter, which is forwarded unchanged
 * to {@code writeCheckpointAndGetStream}; the removed lines show the previous three-argument
 * form that did not let the caller choose a scope.
 *
 * @param checkpointId id of the checkpoint, passed through to the stream helper
 * @param fmsm the file-merging snapshot manager, passed through to the stream helper
 * @param scope the checkpointed state scope, passed through to the stream helper
 * @param closeableRegistry registry passed through to the stream helper
 * @return the result of {@code closeAndGetHandle()} on the obtained stream
 * @throws Exception declared by the method; the visible body does not narrow it
 */
private SegmentFileStateHandle buildOneSegmentFileHandle (
698
- long checkpointId , FileMergingSnapshotManager fmsm , CloseableRegistry closeableRegistry )
720
+ long checkpointId ,
721
+ FileMergingSnapshotManager fmsm ,
722
+ CheckpointedStateScope scope ,
723
+ CloseableRegistry closeableRegistry )
699
724
throws Exception {
700
725
FileMergingCheckpointStateOutputStream outputStream =
701
- writeCheckpointAndGetStream (checkpointId , fmsm , closeableRegistry );
726
+ writeCheckpointAndGetStream (checkpointId , fmsm , scope , closeableRegistry );
702
727
return outputStream .closeAndGetHandle ();
703
728
}
704
729
@@ -741,15 +766,13 @@ jobID, new ResourceID(tmId), getFileMergingType())
741
766
}
742
767
743
768
/**
 * Convenience overload that delegates to the six-argument {@code writeCheckpointAndGetStream},
 * always using {@code subtaskKey1} as the subtask key and {@code 32} as the final argument.
 *
 * <p>The removed lines hard-coded {@code CheckpointedStateScope.EXCLUSIVE}; the added lines
 * take the scope from the caller instead.
 *
 * @param checkpointId id of the checkpoint, forwarded to the delegate
 * @param fmsm the file-merging snapshot manager, forwarded to the delegate
 * @param scope the checkpointed state scope, forwarded to the delegate
 * @param closeableRegistry registry forwarded to the delegate
 * @return whatever the six-argument overload returns
 * @throws IOException declared by the method; presumably propagated from the delegate
 */
FileMergingCheckpointStateOutputStream writeCheckpointAndGetStream (
744
- long checkpointId , FileMergingSnapshotManager fmsm , CloseableRegistry closeableRegistry )
769
+ long checkpointId ,
770
+ FileMergingSnapshotManager fmsm ,
771
+ CheckpointedStateScope scope ,
772
+ CloseableRegistry closeableRegistry )
745
773
throws IOException {
746
774
return writeCheckpointAndGetStream (
747
- subtaskKey1 ,
748
- checkpointId ,
749
- CheckpointedStateScope .EXCLUSIVE ,
750
- fmsm ,
751
- closeableRegistry ,
752
- 32 );
// NOTE(review): the meaning of the literal 32 is not visible here; presumably the number of
// bytes written by the delegate. Confirm against the six-argument overload.
775
+ subtaskKey1 , checkpointId , scope , fmsm , closeableRegistry , 32 );
753
776
}
754
777
755
778
FileMergingCheckpointStateOutputStream writeCheckpointAndGetStream (
0 commit comments