Skip to content

Commit ba13d6c

Browse files
abhishekmjainWill-Lo
authored andcommitted
Fix owner execute permission bit issue for path existing in destination (#4147)
1 parent 0db6a96 commit ba13d6c

File tree

3 files changed

+192
-7
lines changed

3 files changed

+192
-7
lines changed

gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
116116
// map of paths and permissions sorted by depth of path, so that permissions can be set in order
117117
Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissions = new HashMap<>();
118118
Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissionsForSetPermissionStep = new HashMap<>();
119+
Map<String, OwnerAndPermission> existingDirectoryPermissionsForSetPermissionStep = new HashMap<>();
119120
TreeMap<String, OwnerAndPermission> flattenedAncestorPermissions = new TreeMap<>(
120121
Comparator.comparingInt((String o) -> o.split("/").length).thenComparing(o -> o));
121122
try {
@@ -151,13 +152,18 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
151152
// FileAwareInputStreamDataWriter::setOwnerExecuteBitIfDirectory -> HadoopUtils::addExecutePermissionToOwner
152153
// We need to revert this extra permission change in setPermissionStep
153154
if (srcFile.isDirectory() && !srcFile.getPermission().getUserAction().implies(FsAction.EXECUTE)
154-
&& !ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString())
155-
&& !targetFs.exists(fileToCopy)) {
156-
List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = new ArrayList<>();
155+
&& !ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString())) {
157156
OwnerAndPermission srcFileOwnerPermissionReplicatedForDest = new OwnerAndPermission(copyableFile.getDestinationOwnerAndPermission());
158-
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
159-
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> ancestorsOwnerAndPermissionUpdated.add(new OwnerAndPermission(ancestorOwnPerm)));
160-
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), ancestorsOwnerAndPermissionUpdated);
157+
if(!targetFs.exists(fileToCopy)) {
158+
List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = new ArrayList<>();
159+
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
160+
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> ancestorsOwnerAndPermissionUpdated.add(new OwnerAndPermission(ancestorOwnPerm)));
161+
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), ancestorsOwnerAndPermissionUpdated);
162+
}
163+
else {
164+
// If the path exists, update only current directory permission in post publish step and not entire hierarchy.
165+
existingDirectoryPermissionsForSetPermissionStep.put(fileToCopy.toString(), srcFileOwnerPermissionReplicatedForDest);
166+
}
161167
}
162168

163169
// Always grab the parent since the above permission setting should be setting the permission for a folder itself
@@ -198,6 +204,10 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
198204
currentPath = currentPath.getParent();
199205
}
200206
}
207+
for (Map.Entry<String, OwnerAndPermission> existingDirectoryPermissions : existingDirectoryPermissionsForSetPermissionStep.entrySet()) {
208+
Path currentPath = new Path(existingDirectoryPermissions.getKey());
209+
flattenedAncestorPermissions.put(currentPath.toString(), existingDirectoryPermissions.getValue());
210+
}
201211
CommitStep setPermissionCommitStep = new SetPermissionCommitStep(targetFs, flattenedAncestorPermissions, this.properties);
202212
copyEntities.add(new PostPublishStep(datasetURN(), Maps.newHashMap(), setPermissionCommitStep, 1));
203213
}

gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ public void testFindFilesWithDifferentPermissions() throws IOException, URISynta
159159
CommitStep setPermissionStep = ((PostPublishStep) fileSet.getFiles().get(3)).getStep();
160160
Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
161161
Map<String, OwnerAndPermission> ownerAndPermissionMap = ((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
162-
// Ignore /tmp as it already exists on destination
162+
// Ignore /tmp as it is not part of manifest and already present in destination
163163
Assert.assertEquals(ownerAndPermissionMap.size(), 1);
164164
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset"));
165165
}
@@ -415,6 +415,161 @@ public void testSetPermissionWhenCopyingDirectoryWithOwnerExecutePermissionSetUn
415415
}
416416
}
417417

418+
/**
419+
* Test correct permissions are set for existing directories.
420+
*/
421+
@Test
422+
public void testCorrectPermissionForExistingDirectoriesWithoutExecute() throws IOException, URISyntaxException {
423+
Path manifestPath = new Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
424+
Properties props = new Properties();
425+
props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
426+
props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
427+
428+
try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
429+
FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
430+
FileSystem destFs = Mockito.mock(FileSystem.class)) {
431+
432+
setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, false);
433+
434+
// Key difference: directory already exists on target
435+
Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
436+
Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
437+
438+
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new Path(tmpDir.toString())));
439+
440+
// Source directory has 000 permission (no execute bit)
441+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "d---------", "owner1", "group1", true);
442+
setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner2", "group2", true);
443+
444+
Iterator<FileSet<CopyEntity>> fileSets = new ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
445+
.getFileSetIterator(destFs, CopyConfiguration.builder(destFs, props).build());
446+
447+
Assert.assertTrue(fileSets.hasNext());
448+
FileSet<CopyEntity> fileSet = fileSets.next();
449+
Assert.assertEquals(fileSet.getFiles().size(), 3); // 1 dir to copy + 1 pre publish step + 1 post publish step
450+
451+
// Verify SetPermissionCommitStep includes the directory even though it exists on target
452+
CommitStep setPermissionStep = ((PostPublishStep) fileSet.getFiles().get(2)).getStep();
453+
Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
454+
Map<String, OwnerAndPermission> ownerAndPermissionMap = ((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
455+
456+
// Should include only /tmp/dataset and not /tmp since it is not part of manifest for permission correction
457+
Assert.assertEquals(ownerAndPermissionMap.size(), 1);
458+
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset"));
459+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getFsPermission(), FsPermission.valueOf("d---------"));
460+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getOwner(), "owner1");
461+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getGroup(), "group1");
462+
463+
Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp"));
464+
}
465+
}
466+
467+
/**
468+
* Test correct permissions are set in post-publish step.
469+
*/
470+
@Test
471+
public void testCorrectPermissionForMixedExistingAndNewDirectories() throws IOException, URISyntaxException {
472+
Path manifestPath = new Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/nestedDirectories.json").getPath());
473+
Properties props = new Properties();
474+
props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
475+
props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
476+
477+
try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
478+
FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
479+
FileSystem destFs = Mockito.mock(FileSystem.class)) {
480+
481+
setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, false);
482+
483+
// Mix of existing and non-existing directories
484+
Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
485+
Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
486+
Mockito.when(destFs.exists(new Path("/tmp/dataset/hourly"))).thenReturn(false);
487+
Mockito.when(destFs.exists(new Path("/tmp/dataset/hourly/metadata"))).thenReturn(false);
488+
Mockito.when(destFs.exists(new Path("/tmp/dataset2"))).thenReturn(true);
489+
Mockito.when(destFs.exists(new Path("/tmp/dataset2/hourly"))).thenReturn(true);
490+
Mockito.when(destFs.exists(new Path("/tmp/dataset2/hourly/metadata"))).thenReturn(true);
491+
Mockito.when(destFs.exists(new Path("/tmp/dataset2/hourly/metadata2"))).thenReturn(true);
492+
493+
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new Path(tmpDir.toString())));
494+
495+
// Set up directories with various permissions, some without execute bit
496+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset/hourly/metadata", "drw-rw-rw-", "owner1", "group1", true);
497+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly/metadata", "dr-xr--r--", "owner2", "group2", true);
498+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly/metadata2", "dr--r--r--", "owner2", "group2", true);
499+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset/hourly", "drw-rw-rw-", "owner1", "group1", true);
500+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly", "dr--r--r--", "owner2", "group2", true);
501+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "drw-r-----", "owner1", "group1", true);
502+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2", "dr--r--r--", "owner2", "group2", true);
503+
setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner3", "group3", true);
504+
505+
AclStatus aclStatusDest = buildAclStatusWithPermissions("user::r--,group::---,other::---", "group3", "owner3");
506+
Mockito.when(destFs.getAclStatus(any(Path.class))).thenReturn(aclStatusDest);
507+
508+
Iterator<FileSet<CopyEntity>> fileSets = new ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
509+
.getFileSetIterator(destFs, CopyConfiguration.builder(destFs, props).build());
510+
511+
Assert.assertTrue(fileSets.hasNext());
512+
FileSet<CopyEntity> fileSet = fileSets.next();
513+
514+
CommitStep setPermissionStep = ((PostPublishStep) fileSet.getFiles().get(4)).getStep();
515+
Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
516+
Map<String, OwnerAndPermission> ownerAndPermissionMap = ((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
517+
518+
// Not part of manifest and already exists in destination
519+
Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset"));
520+
Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset2"));
521+
522+
Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset2/hourly/metadata")); // Owner execute is set
523+
524+
// Path not present in target
525+
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset/hourly/metadata"));
526+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset/hourly/metadata").getFsPermission(), FsPermission.valueOf("drw-rw-rw-"));
527+
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset2/hourly/metadata2"));
528+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset2/hourly/metadata2").getFsPermission(), FsPermission.valueOf("dr--r--r--"));
529+
}
530+
}
531+
532+
/**
533+
* Test that directories with execute permission are NOT added to SetPermissionCommitStep
534+
* when they already exist on target.
535+
*/
536+
@Test
537+
public void testNoPermissionFixForExistingDirectoriesWithExecute() throws IOException, URISyntaxException {
538+
Path manifestPath = new Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
539+
Properties props = new Properties();
540+
props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
541+
props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
542+
543+
try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
544+
FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
545+
FileSystem destFs = Mockito.mock(FileSystem.class)) {
546+
547+
setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, false);
548+
549+
// Directory already exists on target
550+
Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
551+
Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
552+
553+
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new Path(tmpDir.toString())));
554+
555+
// Source directory has execute permission (should NOT need permission fix)
556+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "drwxrwxrwx", "owner1", "group1", true);
557+
setFsMockPathWithPermissions(sourceFs, "/tmp", "dr-xr-xr-x", "owner2", "group2", true);
558+
559+
Iterator<FileSet<CopyEntity>> fileSets = new ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
560+
.getFileSetIterator(destFs, CopyConfiguration.builder(destFs, props).build());
561+
562+
Assert.assertTrue(fileSets.hasNext());
563+
FileSet<CopyEntity> fileSet = fileSets.next();
564+
565+
CommitStep setPermissionStep = ((PostPublishStep) fileSet.getFiles().get(2)).getStep();
566+
Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
567+
Map<String, OwnerAndPermission> ownerAndPermissionMap = ((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
568+
569+
Assert.assertEquals(ownerAndPermissionMap.size(), 0);
570+
}
571+
}
572+
418573
private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, Path manifestPath, FileSystem manifestReadFs, boolean setFileStatusMock) throws IOException, URISyntaxException {
419574
URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
420575
URI MANIFEST_READ_FS_URI = new URI("manifest-read", "the.manifest-source.org", "/", null);
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[
2+
{
3+
"id":"1",
4+
"fileName":"/tmp/dataset/hourly/metadata",
5+
"fileGroup":"/tmp/dataset",
6+
"fileSizeInBytes":"1024"
7+
},
8+
{
9+
"id":"2",
10+
"fileName":"/tmp/dataset2/hourly/metadata",
11+
"fileGroup":"/tmp/dataset2",
12+
"fileSizeInBytes":"1028"
13+
},
14+
{
15+
"id":"3",
16+
"fileName":"/tmp/dataset2/hourly/metadata2",
17+
"fileGroup":"/tmp/dataset2",
18+
"fileSizeInBytes":"1028"
19+
}
20+
]

0 commit comments

Comments
 (0)