Skip to content

Commit 8d71c89

Browse files
authored
[GOBBLIN-2206] Revert extra execute bit getting set in ManifestDistcp (#4115)
* added check to add extra set permission step * add copy constructor for ownerandpermission * remove extra null check for checkstyle failure
1 parent be7c1a7 commit 8d71c89

File tree

5 files changed

+177
-1
lines changed

5 files changed

+177
-1
lines changed

gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.gobblin.data.management.copy;
1919

2020
import java.io.IOException;
21+
import java.util.ArrayList;
2122
import java.util.Collections;
2223
import java.util.Comparator;
2324
import java.util.HashMap;
@@ -31,6 +32,7 @@
3132
import org.apache.hadoop.fs.FileStatus;
3233
import org.apache.hadoop.fs.FileSystem;
3334
import org.apache.hadoop.fs.Path;
35+
import org.apache.hadoop.fs.permission.FsAction;
3436

3537
import com.google.common.base.Optional;
3638
import com.google.common.cache.Cache;
@@ -113,6 +115,7 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
113115
List<FileStatus> toDelete = Lists.newArrayList();
114116
// map of paths and permissions sorted by depth of path, so that permissions can be set in order
115117
Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissions = new HashMap<>();
118+
Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissionsForSetPermissionStep = new HashMap<>();
116119
TreeMap<String, OwnerAndPermission> flattenedAncestorPermissions = new TreeMap<>(
117120
Comparator.comparingInt((String o) -> o.split("/").length).thenComparing(o -> o));
118121
try {
@@ -143,12 +146,29 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
143146
copyableFile.setFsDatasets(srcFs, targetFs);
144147
copyEntities.add(copyableFile);
145148

149+
// In case of directory with 000 permission, the permission is changed to 100 due to HadoopUtils::addExecutePermissionToOwner
150+
// getting called from CopyDataPublisher::preserveFileAttrInPublisher -> FileAwareInputStreamDataWriter::setPathPermission ->
151+
// FileAwareInputStreamDataWriter::setOwnerExecuteBitIfDirectory -> HadoopUtils::addExecutePermissionToOwner
152+
// We need to revert this extra permission change in setPermissionStep
153+
if (srcFile.isDirectory() && !srcFile.getPermission().getUserAction().implies(FsAction.EXECUTE)
154+
&& !ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString())
155+
&& !targetFs.exists(fileToCopy)) {
156+
List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = new ArrayList<>();
157+
OwnerAndPermission srcFileOwnerPermissionReplicatedForDest = new OwnerAndPermission(copyableFile.getDestinationOwnerAndPermission());
158+
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
159+
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> ancestorsOwnerAndPermissionUpdated.add(new OwnerAndPermission(ancestorOwnPerm)));
160+
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), ancestorsOwnerAndPermissionUpdated);
161+
}
162+
146163
// Always grab the parent since the above permission setting should be setting the permission for a folder itself
147164
// {@link CopyDataPublisher#preserveFileAttrInPublisher} will be setting the permission for the empty child dir
148165
Path fromPath = fileToCopy.getParent();
149166
// Avoid duplicate calculation for the same ancestor
150167
if (fromPath != null && !ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()) && !targetFs.exists(fromPath)) {
151168
ancestorOwnerAndPermissions.put(fromPath.toString(), copyableFile.getAncestorsOwnerAndPermission());
169+
if (!ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString())) {
170+
ancestorOwnerAndPermissionsForSetPermissionStep.put(fromPath.toString(), copyableFile.getAncestorsOwnerAndPermission());
171+
}
152172
}
153173

154174
if (existOnTarget && srcFile.isFile()) {
@@ -167,7 +187,7 @@ public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, Cop
167187
copyEntities.add(new PrePublishStep(datasetURN(), Maps.newHashMap(), createDirectoryWithPermissionsCommitStep, 1));
168188

169189
if (this.enableSetPermissionPostPublish) {
170-
for (Map.Entry<String, List<OwnerAndPermission>> recursiveParentPermissions : ancestorOwnerAndPermissions.entrySet()) {
190+
for (Map.Entry<String, List<OwnerAndPermission>> recursiveParentPermissions : ancestorOwnerAndPermissionsForSetPermissionStep.entrySet()) {
171191
Path currentPath = new Path(recursiveParentPermissions.getKey());
172192
for (OwnerAndPermission ownerAndPermission : recursiveParentPermissions.getValue()) {
173193
// Ignore folders that already exist in destination, we assume that the publisher will re-sync those permissions if needed and

gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.hadoop.fs.permission.FsPermission;
3737
import org.mockito.Mockito;
3838
import org.testng.Assert;
39+
import org.testng.annotations.DataProvider;
3940
import org.testng.annotations.Test;
4041

4142
import com.google.common.io.Files;
@@ -343,6 +344,77 @@ public void testDisableSetPermissionStep() throws Exception {
343344
Assert.assertEquals(fileSet.getFiles().size(), 2); // 1 files to copy + 1 pre publish step
344345
}
345346

347+
@DataProvider
348+
public Object[][] dirPermissionWithExpectedSetPermissionStepCount() {
349+
return new Object[][] {
350+
{"d---------", 2},
351+
{"drw-rw--w-", 2},
352+
{"d-w-r----x", 2},
353+
{"drwxrwxrwx", 1},
354+
{"dr-xr-xr-x", 1},
355+
{"d--x------", 1},
356+
};
357+
}
358+
359+
@Test(dataProvider = "dirPermissionWithExpectedSetPermissionStepCount")
360+
public void testSetPermissionWhenCopyingDirectoryWithOwnerExecutePermissionSetUnset(String dirPermission, int expectedCount) throws IOException, URISyntaxException {
361+
Path manifestPath = new Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
362+
Properties props = new Properties();
363+
props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
364+
props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
365+
try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
366+
FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
367+
FileSystem destFs = Mockito.mock(FileSystem.class)) {
368+
369+
setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, false);
370+
371+
Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(false);
372+
Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(false);
373+
374+
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new Path(tmpDir.toString())));
375+
376+
setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", dirPermission, "owner1", "group1", true);
377+
setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner2", "group2", true);
378+
379+
Iterator<FileSet<CopyEntity>> fileSets = new ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props).getFileSetIterator(destFs,
380+
CopyConfiguration.builder(destFs, props).build());
381+
382+
Assert.assertTrue(fileSets.hasNext());
383+
FileSet<CopyEntity> fileSet = fileSets.next();
384+
// 1 dir to copy + 1 pre publish step + 1 post publish step
385+
Assert.assertEquals(fileSet.getFiles().size(),3);
386+
387+
CommitStep createDirectoryStep = ((PrePublishStep) fileSet.getFiles().get(1)).getStep();
388+
Assert.assertTrue(createDirectoryStep instanceof CreateDirectoryWithPermissionsCommitStep);
389+
Map<String, List<OwnerAndPermission>> pathAndPermissions = ((CreateDirectoryWithPermissionsCommitStep) createDirectoryStep).getPathAndPermissions();
390+
Assert.assertEquals(pathAndPermissions.size(), 1);
391+
392+
Assert.assertTrue(pathAndPermissions.containsKey("/tmp"));
393+
Assert.assertEquals(pathAndPermissions.get("/tmp").size(), 1);
394+
Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getFsPermission(), FsPermission.valueOf("dr--r--r--"));
395+
Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getOwner(), "owner2");
396+
Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getGroup(), "group2");
397+
398+
CommitStep setPermissionStep = ((PostPublishStep) fileSet.getFiles().get(2)).getStep();
399+
Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
400+
Map<String, OwnerAndPermission> ownerAndPermissionMap = ((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
401+
Assert.assertEquals(ownerAndPermissionMap.size(), expectedCount);
402+
403+
List<String> sortedMapKeys = new ArrayList<>(ownerAndPermissionMap.keySet());
404+
Assert.assertEquals(sortedMapKeys.get(0), "/tmp");
405+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getFsPermission(), FsPermission.valueOf("dr--r--r--"));
406+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getOwner(), "owner2");
407+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getGroup(), "group2");
408+
409+
if (expectedCount > 1) {
410+
Assert.assertEquals(sortedMapKeys.get(1), "/tmp/dataset");
411+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getFsPermission(), FsPermission.valueOf(dirPermission));
412+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getOwner(), "owner1");
413+
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getGroup(), "group1");
414+
}
415+
}
416+
}
417+
346418
private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, Path manifestPath, FileSystem manifestReadFs, boolean setFileStatusMock) throws IOException, URISyntaxException {
347419
URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
348420
URI MANIFEST_READ_FS_URI = new URI("manifest-read", "the.manifest-source.org", "/", null);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[
2+
{
3+
"id":"1",
4+
"fileName":"/tmp/dataset",
5+
"fileGroup":"/tmp/dataset",
6+
"fileSizeInBytes":"1024"
7+
}
8+
]

gobblin-utility/src/main/java/org/apache/gobblin/util/filesystem/OwnerAndPermission.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,22 @@ public OwnerAndPermission(String owner, String group, FsPermission fsPermission)
5454
this(owner, group, fsPermission, Lists.newArrayList());
5555
}
5656

57+
58+
/**
59+
* Copy constructor for {@link OwnerAndPermission}.
60+
* <p>
61+
* Creates a new instance by copying the values from another {@link OwnerAndPermission} object.
62+
* Performs a deep copy of {@link FsPermission} and a shallow copy of the {@code aclEntries} list,
63+
* since {@link org.apache.hadoop.fs.permission.AclEntry} is immutable.
64+
* </p>
65+
*
66+
* @param other the {@code OwnerAndPermission} instance to copy from.
67+
*/
68+
public OwnerAndPermission(OwnerAndPermission other) {
69+
this(other.owner, other.group, new FsPermission(other.fsPermission),
70+
other.aclEntries == null ? Lists.newArrayList() : Lists.newArrayList(other.aclEntries));
71+
}
72+
5773
@Override
5874
public void write(DataOutput dataOutput) throws IOException {
5975
Text.writeString(dataOutput, this.owner);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.gobblin.util.filesystem;
19+
20+
import java.util.Collections;
21+
import java.util.List;
22+
23+
import org.apache.hadoop.fs.permission.AclEntry;
24+
import org.apache.hadoop.fs.permission.AclEntryScope;
25+
import org.apache.hadoop.fs.permission.AclEntryType;
26+
import org.apache.hadoop.fs.permission.FsAction;
27+
import org.apache.hadoop.fs.permission.FsPermission;
28+
import org.testng.Assert;
29+
import org.testng.annotations.Test;
30+
31+
32+
/** Tests for {@link OwnerAndPermission}*/
33+
public class OwnerAndPermissionTest {
34+
35+
@Test
36+
public void testOwnerAndPermissionCopyConstructor() {
37+
String owner = "testOwner";
38+
String group = "testGroup";
39+
FsPermission permission = new FsPermission((short) 0755);
40+
AclEntry aclEntry = new AclEntry.Builder()
41+
.setPermission(FsAction.READ_WRITE)
42+
.setName("test-acl")
43+
.setScope(AclEntryScope.DEFAULT)
44+
.setType(AclEntryType.GROUP)
45+
.build();
46+
List<AclEntry> aclEntries = Collections.singletonList(aclEntry);
47+
48+
OwnerAndPermission ownerAndPermission = new OwnerAndPermission(owner, group, permission, aclEntries);
49+
OwnerAndPermission copyOwnerAndPermission = new OwnerAndPermission(ownerAndPermission);
50+
51+
Assert.assertEquals(copyOwnerAndPermission.getOwner(), owner);
52+
Assert.assertEquals(copyOwnerAndPermission.getGroup(), group);
53+
Assert.assertEquals(copyOwnerAndPermission.getFsPermission(), permission);
54+
Assert.assertEquals(copyOwnerAndPermission.getAclEntries(), aclEntries);
55+
56+
Assert.assertNotSame(ownerAndPermission, copyOwnerAndPermission);
57+
Assert.assertNotSame(ownerAndPermission.getFsPermission(), copyOwnerAndPermission.getFsPermission());
58+
Assert.assertNotSame(ownerAndPermission.getAclEntries(), copyOwnerAndPermission.getAclEntries());
59+
}
60+
}

0 commit comments

Comments
 (0)