Skip to content

Commit 855fa94

Browse files
authored
Fix read issue on concurrent open after truncate (#1781)
Fix a race condition in which 2 concurrent open requests occurring after a truncate resulted in errors on read. The issue is caused by a concurrent operation which ignores that the inode is still completing an upload and tries to refresh its state by performing a remote lookup to the bucket. By the time the remote lookup completes, its result may be stale but still be used to overwrite the result of the upload. This fix adds a check for a pending upload instead of only relying on the `write_status` field. ### Does this change impact existing behavior? No. ### Does this change need a changelog entry? Does it require a version change? Added changelog entries for a patch release. --- By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license and I agree to the terms of the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). --------- Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>
1 parent 90bb0b4 commit 855fa94

File tree

4 files changed

+100
-16
lines changed

4 files changed

+100
-16
lines changed

mountpoint-s3-fs/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## Unreleased (v0.9.1)
22

3+
* Fix a race condition where concurrent operations after closing a truncated file could result in I/O errors on subsequent reads. The issue was introduced in `mountpoint-s3-fs` v0.9.0.
4+
([#1781](https://github.com/awslabs/mountpoint-s3/pull/1781))
35
* Upgrade cargo dependencies.
46
* Fix incorrect validation of default data cache limit which would cause Mountpoint to preserve less than 5% of available space ([#1779](https://github.com/awslabs/mountpoint-s3/pull/1779))
57

mountpoint-s3-fs/src/superblock.rs

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,11 @@ impl<OC: ObjectClient + Send + Sync> Superblock<OC> {
280280

281281
{
282282
let mut sync = inode.get_mut_inode_state()?;
283-
let is_remote = sync.write_status == WriteStatus::Remote;
283+
let has_local_state = sync.write_status != WriteStatus::Remote || sync.pending_upload_hook.is_some();
284284

285-
if !is_remote || !force_revalidate_if_remote {
286-
// If the inode is local (open/unopened), extend its stat's validity before returning
287-
if !is_remote {
285+
if has_local_state || !force_revalidate_if_remote {
286+
// If the inode is local (open/unopened) and/or has a pending upload, extend its stat's validity before returning
287+
if has_local_state {
288288
let validity = match inode.kind() {
289289
InodeKind::File => self.inner.config.cache_config.file_ttl,
290290
InodeKind::Directory => self.inner.config.cache_config.dir_ttl,
@@ -848,9 +848,9 @@ impl<OC: ObjectClient + Send + Sync + Clone> Metablock for Superblock<OC> {
848848
));
849849
}
850850

851+
let inode = looked_up_inode.inode;
851852
let (pending_upload_hook, inode_lookup) = {
852-
let inode = looked_up_inode.inode.clone();
853-
let mut locked_inode = looked_up_inode.inode.get_mut_inode_state()?;
853+
let mut locked_inode = inode.get_mut_inode_state()?;
854854

855855
let pending_upload_hook = match mode {
856856
ReadWriteMode::Read => self.start_reading(&mut locked_inode, inode.clone(), fh)?,
@@ -870,10 +870,19 @@ impl<OC: ObjectClient + Send + Sync + Clone> Metablock for Superblock<OC> {
870870
(pending_upload_hook, inode_lookup)
871871
};
872872

873-
let lookup = if let Some(upload_hook) = pending_upload_hook
874-
&& let Some(lookup_after_upload) = upload_hook.wait_for_completion().await?
875-
{
876-
lookup_after_upload
873+
let lookup = if let Some(upload_hook) = pending_upload_hook {
874+
if let Some(lookup_after_upload) = upload_hook.wait_for_completion().await? {
875+
lookup_after_upload
876+
} else {
877+
// Return up-to-date lookup.
878+
let locked_inode = inode.get_inode_state()?;
879+
Lookup::new(
880+
inode.ino(),
881+
locked_inode.stat.clone(),
882+
inode.kind(),
883+
Some(S3Location::new(self.inner.s3_path.clone(), inode.valid_key().clone())),
884+
)
885+
}
877886
} else {
878887
inode_lookup
879888
};
@@ -1670,8 +1679,10 @@ impl<OC: ObjectClient + Send + Sync> SuperblockInner<OC> {
16701679
(None, None) => Err(InodeError::FileDoesNotExist(name.to_owned(), parent.err())),
16711680
(Some(remote), Some(existing_inode)) => {
16721681
let mut existing_state = existing_inode.get_mut_inode_state()?;
1682+
let existing_has_local_state =
1683+
existing_state.write_status != WriteStatus::Remote || existing_state.pending_upload_hook.is_some();
16731684
if remote.kind == existing_inode.kind()
1674-
&& existing_state.write_status == WriteStatus::Remote
1685+
&& !existing_has_local_state
16751686
&& existing_state.stat.etag == remote.stat.etag
16761687
{
16771688
trace!(parent=?existing_inode.parent(), name=?existing_inode.name(), ino=?existing_inode.ino(), "updating inode in place");
@@ -1758,11 +1769,12 @@ impl<OC: ObjectClient + Send + Sync> SuperblockInner<OC> {
17581769
// being consistent with our stated semantics about implicit directories and how
17591770
// directories shadow files.
17601771
let mut existing_state = existing_inode.get_mut_inode_state()?;
1761-
let existing_is_remote = existing_state.write_status == WriteStatus::Remote;
1772+
let existing_has_local_state =
1773+
existing_state.write_status != WriteStatus::Remote || existing_state.pending_upload_hook.is_some();
17621774

17631775
// Remote files are always shadowed by existing local files/directories, so do
17641776
// nothing and return the existing inode.
1765-
if remote.kind == InodeKind::File && !existing_is_remote {
1777+
if remote.kind == InodeKind::File && existing_has_local_state {
17661778
return Ok(LookedUpInode {
17671779
inode: existing_inode.clone(),
17681780
stat: existing_state.stat.clone(),
@@ -1776,10 +1788,10 @@ impl<OC: ObjectClient + Send + Sync> SuperblockInner<OC> {
17761788
// updating the parent.
17771789
let same_kind = remote.kind == existing_inode.kind();
17781790
let same_etag = existing_state.stat.etag == remote.stat.etag;
1779-
if same_kind && same_etag && (existing_is_remote || remote.kind == InodeKind::Directory) {
1791+
if same_kind && same_etag && (!existing_has_local_state || remote.kind == InodeKind::Directory) {
17801792
trace!(parent=?existing_inode.parent(), name=?existing_inode.name(), ino=?existing_inode.ino(), "updating inode in place (slow path)");
17811793
existing_state.stat = remote.stat.clone();
1782-
if remote.kind == InodeKind::Directory && !existing_is_remote {
1794+
if remote.kind == InodeKind::Directory && existing_has_local_state {
17831795
trace!(parent=?existing_inode.parent(), name=?existing_inode.name(), ino=?existing_inode.ino(), "local directory has become remote");
17841796
existing_state.write_status = WriteStatus::Remote;
17851797
let InodeKindData::Directory { writing_children, .. } = &mut parent_state.kind_data else {
@@ -1799,7 +1811,7 @@ impl<OC: ObjectClient + Send + Sync> SuperblockInner<OC> {
17991811
ino=?existing_inode.ino(),
18001812
same_kind,
18011813
same_etag,
1802-
existing_is_remote,
1814+
existing_has_local_state,
18031815
remote_is_dir = remote.kind == InodeKind::Directory,
18041816
"inode could not be updated in place",
18051817
);

mountpoint-s3-fs/tests/fuse_tests/write_test.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,74 @@ fn overwrite_after_read_test_mock(prefix: &str) {
11741174
overwrite_after_read_test(fuse::mock_session::new, prefix);
11751175
}
11761176

1177+
fn concurrent_open_after_overwrite_truncate_test(
1178+
creator_fn: impl TestSessionCreator,
1179+
prefix: &str,
1180+
rw_mode: ReadWriteMode,
1181+
) {
1182+
let filesystem_config = S3FilesystemConfig {
1183+
allow_overwrite: true,
1184+
..Default::default()
1185+
};
1186+
let test_config = TestSessionConfig {
1187+
filesystem_config,
1188+
..Default::default()
1189+
};
1190+
let test_session = creator_fn(prefix, test_config);
1191+
1192+
// Make sure there's an existing directory and a file
1193+
test_session
1194+
.client()
1195+
.put_object("dir/hello.txt", b"hello world")
1196+
.unwrap();
1197+
1198+
let _subdir = test_session.mount_path().join("dir");
1199+
let path = test_session.mount_path().join("dir/hello.txt");
1200+
1201+
// File should be empty when opened with O_TRUNC even without any write
1202+
let write_fh = File::options()
1203+
.rw_mode(rw_mode)
1204+
.truncate(true)
1205+
.open(&path)
1206+
.expect("open should succeed");
1207+
drop(write_fh);
1208+
1209+
fn check_empty(path: impl AsRef<Path>) {
1210+
let mut read_fh = File::options().read(true).open(&path).unwrap();
1211+
let mut hello_contents = String::new();
1212+
read_fh.read_to_string(&mut hello_contents).unwrap();
1213+
assert!(hello_contents.is_empty());
1214+
}
1215+
1216+
std::thread::scope(|s| {
1217+
s.spawn(|| {
1218+
check_empty(&path);
1219+
});
1220+
s.spawn(|| {
1221+
check_empty(&path);
1222+
});
1223+
});
1224+
}
1225+
1226+
#[cfg(feature = "s3_tests")]
1227+
#[test_matrix([WRITE_ONLY, READ_WRITE])]
1228+
fn concurrent_open_after_overwrite_truncate_test_s3(rw_mode: ReadWriteMode) {
1229+
concurrent_open_after_overwrite_truncate_test(
1230+
fuse::s3_session::new,
1231+
"concurrent_open_after_overwrite_truncate_test",
1232+
rw_mode,
1233+
);
1234+
}
1235+
1236+
#[test_matrix([WRITE_ONLY, READ_WRITE])]
1237+
fn concurrent_open_after_overwrite_truncate_test_mock(rw_mode: ReadWriteMode) {
1238+
concurrent_open_after_overwrite_truncate_test(
1239+
fuse::mock_session::new,
1240+
"concurrent_open_after_overwrite_truncate_test",
1241+
rw_mode,
1242+
);
1243+
}
1244+
11771245
fn write_handle_no_update_existing_empty_file(
11781246
creator_fn: impl TestSessionCreator,
11791247
prefix: &str,

mountpoint-s3/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## Unreleased (v1.22.1)
22

3+
* Fix a race condition where concurrent operations after closing a truncated file could result in I/O errors on subsequent reads. The issue was introduced in v1.22.0.
4+
([#1781](https://github.com/awslabs/mountpoint-s3/pull/1781))
35
* Upgrade cargo dependencies.
46
* Fix incorrect validation of default data cache limit which would cause Mountpoint to preserve less than 5% of available space ([#1779](https://github.com/awslabs/mountpoint-s3/pull/1779))
57

0 commit comments

Comments
 (0)