Skip to content

Commit f4476d5

Browse files
Fix opening arrays with unrecognized files in their root directory et al. (#5474)
[SC-64121](https://app.shortcut.com/tiledb-inc/story/64121/cannot-open-array-with-unrecognized-files-in-its-root-directory) This PR updates `ArrayDirectory::is_fragment` to return a negative result if the file's name is not a fragment identifier, instead of throwing and failing the open. I also did a small clean-up of the `FragmentID` class. --- TYPE: BUG DESC: Fixed opening arrays with unrecognized files in their root directory.
1 parent 580ac6a commit f4476d5

File tree

6 files changed

+99
-10
lines changed

6 files changed

+99
-10
lines changed

test/regression/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ if (TILEDB_CPP_API)
5757
list(APPEND SOURCES targets/sc-53791.cc)
5858
list(APPEND SOURCES targets/sc-53970.cc)
5959
list(APPEND SOURCES targets/sc-54473.cc)
60+
list(APPEND SOURCES targets/sc-64121.cc)
6061
endif()
6162

6263
add_executable(tiledb_regression
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/**
2+
* @file sc-64121.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2025 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*/
28+
29+
#include <stdio.h>
30+
#include <tiledb/tiledb>
31+
32+
#include <test/support/tdb_catch.h>
33+
34+
using namespace tiledb;
35+
36+
TEST_CASE(
37+
"SC-64121 open array with unrecognized files",
38+
"[regression][bug][sc-64121]") {
39+
Context ctx;
40+
41+
std::string uri("sc-64121");
42+
43+
ArraySchema schema(ctx, TILEDB_SPARSE);
44+
Domain domain(ctx);
45+
domain.add_dimension(Dimension::create<uint64_t>(ctx, "x", {{1, 100}}, 10));
46+
schema.set_domain(domain);
47+
schema.add_attribute(Attribute(ctx, "a", TILEDB_UINT64));
48+
Array::create(uri, schema);
49+
50+
VFS vfs(ctx);
51+
vfs.touch(uri + "/data.bin");
52+
53+
REQUIRE_NOTHROW(Array(ctx, uri, TILEDB_READ));
54+
}

tiledb/sm/array/array_directory.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1258,9 +1258,15 @@ Status ArrayDirectory::is_fragment(
12581258
const std::unordered_set<std::string>& ok_uris_set,
12591259
const std::unordered_set<std::string>& consolidated_uris_set,
12601260
int* is_fragment) const {
1261-
// If the URI name has a suffix, then it is not a fragment
1261+
// If the fragment ID does not have a name, ignore it.
1262+
if (!FragmentID::has_fragment_name(uri)) {
1263+
*is_fragment = 0;
1264+
return Status::Ok();
1265+
}
1266+
12621267
FragmentID fragment_id{uri};
12631268
auto name = fragment_id.name();
1269+
// If the URI name has a suffix, then it is not a fragment
12641270
if (name.find_first_of('.') != std::string::npos) {
12651271
*is_fragment = 0;
12661272
return Status::Ok();

tiledb/sm/fragment/fragment_identifier.cc

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ class InvalidURIException : public FragmentIDException {
5454
};
5555

5656
FragmentID::FragmentID(const URI& uri)
57-
: URI(uri)
58-
, name_(uri.remove_trailing_slash().last_path_part())
57+
: name_(uri.remove_trailing_slash().last_path_part())
5958
, timestamp_range_({0, 0})
6059
, name_version_(FragmentNameVersion::ONE) {
6160
// Ensure input uri is valid (non-empty)
@@ -88,19 +87,18 @@ FragmentID::FragmentID(const URI& uri)
8887
array_format_version_ = 2;
8988
sscanf(
9089
array_format_version_str.c_str(),
91-
(std::string("%") + std::string(PRId64)).c_str(),
92-
(long long int*)&timestamp_range_.first);
90+
"%" PRId64,
91+
(int64_t*)&timestamp_range_.first);
9392
timestamp_range_.second = timestamp_range_.first;
9493
} else {
9594
if (name_version_ == FragmentNameVersion::TWO) {
9695
array_format_version_ = 4;
9796
}
9897
sscanf(
9998
name_.c_str(),
100-
(std::string("__%") + std::string(PRId64) + "_%" + std::string(PRId64))
101-
.c_str(),
102-
(long long int*)&timestamp_range_.first,
103-
(long long int*)&timestamp_range_.second);
99+
"__%" PRId64 "_%" PRId64,
100+
(int64_t*)&timestamp_range_.first,
101+
(int64_t*)&timestamp_range_.second);
104102
}
105103
if (timestamp_range_.first > timestamp_range_.second) {
106104
throw FragmentIDException(
@@ -109,6 +107,19 @@ FragmentID::FragmentID(const URI& uri)
109107
}
110108
}
111109

110+
bool FragmentID::has_fragment_name(const URI& uri) {
111+
if (uri.empty()) {
112+
throw InvalidURIException("URI may not be empty.");
113+
}
114+
115+
auto name = uri.remove_trailing_slash().last_path_part();
116+
auto pos = name.find_last_of('.');
117+
if (pos != std::string::npos) {
118+
name = name.substr(0, pos);
119+
}
120+
return name.find_last_of('_') != std::string::npos;
121+
}
122+
112123
FragmentID::FragmentID(const std::string_view& path)
113124
: FragmentID(URI(path)) {
114125
}

tiledb/sm/fragment/fragment_identifier.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ enum class FragmentNameVersion { ONE, TWO, THREE };
7070
* Trailing "_".
7171
* Otherwise potentially-malformed names, which may not begin with "__".
7272
*/
73-
class FragmentID : private URI {
73+
class FragmentID {
7474
private:
7575
/**
7676
* Whitebox testing class provides additional accessors to components of the
@@ -99,6 +99,11 @@ class FragmentID : private URI {
9999
/** Destructor. */
100100
~FragmentID() = default;
101101

102+
/**
103+
* Returns whether a URI contains a fragment name.
104+
*/
105+
static bool has_fragment_name(const URI& uri);
106+
102107
/** Accessor to the fragment name. */
103108
inline const std::string& name() const {
104109
return name_;

tiledb/sm/fragment/test/unit_fragment_identifier.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,15 @@ TEST_CASE("FragmentID: Valid uris", "[fragment_id][valid_uri]") {
268268
}
269269
}
270270
}
271+
272+
TEST_CASE("FragmentID: has_fragment_name", "[fragment_id][has_fragment_name]") {
273+
CHECK(
274+
FragmentID::has_fragment_name(URI("__0123456789ABCDEF0123456789ABCDEF")));
275+
CHECK_THROWS(FragmentID::has_fragment_name(URI("")));
276+
CHECK_FALSE(
277+
FragmentID::has_fragment_name(URI("0123456789ABCDEF0123456789ABCDEF")));
278+
CHECK_FALSE(FragmentID::has_fragment_name(
279+
URI("__directory/0123456789ABCDEF0123456789ABCDEF")));
280+
CHECK_FALSE(FragmentID::has_fragment_name(
281+
URI("0123456789ABCDEF0123456789ABCDEF.__ext")));
282+
}

0 commit comments

Comments
 (0)