Skip to content

Commit 74ebc44

Browse files
authored
Add class FragmentID. (#4607)
Add class FragmentID to validate, parse, and handle the components of a fragment URI. --- TYPE: NO_HISTORY DESC: Add class FragmentID.
1 parent f493b9e commit 74ebc44

31 files changed

+710
-364
lines changed

format_spec/timestamped_name.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ _Note_: The presence of `[]` is indicative of an optional parameter.
1616
| :-: | :-: | :-: |
1717
| 1 - 2 | 1.4 - 1.5 | `__uuid_t1[_t2]` |
1818
| 3 - 4 | 1.6 - 1.7 | `__t1_t2_uuid` |
19-
| 5+ | 2.0+ | `__t1_t2_uuid_[v]` |
19+
| 5+ | 2.0+ | `__t1_t2_uuid[_v]` |

test/src/unit-average-cell-size.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* The MIT License
77
*
8-
* @copyright Copyright (c) 2022 TileDB Inc.
8+
* @copyright Copyright (c) 2022-2024 TileDB Inc.
99
*
1010
* Permission is hereby granted, free of charge, to any person obtaining a copy
1111
* of this software and associated documentation files (the "Software"), to deal
@@ -38,6 +38,7 @@
3838
#include "tiledb/sm/cpp_api/tiledb"
3939
#include "tiledb/sm/cpp_api/tiledb_experimental"
4040
#include "tiledb/sm/enums/encryption_type.h"
41+
#include "tiledb/sm/fragment/fragment_identifier.h"
4142
#include "tiledb/sm/misc/constants.h"
4243
#include "tiledb/sm/misc/utils.h"
4344

@@ -166,9 +167,8 @@ struct CPPAverageCellSizeFx {
166167
REQUIRE(query.submit() == Query::Status::COMPLETE);
167168

168169
auto uri = sm::URI(query.fragment_uri(0));
169-
std::pair<uint64_t, uint64_t> timestamps;
170-
REQUIRE(sm::utils::parse::get_timestamp_range(uri, &timestamps).ok());
171-
return {uri, timestamps};
170+
sm::FragmentID fragment_id{uri};
171+
return {uri, fragment_id.timestamp_range()};
172172
}
173173

174174
/**

test/src/unit-capi-array.cc

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* The MIT License
77
*
8-
* @copyright Copyright (c) 2017-2023 TileDB Inc.
8+
* @copyright Copyright (c) 2017-2024 TileDB Inc.
99
* @copyright Copyright (c) 2016 MIT and Intel Corporation
1010
*
1111
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -53,10 +53,10 @@
5353
#include "tiledb/sm/cpp_api/tiledb"
5454
#include "tiledb/sm/enums/encryption_type.h"
5555
#include "tiledb/sm/enums/serialization_type.h"
56+
#include "tiledb/sm/fragment/fragment_identifier.h"
5657
#include "tiledb/sm/global_state/unit_test_config.h"
5758
#include "tiledb/sm/serialization/array.h"
5859
#include "tiledb/sm/serialization/fragments.h"
59-
#include "tiledb/storage_format/uri/parse_uri.h"
6060

6161
#include <chrono>
6262
#include <climits>
@@ -142,13 +142,10 @@ void ArrayFx::remove_temp_dir(const std::string& path) {
142142

143143
int ArrayFx::get_fragment_timestamps(const char* path, void* data) {
144144
auto data_vec = (std::vector<uint64_t>*)data;
145-
std::pair<uint64_t, uint64_t> timestamp_range;
146-
if (tiledb::sm::utils::parse::ends_with(
147-
path, tiledb::sm::constants::write_file_suffix)) {
148-
auto uri = tiledb::sm::URI(path);
149-
if (tiledb::sm::utils::parse::get_timestamp_range(uri, &timestamp_range)
150-
.ok())
151-
data_vec->push_back(timestamp_range.first);
145+
if (utils::parse::ends_with(path, constants::write_file_suffix)) {
146+
FragmentID fragment_id{path};
147+
auto timestamp_range{fragment_id.timestamp_range()};
148+
data_vec->push_back(timestamp_range.first);
152149
}
153150

154151
return 1;

tiledb/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# The MIT License
66
#
7-
# Copyright (c) 2017-2023 TileDB, Inc.
7+
# Copyright (c) 2017-2024 TileDB, Inc.
88
# Copyright (c) 2016 MIT and Intel Corporation
99
#
1010
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -221,6 +221,7 @@ set(TILEDB_CORE_SOURCES
221221
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/webp_filter.cc
222222
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/noop_filter.cc
223223
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/positive_delta_filter.cc
224+
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_identifier.cc
224225
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_info.cc
225226
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_metadata.cc
226227
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/global_state/global_state.cc

tiledb/sm/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# The MIT License
55
#
6-
# Copyright (c) 2021 TileDB, Inc.
6+
# Copyright (c) 2021-2024 TileDB, Inc.
77
#
88
# Permission is hereby granted, free of charge, to any person obtaining a copy
99
# of this software and associated documentation files (the "Software"), to deal
@@ -35,6 +35,7 @@ add_subdirectory(consolidator)
3535
add_subdirectory(crypto)
3636
add_subdirectory(filesystem)
3737
add_subdirectory(filter)
38+
add_subdirectory(fragment)
3839
add_subdirectory(group)
3940
add_subdirectory(metadata)
4041
add_subdirectory(misc)

tiledb/sm/array/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# The MIT License
55
#
6-
# Copyright (c) 2022-2023 TileDB, Inc.
6+
# Copyright (c) 2022-2024 TileDB, Inc.
77
#
88
# Permission is hereby granted, free of charge, to any person obtaining a copy
99
# of this software and associated documentation files (the "Software"), to deal
@@ -35,8 +35,8 @@ commence(object_library array)
3535
this_target_object_libraries(
3636
array_schema
3737
baseline
38+
fragment
3839
generic_tile_io
39-
uri_format
4040
vfs
4141
)
4242
if(TILEDB_STATS)

tiledb/sm/array/array_directory.cc

Lines changed: 29 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* The MIT License
77
*
8-
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
8+
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
99
*
1010
* Permission is hereby granted, free of charge, to any person obtaining a copy
1111
* of this software and associated documentation files (the "Software"), to deal
@@ -535,28 +535,24 @@ URI ArrayDirectory::get_commits_dir(uint32_t write_version) const {
535535
}
536536

537537
URI ArrayDirectory::get_commit_uri(const URI& fragment_uri) const {
538-
auto name = fragment_uri.remove_trailing_slash().last_path_part();
539-
auto fragment_version = utils::parse::get_fragment_version(name);
540-
541-
if (fragment_version < 12) {
538+
FragmentID fragment_id{fragment_uri};
539+
if (fragment_id.array_format_version() < 12) {
542540
return URI(fragment_uri.to_string() + constants::ok_file_suffix);
543541
}
544542

545-
auto temp_uri =
546-
uri_.join_path(constants::array_commits_dir_name).join_path(name);
543+
auto temp_uri = uri_.join_path(constants::array_commits_dir_name)
544+
.join_path(fragment_id.name());
547545
return URI(temp_uri.to_string() + constants::write_file_suffix);
548546
}
549547

550548
URI ArrayDirectory::get_vacuum_uri(const URI& fragment_uri) const {
551-
auto name = fragment_uri.remove_trailing_slash().last_path_part();
552-
auto fragment_version = utils::parse::get_fragment_version(name);
553-
554-
if (fragment_version < 12) {
549+
FragmentID fragment_id{fragment_uri};
550+
if (fragment_id.array_format_version() < 12) {
555551
return URI(fragment_uri.to_string() + constants::vacuum_file_suffix);
556552
}
557553

558-
auto temp_uri =
559-
uri_.join_path(constants::array_commits_dir_name).join_path(name);
554+
auto temp_uri = uri_.join_path(constants::array_commits_dir_name)
555+
.join_path(fragment_id.name());
560556
return URI(temp_uri.to_string() + constants::vacuum_file_suffix);
561557
}
562558

@@ -678,11 +674,8 @@ ArrayDirectory::load_commits_dir_uris_v12_or_higher(
678674
stdx::string::ends_with(
679675
commits_dir_uris[i].to_string(), constants::update_file_suffix)) {
680676
// Get the start and end timestamp for this delete/update
681-
std::pair<uint64_t, uint64_t> timestamp_range;
682-
RETURN_NOT_OK_TUPLE(
683-
utils::parse::get_timestamp_range(
684-
commits_dir_uris[i], &timestamp_range),
685-
nullopt);
677+
FragmentID fragment_id{commits_dir_uris[i]};
678+
auto timestamp_range{fragment_id.timestamp_range()};
686679

687680
// Add the delete tile location if it overlaps the open start/end times
688681
if (timestamps_overlap(timestamp_range, false)) {
@@ -772,12 +765,8 @@ ArrayDirectory::load_consolidated_commit_uris(
772765
auto pos = ss.tellg();
773766

774767
// Get the start and end timestamp for this delete
775-
std::pair<uint64_t, uint64_t> delete_timestamp_range;
776-
RETURN_NOT_OK_TUPLE(
777-
utils::parse::get_timestamp_range(
778-
URI(condition_marker), &delete_timestamp_range),
779-
nullopt,
780-
nullopt);
768+
FragmentID fragment_id{URI(condition_marker)};
769+
auto delete_timestamp_range{fragment_id.timestamp_range()};
781770

782771
// Add the delete tile location if it overlaps the open start/end
783772
// times
@@ -1028,9 +1017,8 @@ ArrayDirectory::compute_uris_to_vacuum(
10281017
auto& uri = uris[i];
10291018

10301019
// Get the start and end timestamp for this fragment
1031-
std::pair<uint64_t, uint64_t> fragment_timestamp_range;
1032-
RETURN_NOT_OK(
1033-
utils::parse::get_timestamp_range(uri, &fragment_timestamp_range));
1020+
FragmentID fragment_id{uri};
1021+
auto fragment_timestamp_range{fragment_id.timestamp_range()};
10341022
if (is_vacuum_file(uri)) {
10351023
vac_file_bitmap[i] = 1;
10361024
if (timestamps_overlap(
@@ -1164,8 +1152,8 @@ ArrayDirectory::compute_filtered_uris(
11641152
}
11651153

11661154
// Get the start and end timestamp for this fragment
1167-
RETURN_NOT_OK(utils::parse::get_timestamp_range(
1168-
uri, &fragment_timestamp_ranges[i]));
1155+
FragmentID fragment_id{uri};
1156+
fragment_timestamp_ranges[i] = fragment_id.timestamp_range();
11691157
if (timestamps_overlap(
11701158
fragment_timestamp_ranges[i],
11711159
!full_overlap_only &&
@@ -1238,16 +1226,13 @@ URI ArrayDirectory::select_latest_array_schema_uri() {
12381226
uint64_t latest_ts = 0;
12391227

12401228
for (auto& uri : array_schema_uris_) {
1241-
auto name = uri.remove_trailing_slash().last_path_part();
1242-
1229+
FragmentID fragment_id{uri};
12431230
// Skip the old schema URI name since it doesn't have timestamps
1244-
if (name == constants::array_schema_filename) {
1231+
if (fragment_id.name() == constants::array_schema_filename) {
12451232
continue;
12461233
}
12471234

1248-
std::pair<uint64_t, uint64_t> ts_range;
1249-
throw_if_not_ok(utils::parse::get_timestamp_range(uri, &ts_range));
1250-
1235+
auto ts_range{fragment_id.timestamp_range()};
12511236
if (ts_range.second > latest_ts && ts_range.second <= timestamp_end_) {
12521237
latest_uri = uri;
12531238
latest_ts = ts_range.second;
@@ -1270,7 +1255,8 @@ Status ArrayDirectory::is_fragment(
12701255
const std::unordered_set<std::string>& consolidated_uris_set,
12711256
int* is_fragment) const {
12721257
// If the URI name has a suffix, then it is not a fragment
1273-
auto name = uri.remove_trailing_slash().last_path_part();
1258+
FragmentID fragment_id{uri};
1259+
auto name = fragment_id.name();
12741260
if (name.find_first_of('.') != std::string::npos) {
12751261
*is_fragment = 0;
12761262
return Status::Ok();
@@ -1299,10 +1285,9 @@ Status ArrayDirectory::is_fragment(
12991285
return Status::Ok();
13001286
}
13011287

1302-
// If the format version is >= 5, then the above suffices to check if
1288+
// If the array format version is >= 5, then the above suffices to check if
13031289
// the URI is indeed a fragment
1304-
auto fragment_version = utils::parse::get_fragment_version(name);
1305-
if (fragment_version >= 5) {
1290+
if (fragment_id.array_format_version() >= 5) {
13061291
*is_fragment = false;
13071292
return Status::Ok();
13081293
}
@@ -1317,15 +1302,12 @@ Status ArrayDirectory::is_fragment(
13171302

13181303
bool ArrayDirectory::consolidation_with_timestamps_supported(
13191304
const URI& uri) const {
1320-
// Get the fragment version from the uri
1321-
auto name = uri.remove_trailing_slash().last_path_part();
1322-
auto fragment_version = utils::parse::get_fragment_version(name);
1323-
1324-
// get_fragment_version returns UINT32_MAX for versions <= 2 so we should
1325-
// explicitly exclude this case when checking if consolidation with timestamps
1326-
// is supported on a fragment
1305+
// FragmentID::array_format_version() returns UINT32_MAX for versions <= 2
1306+
// so we should explicitly exclude this case when checking if consolidation
1307+
// with timestamps is supported on a fragment
1308+
FragmentID fragment_id{uri};
13271309
return mode_ == ArrayDirectoryMode::READ &&
1328-
fragment_version >=
1310+
fragment_id.array_format_version() >=
13291311
constants::consolidation_with_timestamps_min_version;
13301312
}
13311313

tiledb/sm/array/array_directory.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* The MIT License
77
*
8-
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
8+
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
99
*
1010
* Permission is hereby granted, free of charge, to any person obtaining a copy
1111
* of this software and associated documentation files (the "Software"), to deal
@@ -39,9 +39,9 @@
3939
#include "tiledb/sm/array_schema/array_schema.h"
4040
#include "tiledb/sm/filesystem/uri.h"
4141
#include "tiledb/sm/filesystem/vfs.h"
42+
#include "tiledb/sm/fragment/fragment_identifier.h"
4243
#include "tiledb/sm/stats/stats.h"
4344
#include "tiledb/sm/storage_manager/context_resources.h"
44-
#include "tiledb/storage_format/uri/parse_uri.h"
4545

4646
#include <functional>
4747
#include <unordered_map>
@@ -221,12 +221,8 @@ class ArrayDirectory {
221221
: uri_(uri)
222222
, condition_marker_(condition_marker)
223223
, offset_(offset) {
224-
std::pair<uint64_t, uint64_t> timestamps;
225-
if (!utils::parse::get_timestamp_range(URI(condition_marker), &timestamps)
226-
.ok()) {
227-
throw std::logic_error("Error parsing uri.");
228-
}
229-
224+
FragmentID fragment_id{condition_marker};
225+
auto timestamps{fragment_id.timestamp_range()};
230226
timestamp_ = timestamps.first;
231227
}
232228

tiledb/sm/array_schema/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44
# The MIT License
55
#
6-
# Copyright (c) 2021-2023 TileDB, Inc.
6+
# Copyright (c) 2021-2024 TileDB, Inc.
77
#
88
# Permission is hereby granted, free of charge, to any person obtaining a copy
99
# of this software and associated documentation files (the "Software"), to deal
@@ -71,7 +71,7 @@ conclude(object_library)
7171
commence(object_library array_schema)
7272
this_target_sources(array_schema.cc dimension_label.cc)
7373
this_target_object_libraries(
74-
attribute domain enumeration time uri_format vfs)
74+
attribute domain enumeration fragment time uri_format vfs)
7575
conclude(object_library)
7676

7777
# This is linked outside the object_library scope because ContextResources

tiledb/sm/array_schema/array_schema.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* The MIT License
77
*
8-
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
8+
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
99
* @copyright Copyright (c) 2016 MIT and Intel Corporation
1010
*
1111
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -49,12 +49,12 @@
4949
#include "tiledb/sm/enums/layout.h"
5050
#include "tiledb/sm/filter/compression_filter.h"
5151
#include "tiledb/sm/filter/webp_filter.h"
52+
#include "tiledb/sm/fragment/fragment_identifier.h"
5253
#include "tiledb/sm/misc/hilbert.h"
5354
#include "tiledb/sm/misc/integral_type_casts.h"
5455
#include "tiledb/sm/misc/tdb_time.h"
5556
#include "tiledb/sm/tile/generic_tile_io.h"
5657
#include "tiledb/storage_format/uri/generate_uri.h"
57-
#include "tiledb/storage_format/uri/parse_uri.h"
5858
#include "tiledb/type/apply_with_type.h"
5959

6060
#include <algorithm>
@@ -1386,8 +1386,8 @@ ArraySchema ArraySchema::deserialize(
13861386
}
13871387

13881388
// Populate timestamp range
1389-
std::pair<uint64_t, uint64_t> timestamp_range;
1390-
throw_if_not_ok(utils::parse::get_timestamp_range(uri, &timestamp_range));
1389+
FragmentID fragment_id{uri};
1390+
auto timestamp_range{fragment_id.timestamp_range()};
13911391

13921392
// Set schema name
13931393
std::string name = uri.last_path_part();

0 commit comments

Comments
 (0)