Skip to content

Commit 1461a52

Browse files
laramielcopybara-github
authored andcommitted
Update files to better handle network shares.
In some of the path processing, adopt the root-name concept from `std::filesystem::path`; this should better detect absolute vs. relative urls in windows as well as network shares. Translate network shares to the file://host/path syntax for uris. Fixes: #250 PiperOrigin-RevId: 814286407 Change-Id: I0d84be09fa85f82ed170a60166b05333ddda82a0
1 parent c847675 commit 1461a52

File tree

22 files changed

+546
-176
lines changed

22 files changed

+546
-176
lines changed

tensorstore/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,7 @@ tensorstore_cc_test(
755755
"//tensorstore/internal/json_binding:bindable",
756756
"//tensorstore/internal/json_binding:gtest",
757757
"//tensorstore/internal/testing:json_gtest",
758+
"//tensorstore/internal/testing:on_windows",
758759
"//tensorstore/kvstore",
759760
"//tensorstore/kvstore/file",
760761
"//tensorstore/kvstore/memory",

tensorstore/driver/auto/driver_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ auto MakeInitial(const Context &context) {
5555

5656
template <typename... Args>
5757
std::string AsFileUri(std::string_view path, Args... args) {
58-
return absl::StrCat("file://", tensorstore::internal::OsPathToUriPath(path),
58+
return absl::StrCat(tensorstore::internal::OsPathToFileUri(path).value(),
5959
std::forward<Args>(args)...);
6060
}
6161

tensorstore/driver/driver_testutil.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,13 @@ void TestTensorStoreDriverSpecRoundtrip(
173173
ReplaceStringInJson(options.full_base_spec, tempdir_key, tempdir->path());
174174

175175
// For the URL, the tempdir path must begin with a leading slash.
176+
// Generate the replacement url and strip off the "file://" prefix.
177+
static constexpr size_t kFileUriPrefixLen =
178+
std::string_view("file://").size();
179+
auto file_uri = internal::OsPathToFileUri(tempdir->path());
180+
TENSORSTORE_ASSERT_OK(file_uri.status());
176181
options.url = absl::StrReplaceAll(
177-
options.url,
178-
{{tempdir_key, internal::OsPathToUriPath(tempdir->path())}});
182+
options.url, {{tempdir_key, file_uri->substr(kFileUriPrefixLen)}});
179183
}
180184
Transaction transaction(mode);
181185
auto context = Context::Default();

tensorstore/driver/zarr/BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ tensorstore_cc_test(
110110
":compressor",
111111
":zlib_compressor",
112112
"//tensorstore/internal/testing:json_gtest",
113-
"//tensorstore/util:status",
114113
"//tensorstore/util:status_testutil",
114+
"@abseil-cpp//absl/status",
115115
"@googletest//:gtest_main",
116116
"@nlohmann_json//:json",
117117
],

tensorstore/driver/zarr/compressor_test.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "absl/status/status.h"
2020
#include <nlohmann/json.hpp>
2121
#include "tensorstore/internal/testing/json_gtest.h"
22-
#include "tensorstore/util/status.h"
2322
#include "tensorstore/util/status_testutil.h"
2423

2524
namespace {

tensorstore/internal/BUILD

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,6 +1367,7 @@ tensorstore_cc_library(
13671367
srcs = ["path.cc"],
13681368
hdrs = ["path.h"],
13691369
deps = [
1370+
":ascii_set",
13701371
"@abseil-cpp//absl/strings",
13711372
],
13721373
)
@@ -1377,6 +1378,7 @@ tensorstore_cc_test(
13771378
srcs = ["path_test.cc"],
13781379
deps = [
13791380
":path",
1381+
"//tensorstore/internal/testing:on_windows",
13801382
"@googletest//:gtest_main",
13811383
],
13821384
)
@@ -1613,6 +1615,10 @@ tensorstore_cc_library(
16131615
hdrs = ["uri_utils.h"],
16141616
deps = [
16151617
":ascii_set",
1618+
":path",
1619+
"//tensorstore/util:quote_string",
1620+
"//tensorstore/util:result",
1621+
"//tensorstore/util:status",
16161622
"@abseil-cpp//absl/status",
16171623
"@abseil-cpp//absl/strings",
16181624
],
@@ -1624,6 +1630,9 @@ tensorstore_cc_test(
16241630
deps = [
16251631
":ascii_set",
16261632
":uri_utils",
1633+
"//tensorstore/internal/testing:on_windows",
1634+
"//tensorstore/util:status_testutil",
1635+
"@abseil-cpp//absl/status",
16271636
"@googletest//:gtest_main",
16281637
],
16291638
)

tensorstore/internal/path.cc

Lines changed: 64 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,29 @@
2121
#include <string_view>
2222
#include <utility>
2323

24+
#include "absl/strings/ascii.h" // IWYU pragma: keep
25+
#include "absl/strings/match.h" // IWYU pragma: keep
2426
#include "absl/strings/str_cat.h"
2527
#include "absl/strings/string_view.h"
28+
#include "tensorstore/internal/ascii_set.h" // IWYU pragma: keep
2629

30+
namespace tensorstore {
2731
namespace {
2832

2933
#ifdef _WIN32
3034
constexpr inline bool IsDirSeparator(char c) { return c == '\\' || c == '/'; }
35+
36+
bool IsWindowsDriveLetter(std::string_view path) {
37+
return path.length() >= 2 && path[1] == ':' && absl::ascii_isalpha(path[0]);
38+
}
39+
40+
static constexpr internal::AsciiSet kIllegalUNCCharacters{"<>:/\\\"|'?*"};
41+
3142
#else
3243
constexpr inline bool IsDirSeparator(char c) { return c == '/'; }
3344
#endif
3445

3546
} // namespace
36-
namespace tensorstore {
3747
namespace internal_path {
3848

3949
std::string JoinPathImpl(std::initializer_list<std::string_view> paths) {
@@ -56,20 +66,22 @@ namespace internal {
5666
// Splits a path into the pair {dirname, basename}
5767
std::pair<std::string_view, std::string_view> PathDirnameBasename(
5868
std::string_view path) {
69+
// Find the root directory.
70+
auto root_dir = PathRootName(path);
71+
if (root_dir.size() < path.size() && IsDirSeparator(path[root_dir.size()])) {
72+
root_dir = path.substr(0, root_dir.size() + 1);
73+
}
74+
5975
size_t pos = path.size();
60-
while (pos != 0 && !IsDirSeparator(path[pos - 1])) {
76+
while (pos > root_dir.size() && !IsDirSeparator(path[pos - 1])) {
6177
--pos;
6278
}
6379
size_t basename = pos;
64-
--pos;
65-
if (pos == std::string_view::npos) {
66-
return {"", path};
67-
}
68-
while (pos != 0 && IsDirSeparator(path[pos - 1])) {
80+
if (pos > root_dir.size()) {
6981
--pos;
70-
}
71-
if (pos == 0) {
72-
return {"/", path.substr(basename)};
82+
while (pos > root_dir.size() && IsDirSeparator(path[pos - 1])) {
83+
--pos;
84+
}
7385
}
7486
return {path.substr(0, pos), path.substr(basename)};
7587
}
@@ -99,20 +111,50 @@ void AppendPathComponent(std::string& path, std::string_view component) {
99111
}
100112
}
101113

114+
std::string_view PathRootName(std::string_view path) {
115+
#ifdef _WIN32
116+
if (path.empty()) return {};
117+
if (IsWindowsDriveLetter(path)) {
118+
return path.substr(0, 2);
119+
}
120+
121+
// Handle windows network shares, only on Windows
122+
if (absl::StartsWith(path, "\\\\") || absl::StartsWith(path, "//")) {
123+
size_t prefix = 2;
124+
while (prefix < path.size() && path[prefix] >= 31 &&
125+
!kIllegalUNCCharacters.Test(path[prefix])) {
126+
++prefix;
127+
}
128+
if (prefix > 2 && prefix < path.length() &&
129+
(path[prefix] == '/' || path[prefix] == '\\')) {
130+
return path.substr(0, prefix);
131+
}
132+
}
133+
#endif
134+
return {};
135+
}
136+
102137
std::string LexicalNormalizePath(std::string path) {
103138
if (path.empty()) return path;
104139

105140
const char* src = path.c_str();
106141
auto dst = path.begin();
107142

108-
// Remove initial '/' characters.
109-
// Assume that Windows paths do not begin with '/'.
110-
const bool is_absolute_path = (*src == '/');
143+
// Skip the root name if present.
144+
if (auto root_name = PathRootName(path); !root_name.empty()) {
145+
dst += root_name.size();
146+
src += root_name.size();
147+
}
148+
149+
// A root directory begins after the root name; if it is present,
150+
// copy it and skip any duplicate separators.
151+
const bool is_absolute_path = IsDirSeparator(*src);
111152
if (is_absolute_path) {
112-
dst++;
153+
*dst++ = '/';
113154
src++;
114-
while (*src == '/') ++src;
155+
while (IsDirSeparator(*src)) ++src;
115156
}
157+
116158
auto limit = dst;
117159

118160
// Process all parts
@@ -175,5 +217,12 @@ std::string LexicalNormalizePath(std::string path) {
175217
return path;
176218
}
177219

220+
bool IsAbsolutePath(std::string_view path) {
221+
if (path.empty()) return false;
222+
auto root_name = PathRootName(path);
223+
return path.size() > root_name.size() &&
224+
IsDirSeparator(path[root_name.size()]);
225+
}
226+
178227
} // namespace internal
179228
} // namespace tensorstore

tensorstore/internal/path.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,17 @@ void EnsureNonDirectoryPath(std::string& path);
7373
/// as many relative path segments ('.' and '..') as possible.
7474
std::string LexicalNormalizePath(std::string path);
7575

76+
// Returns the length of the root name. Generally this is the empty
77+
// string, however in Windows this may be drive-letter prefix or the
78+
// network share name. For example:
79+
// * "/tmp/foo" -> ""
80+
// * "C:\foo" -> "C:"
81+
// * "\\server\share\foo" -> "\\server"
82+
std::string_view PathRootName(std::string_view path);
83+
84+
/// Returns true if the path is an absolute path.
85+
bool IsAbsolutePath(std::string_view path);
86+
7687
} // namespace internal
7788
} // namespace tensorstore
7889

0 commit comments

Comments
 (0)