Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions google/cloud/spanner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ add_library(
transaction.cc
transaction.h
update_instance_request_builder.h
uuid.cc
uuid.h
value.cc
value.h
version.cc
Expand Down Expand Up @@ -502,6 +504,7 @@ function (spanner_client_define_tests)
timestamp_test.cc
transaction_test.cc
update_instance_request_builder_test.cc
uuid_test.cc
value_test.cc)

# Export the list of unit tests to a .bzl file so we do not need to maintain
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/spanner/google_cloud_cpp_spanner.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ google_cloud_cpp_spanner_hdrs = [
"tracing_options.h",
"transaction.h",
"update_instance_request_builder.h",
"uuid.h",
"value.h",
"version.h",
"version_info.h",
Expand Down Expand Up @@ -198,6 +199,7 @@ google_cloud_cpp_spanner_srcs = [
"sql_statement.cc",
"timestamp.cc",
"transaction.cc",
"uuid.cc",
"value.cc",
"version.cc",
]
1 change: 1 addition & 0 deletions google/cloud/spanner/spanner_client_unit_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ spanner_client_unit_tests = [
"timestamp_test.cc",
"transaction_test.cc",
"update_instance_request_builder_test.cc",
"uuid_test.cc",
"value_test.cc",
]
158 changes: 158 additions & 0 deletions google/cloud/spanner/uuid.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "google/cloud/spanner/uuid.h"
#include "google/cloud/internal/make_status.h"
#include "absl/container/flat_hash_map.h"
#include "absl/strings/str_format.h"
#include "absl/strings/strip.h"

namespace google {
namespace cloud {
namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
namespace {

constexpr int kMaxUuidNumberOfHexDigits = 32;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't this localized to ParseHexBlock() just like kMaxUuidBlockLength is?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


// Helper function to parse a single hexadecimal block of a UUID.
// A hexadecimal block is a 16-digit hexadecimal number, which is represented
// as 8 bytes.
StatusOr<std::uint64_t> ParseHexBlock(absl::string_view& str,
absl::string_view original_str) {
constexpr int kMaxUuidBlockLength = 16;
static auto const* char_to_hex = new absl::flat_hash_map<char, std::uint8_t>(
{{'0', 0x00}, {'1', 0x01}, {'2', 0x02}, {'3', 0x03}, {'4', 0x04},
{'5', 0x05}, {'6', 0x06}, {'7', 0x07}, {'8', 0x08}, {'9', 0x09},
{'a', 0x0a}, {'b', 0x0b}, {'c', 0x0c}, {'d', 0x0d}, {'e', 0x0e},
{'f', 0x0f}, {'A', 0x0a}, {'B', 0x0b}, {'C', 0x0c}, {'D', 0x0d},
{'E', 0x0e}, {'F', 0x0f}});
std::uint64_t block = 0;
for (int j = 0; j < kMaxUuidBlockLength; ++j) {
absl::ConsumePrefix(&str, "-");
if (str.empty()) {
return internal::InvalidArgumentError(
absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
kMaxUuidNumberOfHexDigits, original_str),
GCP_ERROR_INFO());
}
auto it = char_to_hex->find(str[0]);
if (it == char_to_hex->end()) {
if (str[0] == '-') {
return internal::InvalidArgumentError(
absl::StrFormat("UUID cannot contain consecutive hyphens: %s",
original_str),
GCP_ERROR_INFO());
}

return internal::InvalidArgumentError(
absl::StrFormat("UUID contains invalid character (%c): %s", str[0],
original_str),
GCP_ERROR_INFO());
}
block = (block << 4) + it->second;
str.remove_prefix(1);
}
return block;
}
} // namespace

Uuid::Uuid(absl::uint128 value) : uuid_(value) {}

Uuid::Uuid(std::uint64_t high_bits, std::uint64_t low_bits)
: Uuid(absl::MakeUint128(high_bits, low_bits)) {}

bool operator==(Uuid const& lhs, Uuid const& rhs) {
return lhs.uuid_ == rhs.uuid_;
}

bool operator<(Uuid const& lhs, Uuid const& rhs) {
return lhs.uuid_ < rhs.uuid_;
}

Uuid::operator std::string() const {
constexpr int kUuidStringLen = 36;
constexpr int kChunkLength[] = {8, 4, 4, 4, 12};
auto to_hex = [](std::uint64_t v, int start_index, int end_index, char* out) {
static constexpr char kHexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
for (int i = start_index; i >= end_index; --i) {
*out++ = kHexChar[(v >> (i * 4)) & 0xf];
}
return start_index - end_index + 1;
};

std::string output;
output.resize(kUuidStringLen);
char* target = &((output)[output.size() - kUuidStringLen]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

output.size() - kUuidStringLen is zero. Why obfuscate that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

De-obfuscated

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const_casting isn't good either. Why not just ...

  char* target = &output[0];
  char* const last = &output[output.size()];

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

&output[0] turns into a clang-tidy diagnostic about using .data() instead which until c++17 only returns a const char*. I have multiple non-ideal choices.

char* const last = &((output)[output.size()]);
auto bits = Uint128High64(uuid_);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why split uuid_ into two 64-bit values when it could be more clearly treated as the single 128-bit value that it is?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

absl::uint128 isn't a drop in replacement in the existing code (I tried). When we get a std::uint128_t I'm happy to revisit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what you tried, but if absl::uint128 didn't work as an integral type then that would be an Abseil bug.

In any case, I'll assert that it does work.

constexpr char kHexDigits[] = "0123456789abcdef";

Uuid::operator std::string() const {
  constexpr char kTemplate[] = "00000000-0000-0000-0000-000000000000";
  char buf[sizeof kTemplate];
  auto uuid = uuid_;
  for (auto j = sizeof buf; j-- != 0;) {
    if (kTemplate[j] != '0') {
      buf[j] = kTemplate[j];
    } else {
      buf[j] = kHexDigits[static_cast<int>(uuid & 0xf)];
      uuid >>= 4;
    }
  }
  return buf;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't try much. I changed to_hex first parameter to an absl::uint128 and got compile errors trying to index into kHexChar
Created #15043 for future improvements as I'm out of time budget to work on this.

int start = 16;
for (auto length : kChunkLength) {
int end = start - length;
target += to_hex(bits, start - 1, end, target);
// Only hyphens write to valid addresses.
if (target < last) *(target++) = '-';
if (end == 0) {
start = 16;
bits = Uint128Low64(uuid_);
} else {
start = end;
}
}
return output;
}

StatusOr<Uuid> MakeUuid(absl::string_view str) {
if (str.empty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved this check as recommended, and also added a guard for an empty string.

I wouldn't make a special case of the empty string either. Just change the

  if (str[0] == '-') {

test to

  if (absl::StartsWith(str, "-")) {

and the empty string will be treated like any other illegal argument.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer the early exit and specific error message.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Early exit" is only a preferred strategy when it makes the following code simpler. Here it remains the same.

Making "UUID cannot be empty" a special case over "UUID must contain 32 hexadecimal digits" is not worth the redundancy. It makes the code more cluttered and more difficult to understand. And why shouldn't MakeUuid("{}") behave the same way?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed this check and added a test for "{}". Both now result in a satisfactory error message.

return internal::InvalidArgumentError(
absl::StrFormat("UUID cannot be empty"), GCP_ERROR_INFO());
}

std::string original_str = std::string(str);
Copy link
Contributor

@devbww devbww Mar 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this could/should be done without copying the string.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's necessary to copy the original string for error message contents.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, copying the string isn't necessary. Nothing here mutates the characters of the argument string_view.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few of the operations performed do mutate the string_view. I added additional expectation criteria to the tests that verified that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You appear to be confusing "mutate the string_view" with "mutate the characters of the string_view". Nothing does the latter. Indeed, string_view prohibits it. So, there is no need to copy the string.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copying the string_view instead.

// Check and remove optional braces
if (absl::ConsumePrefix(&str, "{")) {
if (!absl::ConsumeSuffix(&str, "}")) {
return internal::InvalidArgumentError(
absl::StrFormat("UUID missing closing '}': %s", original_str),
GCP_ERROR_INFO());
}
}

// Check for leading hyphen after stripping any surrounding braces.
if (str[0] == '-') {
return internal::InvalidArgumentError(
absl::StrFormat("UUID cannot begin with '-': %s", original_str),
GCP_ERROR_INFO());
}

auto high_bits = ParseHexBlock(str, original_str);
if (!high_bits.ok()) return std::move(high_bits).status();
auto low_bits = ParseHexBlock(str, original_str);
if (!low_bits.ok()) return std::move(low_bits).status();
Comment on lines +133 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, I would suggest not introducing two 64-bit values here. (Otherwise, why not four 32-bit values, for example?) A single 128-bit value should suffice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lacking a std::uint128_t and absl::uint128 not behaving exactly like a std::uint128_t, std::uint64_t is the largest integer currently available.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, I don't know what absl::uint128 behavior you think is lacking. For example,

#include <cctype>
#include <cstring>

constexpr char kHexDigits[] = "0123456789abcdef";

StatusOr<Uuid> MakeUuid(absl::string_view str) {
  absl::uint128 uuid = 0;
  auto const original_str = str;

  if (absl::ConsumePrefix(&str, "{")) {
    if (!absl::ConsumeSuffix(&str, "}")) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID missing closing '}': %s", original_str),
          GCP_ERROR_INFO());
    }
  }
  if (absl::StartsWith(str, "-")) {
    return internal::InvalidArgumentError(
        absl::StrFormat("UUID cannot begin with '-': %s", original_str),
        GCP_ERROR_INFO());
  }

  constexpr int kUuidNumberOfHexDigits = 32;
  for (int j = 0; j != kUuidNumberOfHexDigits; ++j) {
    absl::ConsumePrefix(&str, "-");
    if (str.empty()) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
                          kUuidNumberOfHexDigits, original_str),
          GCP_ERROR_INFO());
    }
    auto const* dp = std::strchr(
        kHexDigits, std::tolower(static_cast<unsigned char>(str[0])));
    if (dp == nullptr) {
      if (str[0] == '-') {
        return internal::InvalidArgumentError(
            absl::StrFormat("UUID cannot contain consecutive hyphens: %s",
                            original_str),
            GCP_ERROR_INFO());
      }
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID contains invalid character (%c): %s", str[0],
                          original_str),
          GCP_ERROR_INFO());
    }
    str.remove_prefix(1);
    uuid <<= 4;
    uuid += dp - kHexDigits;
  }

  if (!str.empty()) {
    return internal::InvalidArgumentError(
        absl::StrFormat("Extra characters found after parsing UUID: %s", str),
        GCP_ERROR_INFO());
  }
  return Uuid{uuid};
}

No "blocks" (or hash maps). Just a simple per-nibble loop.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created #15043 for future improvements as I'm out of time budget to work on this.


if (!str.empty()) {
return internal::InvalidArgumentError(
absl::StrFormat("Extra characters (%d) found after parsing UUID: %s",
str.size(), original_str),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not provide the actual extra characters rather than the number of them?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

GCP_ERROR_INFO());
}

return Uuid(absl::MakeUint128(*high_bits, *low_bits));
}

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace spanner
} // namespace cloud
} // namespace google
112 changes: 112 additions & 0 deletions google/cloud/spanner/uuid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H

#include "google/cloud/spanner/version.h"
#include "google/cloud/status_or.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include <iosfwd>
#include <string>

namespace google {
namespace cloud {
namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN

/**
* A representation of the Spanner UUID type: A fixed size 16 byte value
* that can be represented as a 32-digit hexadecimal string.
*
* @see https://cloud.google.com/spanner/docs/data-types#uuid_type
*/
class Uuid {
public:
/// Default construction yields a zero value UUID.
Uuid() = default;

/// Construct a UUID from a packed integer.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what "packed" is supposed to convey.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reworded

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, although we now have "128 bit unsigned" and "unsigned 64 bit" in consecutive comments. Consistency would be nice. And "N-bit" should be hyphenated when used as an adjective.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reworded across usages for consistency.

explicit Uuid(absl::uint128 value);

/// Construct a UUID from two 64 bit pieces.
Uuid(std::uint64_t high_bits, std::uint64_t low_bits);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd get rid of this constructor. If the caller has two std::uint64_ts, they can build the absl::uint128 by themselves. It shouldn't be part of this class (just like there shouldn't be an accessor that returns a pair of std::uint64_ts).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to give users a options that uses std integer types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then, there should be an accessor that returns a pair of std::uint64_ts.

And, you need to #include <cstdint>.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


/// @name Regular value type, supporting copy, assign, move.
///@{
Uuid(Uuid&&) = default;
Uuid& operator=(Uuid&&) = default;
Uuid(Uuid const&) = default;
Uuid& operator=(Uuid const&) = default;
///@}

/// @name Relational operators
///
///@{
friend bool operator==(Uuid const& lhs, Uuid const& rhs);
friend bool operator!=(Uuid const& lhs, Uuid const& rhs) {
return !(lhs == rhs);
}
friend bool operator<(Uuid const& lhs, Uuid const& rhs);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that == and < are simple operations on the representation, I don't know why we wouldn't choose to inline them.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

friend bool operator<=(Uuid const& lhs, Uuid const& rhs) {
return !(rhs < lhs);
}
friend bool operator>=(Uuid const& lhs, Uuid const& rhs) {
return !(lhs < rhs);
}
friend bool operator>(Uuid const& lhs, Uuid const& rhs) { return rhs < lhs; }
///@}

/// @name Conversion to packed integer representation.
explicit operator absl::uint128() const { return uuid_; }

/// @name Conversion to a lower case string using formatted:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"using formatted:" doesn't parse.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reworded

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/ using//

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

/// [8 hex-digits]-[4 hex-digits]-[4 hex-digits]-[4 hex-digits]-[12
/// hex-digits]
/// Example: 0b6ed04c-a16d-fc46-5281-7f9978c13738
explicit operator std::string() const;

/// @name Output streaming
friend std::ostream& operator<<(std::ostream& os, Uuid uuid) {
return os << std::string(uuid);
}

private:
absl::uint128 uuid_ = 0;
};

/**
* Parses a textual representation a `Uuid` from a string of hexadecimal digits.
* Returns an error if unable to parse the given input.
*
* Acceptable input strings must consist of [0-f] digits with formatting such:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[0-f] is not good definition of a character class.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expanded definition. If this still isn't a "good" definition, please suggest one.

* - Upper, lower, or mixed case.
* - Optional curly bracers around the entire UUID string.
* - Hyphens between any pair of hexadecimal digits are allowed.
*
* Example acceptable inputs:
* - {0b6ed04c-a16d-fc46-5281-7f9978c13738}
* - 0b6ed04ca16dfc4652817f9978c13738
* - 7Bf8-A7b8-1917-1919-2625-F208-c582-4254
* - {DECAFBAD-DEAD-FADE-CAFE-FEEDFACEBEEF}
*/
StatusOr<Uuid> MakeUuid(absl::string_view s);

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace spanner
} // namespace cloud
} // namespace google

#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H
Loading
Loading