Skip to content

Commit daad95d

Browse files
committed
Iceberg catalog with S3 tables
1 parent 9a7b6b5 commit daad95d

File tree

3 files changed

+78
-6
lines changed

3 files changed

+78
-6
lines changed

src/Databases/DataLake/ICatalog.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,19 @@ void TableMetadata::setLocation(const std::string & location_)
6767
auto pos_to_path = location_.substr(pos_to_bucket).find('/');
6868

6969
if (pos_to_path == std::string::npos)
70-
throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unexpected location format: {}", location_);
71-
72-
pos_to_path = pos_to_bucket + pos_to_path;
70+
{ // empty path
71+
location_without_path = location_;
72+
path.clear();
73+
bucket = location_.substr(pos_to_bucket);
74+
}
75+
else
76+
{
77+
pos_to_path = pos_to_bucket + pos_to_path;
7378

74-
location_without_path = location_.substr(0, pos_to_path);
75-
path = location_.substr(pos_to_path + 1);
76-
bucket = location_.substr(pos_to_bucket, pos_to_path - pos_to_bucket);
79+
location_without_path = location_.substr(0, pos_to_path);
80+
path = location_.substr(pos_to_path + 1);
81+
bucket = location_.substr(pos_to_bucket, pos_to_path - pos_to_bucket);
82+
}
7783

7884
LOG_TEST(getLogger("TableMetadata"),
7985
"Parsed location without path: {}, path: {}",

src/IO/S3/URI.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,10 +157,72 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
157157
}
158158
}
159159

160+
bool URI::isAWSRegion(std::string_view region)
161+
{
162+
/// List from https://docs.aws.amazon.com/general/latest/gr/s3.html
163+
static const std::unordered_set<std::string_view> regions = {
164+
"us-east-2",
165+
"us-east-1",
166+
"us-west-1",
167+
"us-west-2",
168+
"af-south-1",
169+
"ap-east-1",
170+
"ap-south-2",
171+
"ap-southeast-3",
172+
"ap-southeast-5",
173+
"ap-southeast-4",
174+
"ap-south-1",
175+
"ap-northeast-3",
176+
"ap-northeast-2",
177+
"ap-southeast-1",
178+
"ap-southeast-2",
179+
"ap-east-2",
180+
"ap-southeast-7",
181+
"ap-northeast-1",
182+
"ca-central-1",
183+
"ca-west-1",
184+
"eu-central-1",
185+
"eu-west-1",
186+
"eu-west-2",
187+
"eu-south-1",
188+
"eu-west-3",
189+
"eu-south-2",
190+
"eu-north-1",
191+
"eu-central-2",
192+
"il-central-1",
193+
"mx-central-1",
194+
"me-south-1",
195+
"me-central-1",
196+
"sa-east-1",
197+
"us-gov-east-1",
198+
"us-gov-west-1"
199+
};
200+
201+
/// 's3-us-west-2' is a legacy region format for S3 storage, equals to 'us-west-2'
202+
/// See https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#VirtualHostingBackwardsCompatibility
203+
if (region.substr(0, 3) == "s3-")
204+
region = region.substr(3);
205+
206+
return regions.contains(region);
207+
}
208+
160209
void URI::addRegionToURI(const std::string &region)
161210
{
162211
if (auto pos = endpoint.find(".amazonaws.com"); pos != std::string::npos)
212+
{
213+
if (pos > 0)
214+
{ /// Check if region is already in endpoint to avoid add it second time
215+
auto prev_pos = endpoint.find_last_of("/.", pos - 1);
216+
if (prev_pos == std::string::npos)
217+
prev_pos = 0;
218+
else
219+
++prev_pos;
220+
std::string_view endpoint_region = std::string_view(endpoint).substr(prev_pos, pos - prev_pos);
221+
if (isAWSRegion(endpoint_region))
222+
return;
223+
}
163224
endpoint = endpoint.substr(0, pos) + "." + region + endpoint.substr(pos);
225+
}
164226
}
165227

166228
void URI::validateBucket(const String & bucket, const Poco::URI & uri)

src/IO/S3/URI.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ struct URI
4141

4242
static void validateBucket(const std::string & bucket, const Poco::URI & uri);
4343

44+
/// Returns true if 'region' string is an AWS S3 region
45+
/// https://docs.aws.amazon.com/general/latest/gr/s3.html
46+
static bool isAWSRegion(std::string_view region);
47+
4448
private:
4549
std::pair<std::string, std::optional<std::string>> getURIAndArchivePattern(const std::string & source);
4650
};

0 commit comments

Comments
 (0)