Skip to content

Commit 6d15034

Browse files
committed
fix deducing metadata
1 parent 7fc341c commit 6d15034

File tree

1 file changed

+68
-5
lines changed

1 file changed

+68
-5
lines changed

src/Databases/DataLake/GlueCatalog.cpp

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -325,9 +325,16 @@ bool GlueCatalog::tryGetTableMetadata(
325325

326326
// Resolve the actual metadata file path based on table location
327327
std::string resolved_metadata_path = resolveMetadataPathFromTableLocation(location_with_slash, result);
328-
result.setDataLakeSpecificProperties(DataLakeSpecificProperties{.iceberg_metadata_file_location = resolved_metadata_path});
328+
if (resolved_metadata_path.empty())
329+
{
330+
result.setTableIsNotReadable(fmt::format("Could not determine metadata_location of table `{}`. ",
331+
database_name + "." + table_name));
332+
}
333+
else
334+
{
335+
result.setDataLakeSpecificProperties(DataLakeSpecificProperties{.iceberg_metadata_file_location = resolved_metadata_path});
336+
}
329337
}
330-
331338
else
332339
{
333340
result.setTableIsNotReadable(fmt::format("Cannot read table `{}` because it has no metadata_location. " \
@@ -559,9 +566,65 @@ String GlueCatalog::resolveMetadataPathFromTableLocation(const String & table_lo
559566
}
560567
catch (...)
561568
{
562-
// If version-hint.text doesn't exist or is unreadable, fall back to metadata.json
563-
LOG_TRACE(log, "Could not read version-hint.text from '{}', falling back to metadata.json", version_hint_path);
564-
return table_location + "metadata/metadata.json";
569+
// If version-hint.text doesn't exist or is unreadable, list all metadata files and select the latest
570+
LOG_TRACE(log, "Could not read version-hint.text from '{}', trying to find latest metadata file", version_hint_path);
571+
572+
try
573+
{
574+
String bucket_with_prefix;
575+
String metadata_dir = table_location + "metadata/";
576+
String metadata_dir_path = metadata_dir;
577+
578+
if (metadata_dir_path.starts_with("s3://"))
579+
{
580+
metadata_dir_path = metadata_dir_path.substr(5);
581+
// Remove bucket from path
582+
std::size_t pos = metadata_dir_path.find('/');
583+
if (pos != std::string::npos)
584+
{
585+
metadata_dir_path = metadata_dir_path.substr(pos + 1);
586+
bucket_with_prefix = table_location.substr(0, pos + 6);
587+
}
588+
}
589+
else
590+
return "";
591+
592+
// List all files in metadata directory
593+
DB::RelativePathsWithMetadata files;
594+
object_storage->listObjects(metadata_dir_path, files, 0);
595+
596+
// Filter for .metadata.json files and find the most recent one
597+
String latest_metadata_file;
598+
std::optional<DB::ObjectMetadata> latest_metadata;
599+
600+
for (const auto & file : files)
601+
{
602+
if (file->getPath().ends_with(".metadata.json"))
603+
{
604+
// Compare last-modified times from the listed entries and keep the newest file
605+
if (!latest_metadata.has_value() ||
606+
(file->metadata->last_modified > latest_metadata->last_modified))
607+
{
608+
latest_metadata_file = file->getPath();
609+
latest_metadata = file->metadata;
610+
}
611+
}
612+
}
613+
614+
if (!latest_metadata_file.empty())
615+
{
616+
LOG_TRACE(log, "Found latest metadata file: {}", latest_metadata_file);
617+
return bucket_with_prefix + latest_metadata_file;
618+
}
619+
620+
LOG_TRACE(log, "No .metadata.json files found,");
621+
return "";
622+
}
623+
catch (...)
624+
{
625+
LOG_TRACE(log, "Failed to list metadata directory");
626+
return "";
627+
}
565628
}
566629
}
567630

0 commit comments

Comments
 (0)