Skip to content

Commit e2756e0

Browse files
authored
Merge pull request ClickHouse#63426 from ClickHouse/fix-backup-with-missing-projection
Fix backup/restore of projection part in case projection was removed from table metadata, but part still has projection
2 parents 98dcef6 + 96dd6c0 commit e2756e0

File tree

7 files changed

+120
-12
lines changed

7 files changed

+120
-12
lines changed

src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,20 @@ UInt32 DataPartStorageOnDiskFull::getRefCount(const String & file_name) const
9595
return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name);
9696
}
9797

98-
std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name) const
98+
std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name, bool if_exists) const
9999
{
100-
auto objects = volume->getDisk()->getStorageObjects(fs::path(root_path) / part_dir / file_name);
100+
const std::string path = fs::path(root_path) / part_dir / file_name;
101+
auto objects = volume->getDisk()->getStorageObjects(path);
102+
103+
if (objects.empty() && if_exists)
104+
return "";
105+
101106
if (objects.size() != 1)
102-
throw Exception(ErrorCodes::LOGICAL_ERROR, "One file must be mapped to one object on blob storage in MergeTree tables");
107+
{
108+
throw Exception(ErrorCodes::LOGICAL_ERROR,
109+
"One file must be mapped to one object on blob storage by path {} in MergeTree tables, have {}.",
110+
path, objects.size());
111+
}
103112

104113
return objects[0].remote_path;
105114
}

src/Storages/MergeTree/DataPartStorageOnDiskFull.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class DataPartStorageOnDiskFull final : public DataPartStorageOnDiskBase
2323
Poco::Timestamp getFileLastModified(const String & file_name) const override;
2424
size_t getFileSize(const std::string & file_name) const override;
2525
UInt32 getRefCount(const std::string & file_name) const override;
26-
std::string getRemotePath(const std::string & file_name) const override;
26+
std::string getRemotePath(const std::string & file_name, bool if_exists) const override;
2727
String getUniqueId() const override;
2828

2929
std::unique_ptr<ReadBufferFromFileBase> readFile(

src/Storages/MergeTree/IDataPartStorage.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class IDataPartStorage : public boost::noncopyable
126126
virtual UInt32 getRefCount(const std::string & file_name) const = 0;
127127

128128
/// Get path on remote filesystem from file name on local filesystem.
129-
virtual std::string getRemotePath(const std::string & file_name) const = 0;
129+
virtual std::string getRemotePath(const std::string & file_name, bool if_exists) const = 0;
130130

131131
virtual UInt64 calculateTotalSizeOnDisk() const = 0;
132132

src/Storages/MergeTree/MergeTreeData.cpp

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5361,20 +5361,50 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
53615361
&temp_dirs,
53625362
false, false);
53635363

5364-
auto projection_parts = part->getProjectionParts();
5365-
for (const auto & [projection_name, projection_part] : projection_parts)
5364+
auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part)
53665365
{
5367-
projection_part->getDataPartStorage().backup(
5368-
projection_part->checksums,
5369-
projection_part->getFileNamesWithoutChecksums(),
5366+
storage.backup(
5367+
projection_part.checksums,
5368+
projection_part.getFileNamesWithoutChecksums(),
53705369
fs::path{data_path_in_backup} / part->name,
53715370
backup_settings,
53725371
read_settings,
53735372
make_temporary_hard_links,
53745373
backup_entries_from_part,
53755374
&temp_dirs,
5376-
projection_part->is_broken,
5375+
projection_part.is_broken,
53775376
backup_settings.allow_backup_broken_projections);
5377+
};
5378+
5379+
auto projection_parts = part->getProjectionParts();
5380+
std::string proj_suffix = ".proj";
5381+
std::unordered_set<String> defined_projections;
5382+
5383+
for (const auto & [projection_name, projection_part] : projection_parts)
5384+
{
5385+
defined_projections.emplace(projection_name);
5386+
backup_projection(projection_part->getDataPartStorage(), *projection_part);
5387+
}
5388+
5389+
/// It is possible that the part has a written but not loaded projection,
5390+
/// e.g. it is written to parent part's checksums.txt and exists on disk,
5391+
/// but does not exist in table's projections definition.
5392+
/// Such a part can appear if the server was restarted after DROP PROJECTION but before the old part was removed.
5393+
/// In this case, the old part will load only projections from metadata.
5394+
/// See 031145_non_loaded_projection_backup.sh.
5395+
for (const auto & [name, _] : part->checksums.files)
5396+
{
5397+
auto projection_name = fs::path(name).stem().string();
5398+
if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name))
5399+
{
5400+
auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix);
5401+
if (projection_storage->exists("checksums.txt"))
5402+
{
5403+
auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder(
5404+
projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build();
5405+
backup_projection(projection_part->getDataPartStorage(), *projection_part);
5406+
}
5407+
}
53785408
}
53795409

53805410
if (hold_storage_and_part_ptrs)

src/Storages/MergeTree/checkDataPart.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,16 @@ IMergeTreeDataPart::Checksums checkDataPart(
377377
auto file_name = it->name();
378378
if (!data_part_storage.isDirectory(file_name))
379379
{
380-
auto remote_path = data_part_storage.getRemotePath(file_name);
380+
const bool is_projection_part = data_part->isProjectionPart();
381+
auto remote_path = data_part_storage.getRemotePath(file_name, /* if_exists */is_projection_part);
382+
if (remote_path.empty())
383+
{
384+
chassert(is_projection_part);
385+
throw Exception(
386+
ErrorCodes::BROKEN_PROJECTION,
387+
"Remote path for {} does not exist for projection path. Projection {} is broken",
388+
file_name, data_part->name);
389+
}
381390
cache.removePathIfExists(remote_path, FileCache::getCommonUser().user_id);
382391
}
383392
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
7
2+
Found unexpected projection directories: pp.proj
3+
BACKUP_CREATED
4+
RESTORED
5+
7
6+
Found unexpected projection directories: pp.proj
7+
0
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env bash
2+
3+
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
4+
# shellcheck source=../shell_config.sh
5+
. "$CURDIR"/../shell_config.sh
6+
7+
$CLICKHOUSE_CLIENT -nm -q "
8+
drop table if exists tp_1;
9+
create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100);
10+
system stop merges tp_1;
11+
insert into tp_1 select number, number from numbers(3);
12+
13+
set mutations_sync = 2;
14+
15+
alter table tp_1 add projection pp (select x, count() group by x);
16+
insert into tp_1 select number, number from numbers(4);
17+
select count() from tp_1;
18+
19+
-- Here we have a part with written projection pp
20+
alter table tp_1 detach partition '0';
21+
-- Move part to detached
22+
alter table tp_1 clear projection pp;
23+
-- Remove projection from table metadata
24+
alter table tp_1 drop projection pp;
25+
-- Now, we don't load projection pp for attached part, but it is written on disk
26+
alter table tp_1 attach partition '0';
27+
"
28+
29+
$CLICKHOUSE_CLIENT -nm -q "
30+
set send_logs_level='fatal';
31+
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
32+
33+
backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME"
34+
$CLICKHOUSE_CLIENT -q "
35+
backup table tp_1 to Disk('backups', '$backup_id');
36+
" | grep -o "BACKUP_CREATED"
37+
38+
$CLICKHOUSE_CLIENT -nm -q "
39+
set send_logs_level='fatal';
40+
drop table tp_1;
41+
restore table tp_1 from Disk('backups', '$backup_id');
42+
" | grep -o "RESTORED"
43+
44+
$CLICKHOUSE_CLIENT -q "select count() from tp_1;"
45+
$CLICKHOUSE_CLIENT -nm -q "
46+
set send_logs_level='fatal';
47+
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
48+
$CLICKHOUSE_CLIENT -nm -q "
49+
set send_logs_level='fatal';
50+
check table tp_1"
51+
$CLICKHOUSE_CLIENT -nm -q "
52+
set send_logs_level='fatal';
53+
drop table tp_1"

0 commit comments

Comments
 (0)