Skip to content

Commit f5099d8

Browse files
committed
Disallow colocating tables when collations don't match
- No need to make any changes for the following UDFs: - update_distributed_table_colocation()/create_distributed_table_concurrently() These are already checking whether collations of distribution keys match. - ALTER TABLE .. ALTER COLUMN .. TYPE .. COLLATE .. We already don't allow changing the collation of a distribution column. - The following UDFs are fixed with this commit: These UDFs were not explicitly checking whether collation of the distribution keys of two distributed tables match, however, we were catching this via EnsurePartitionMetadataIsSane() when syncing metadata. And this incorrectly caused allowing to colocate two such distributed tables in a single-node setup as we don't have any nodes to sync metadata there. In such cases, we would only become aware of the situation **when adding a new node to the cluster etc.**. Otherwise, i.e., if we already had more than one nodes in the cluster, we were seeing this error while Citus is syncing metadata for the new / altered distributed table, which wasn't too bad but not ideal too: ```sql ERROR: cannot colocate tables test_a_tbl_2 and test_a_tbl_1 DETAIL: Distribution column collations don't match for test_a_tbl_2 and test_a_tbl_1. CONTEXT: while executing command on localhost:9701 ``` - create_distributed_table() To fix that, now EnsureTableCanBeColocatedWith() calls EnsureColumnTypeEquality() to perform necessary checks, which already ensures collation match. And to do that, now EnsureTableCanBeColocatedWith() has to accept distributionColumn Var as an argument (to be passed into EnsureColumnTypeEquality()), so now it doesn't have to accept distributionColumnType separately, so refactor accordingly. And to provide distributionColumn Var to EnsureTableCanBeColocatedWith(), similarly the call sites of the function also need to have distributionColumn Var, like FindColocateWithColocationId(). And now that FindColocateWithColocationId() accepts distributionColumn Var, it doesn't have to accept distributionColumnType and distributionColumnCollation separately, so refactor accordingly. Plus, now this UDF also correctly records the colocation groups with the correct distribution column collations. - alter_distributed_table() Make sure to check column collations. - Also, now citus_finish_citus_upgrade() automatically fixes such colocation groups when executed after upgrading to Citus 14. This also helps removing the last alternative test output for upgrade_citus_finish_citus_upgrade. This is because, the recent change made in citus_finish_citus_upgrade() to automatically fix such colocation groups results in updating last_upgrade_version in pg_dist_node_metadata when upgrading to Citus 14, so the notice message in the mentioned test is now gone, so we don't anymore need to have an alternative output for that test.
1 parent b30ae94 commit f5099d8

28 files changed

+973
-86
lines changed

src/backend/distributed/commands/alter_table.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2111,7 +2111,7 @@ CheckAlterDistributedTableConversionParameters(TableConversionState *con)
21112111
{
21122112
ereport(ERROR, (errmsg("cannot colocate with %s and change distribution "
21132113
"column to %s because data type of column %s is "
2114-
"different then the distribution column of the %s",
2114+
"different than the distribution column of the %s",
21152115
con->colocateWith, con->distributionColumn,
21162116
con->distributionColumn, con->colocateWith)));
21172117
}
@@ -2122,6 +2122,23 @@ CheckAlterDistributedTableConversionParameters(TableConversionState *con)
21222122
"distribution column is different than %s",
21232123
con->colocateWith, con->relationName)));
21242124
}
2125+
else if (con->distributionColumn &&
2126+
colocateWithPartKey->varcollid != con->distributionKey->varcollid)
2127+
{
2128+
ereport(ERROR, (errmsg("cannot colocate with %s and change distribution "
2129+
"column to %s because collation of column %s is "
2130+
"different than the distribution column of the %s",
2131+
con->colocateWith, con->distributionColumn,
2132+
con->distributionColumn, con->colocateWith)));
2133+
}
2134+
else if (!con->distributionColumn &&
2135+
colocateWithPartKey->varcollid != con->originalDistributionKey->varcollid
2136+
)
2137+
{
2138+
ereport(ERROR, (errmsg("cannot colocate with %s because collation of its "
2139+
"distribution column is different than %s",
2140+
con->colocateWith, con->relationName)));
2141+
}
21252142
}
21262143

21272144
if (!con->suppressNoticeMessages)

src/backend/distributed/commands/create_distributed_table.c

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,6 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
531531
Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
532532
distributionColumnName,
533533
NoLock);
534-
Oid distributionColumnType = distributionColumn->vartype;
535-
Oid distributionColumnCollation = distributionColumn->varcollid;
536534

537535
/* get an advisory lock to serialize concurrent default group creations */
538536
if (IsColocateWithDefault(colocateWithTableName))
@@ -547,8 +545,7 @@ CreateDistributedTableConcurrently(Oid relationId, char *distributionColumnName,
547545
*/
548546
uint32 colocationId = FindColocateWithColocationId(relationId,
549547
replicationModel,
550-
distributionColumnType,
551-
distributionColumnCollation,
548+
distributionColumn,
552549
shardCount,
553550
shardCountIsStrict,
554551
colocateWithTableName);
@@ -697,19 +694,14 @@ EnsureColocateWithTableIsValid(Oid relationId, char distributionMethod,
697694
char replicationModel = DecideDistTableReplicationModel(distributionMethod,
698695
colocateWithTableName);
699696

700-
/*
701-
* we fail transaction before local table conversion if the table could not be colocated with
702-
* given table. We should make those checks after local table conversion by acquiring locks to
703-
* the relation because the distribution column can be modified in that period.
704-
*/
705-
Oid distributionColumnType = ColumnTypeIdForRelationColumnName(
706-
relationId,
707-
distributionColumnName);
708-
709697
text *colocateWithTableNameText = cstring_to_text(colocateWithTableName);
710698
Oid colocateWithTableId = ResolveRelationId(colocateWithTableNameText, false);
699+
700+
Var *distributionColumn = BuildDistributionKeyFromColumnName(relationId,
701+
distributionColumnName,
702+
NoLock);
711703
EnsureTableCanBeColocatedWith(relationId, replicationModel,
712-
distributionColumnType, colocateWithTableId);
704+
distributionColumn, colocateWithTableId);
713705
}
714706

715707

@@ -1993,10 +1985,11 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
19931985
* until this transaction is committed.
19941986
*/
19951987

1988+
/* distributionColumn can only be null for single-shard tables */
19961989
Oid distributionColumnType =
19971990
distributionColumn ? distributionColumn->vartype : InvalidOid;
19981991
Oid distributionColumnCollation =
1999-
distributionColumn ? get_typcollation(distributionColumnType) : InvalidOid;
1992+
distributionColumn ? distributionColumn->varcollid : InvalidOid;
20001993

20011994
Assert(distributedTableParams->colocationParam.colocationParamType ==
20021995
COLOCATE_WITH_TABLE_LIKE_OPT);
@@ -2011,8 +2004,7 @@ ColocationIdForNewTable(Oid relationId, CitusTableType tableType,
20112004

20122005
colocationId = FindColocateWithColocationId(relationId,
20132006
citusTableParams.replicationModel,
2014-
distributionColumnType,
2015-
distributionColumnCollation,
2007+
distributionColumn,
20162008
distributedTableParams->shardCount,
20172009
distributedTableParams->
20182010
shardCountIsStrict,

src/backend/distributed/sql/citus--13.2-1--14.0-1.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33

44
#include "udfs/citus_prepare_pg_upgrade/14.0-1.sql"
55
#include "udfs/citus_finish_pg_upgrade/14.0-1.sql"
6+
#include "udfs/fix_pre_citus14_colocation_group_collation_mismatches/14.0-1.sql"
7+
#include "udfs/citus_finish_citus_upgrade/14.0-1.sql"

src/backend/distributed/sql/downgrades/citus--14.0-1--13.2-1.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33

44
#include "../udfs/citus_prepare_pg_upgrade/13.0-1.sql"
55
#include "../udfs/citus_finish_pg_upgrade/13.2-1.sql"
6+
#include "../udfs/citus_finish_citus_upgrade/11.0-2.sql"
7+
DROP FUNCTION IF EXISTS pg_catalog.fix_pre_citus14_colocation_group_collation_mismatches();

src/backend/distributed/sql/udfs/citus_finish_citus_upgrade/14.0-1.sql

Lines changed: 54 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/backend/distributed/sql/udfs/citus_finish_citus_upgrade/latest.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ BEGIN
3333
performed_upgrade := true;
3434
END IF;
3535

36+
IF last_upgrade_major_version < 14 THEN
37+
PERFORM fix_pre_citus14_colocation_group_collation_mismatches();
38+
performed_upgrade := true;
39+
END IF;
40+
3641
-- add new upgrade steps here
3742

3843
IF NOT performed_upgrade THEN

src/backend/distributed/sql/udfs/fix_pre_citus14_colocation_group_collation_mismatches/14.0-1.sql

Lines changed: 79 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
CREATE OR REPLACE FUNCTION pg_catalog.fix_pre_citus14_colocation_group_collation_mismatches()
2+
RETURNS VOID AS $func$
3+
DECLARE
4+
v_colocationid oid;
5+
v_tables_to_move_out_grouped_by_collation json;
6+
v_collationid oid;
7+
v_tables_to_move_out oid[];
8+
v_table_to_move_out oid;
9+
v_first_table oid;
10+
BEGIN
11+
SET LOCAL search_path TO pg_catalog;
12+
13+
FOR v_colocationid, v_tables_to_move_out_grouped_by_collation
14+
IN
15+
WITH colocation_groups_and_tables_with_collation_mismatches AS (
16+
SELECT pdc.colocationid, pa.attcollation as distkeycollation, pdp.logicalrelid
17+
FROM pg_dist_colocation pdc
18+
JOIN pg_dist_partition pdp
19+
ON pdc.colocationid = pdp.colocationid
20+
JOIN pg_attribute pa
21+
ON pa.attrelid = pdp.logicalrelid
22+
AND pa.attname = column_to_column_name(pdp.logicalrelid, pdp.partkey)
23+
-- ignore the table if its distribution column collation matches the collation saved for the colocation group
24+
WHERE pdc.distributioncolumncollation != pa.attcollation
25+
)
26+
SELECT
27+
colocationid,
28+
json_object_agg(distkeycollation, rels) AS tables_to_move_out_grouped_by_collation
29+
FROM (
30+
SELECT colocationid,
31+
distkeycollation,
32+
array_agg(logicalrelid::oid ORDER BY logicalrelid) AS rels
33+
FROM colocation_groups_and_tables_with_collation_mismatches
34+
GROUP BY colocationid, distkeycollation
35+
) q
36+
GROUP BY colocationid
37+
LOOP
38+
RAISE DEBUG 'Processing colocation group with id %', v_colocationid;
39+
40+
FOR v_collationid, v_tables_to_move_out
41+
IN
42+
SELECT key::oid AS collationid,
43+
array_agg(elem::oid) AS tables_to_move_out
44+
FROM json_each(v_tables_to_move_out_grouped_by_collation) AS e(key, value),
45+
LATERAL json_array_elements_text(e.value) AS elem
46+
GROUP BY key
47+
LOOP
48+
RAISE DEBUG ' Moving out tables with collation id % from colocation group %', v_collationid, v_colocationid;
49+
50+
v_first_table := NULL;
51+
52+
FOR v_table_to_move_out IN SELECT unnest(v_tables_to_move_out)
53+
LOOP
54+
IF v_first_table IS NULL then
55+
-- Move the first table out to start a new colocation group.
56+
--
57+
-- Could check if there is an appropriate colocation group to move to instead of 'none',
58+
-- but this won't be super easy. Plus, even if we had such a colocation group, the user
59+
-- was anyways okay with having this in a different colocation group in the first place.
60+
61+
RAISE DEBUG ' Moving out table with id % to a new colocation group', v_table_to_move_out;
62+
PERFORM update_distributed_table_colocation(v_table_to_move_out, colocate_with => 'none');
63+
64+
-- save the first table to colocate the rest of the tables with it
65+
v_first_table := v_table_to_move_out;
66+
ELSE
67+
-- Move the rest of the tables to colocate with the first table.
68+
69+
RAISE DEBUG ' Moving out table with id % to colocate with table id %', v_table_to_move_out, v_first_table;
70+
PERFORM update_distributed_table_colocation(v_table_to_move_out, colocate_with => v_first_table::regclass::text);
71+
END IF;
72+
END LOOP;
73+
END LOOP;
74+
END LOOP;
75+
END;
76+
$func$
77+
LANGUAGE plpgsql;
78+
COMMENT ON FUNCTION pg_catalog.fix_pre_citus14_colocation_group_collation_mismatches()
79+
IS 'Fix distributed tables whose colocation group collations do not match their distribution columns by moving them to new colocation groups';

0 commit comments

Comments
 (0)