@@ -213,3 +213,72 @@ def test_remove_replica(started_cluster):
213213 "/etc/clickhouse-server/config.d/another_remote_servers.xml" ,
214214 ]
215215 )
216+
def test_invalid_shard_directory_format(started_cluster):
    """
    Check that malformed shard directories do not break a Distributed table.

    Directories whose names look like shard directories but carry an invalid
    suffix (e.g. 'shard1_all_replicas_bkp') are created inside the data path
    of a Distributed table. The table is then detached and attached again to
    imitate the initialization that happens on server restart. The test
    passes when the server still answers queries afterwards; error log
    entries about the bad directories are reported but not required.
    """
    node.query("drop table if exists test.dist_invalid sync")
    node.query("drop table if exists test.local_invalid sync")
    node.query(
        "create table test.local_invalid (x UInt64, s String) engine = MergeTree order by x"
    )
    node.query(
        "create table test.dist_invalid (x UInt64, s String) "
        "engine = Distributed('test_cluster_internal_replication', test, local_invalid)"
    )

    node.query(
        "insert into test.dist_invalid values (1, 'a'), (2, 'bb')",
        settings={"use_compact_format_in_distributed_parts_names": "1"},
    )

    data_path = node.query(
        "SELECT arrayElement(data_paths, 1) FROM system.tables "
        "WHERE database='test' AND name='dist_invalid'"
    ).strip()
    # Without a data path the directories below would be created under "/",
    # and the test would silently stop exercising the table at all.
    assert data_path, "test.dist_invalid reports no data path in system.tables"
    data_path = data_path.rstrip("/")

    # Every name here resembles a compact-format shard directory but ends
    # with a suffix that is not a replica index.
    invalid_dirs = [
        "shard1_all_replicas_bkp",
        "shard1_all_replicas_backup",
        "shard1_all_replicas_old",
        "shard2_all_replicas_tmp",
    ]
    for invalid_dir in invalid_dirs:
        # The path must not contain stray spaces: the directory has to sit
        # directly inside the table's data path to be picked up.
        invalid_path = f"{data_path}/{invalid_dir}"
        node.exec_in_container(["mkdir", "-p", invalid_path])
        # A dummy file keeps the directory from being treated as empty.
        node.exec_in_container(["touch", f"{invalid_path}/dummy.txt"])

    # Reproduce server restart with detach and attach
    node.query("detach table test.dist_invalid")
    node.query("attach table test.dist_invalid")

    # The point of the test: the server must still be alive and responsive.
    assert node.query("SELECT 1").strip() == "1"

    node.query("SYSTEM FLUSH LOGS system.text_log")

    # NOTE(review): this filter only matches the 'shard1_...' directories;
    # the 'shard2_all_replicas_tmp' directory is not counted here.
    error_logs = node.query(
        """
        SELECT count()
        FROM system.text_log
        WHERE level = 'Error'
        AND message LIKE '%Invalid replica_index%'
        AND message LIKE '%shard1_all_replicas%'
        """
    ).strip()

    # Error log entries are expected for the malformed directories, but they
    # are not strictly required in case logging is disabled, so the count is
    # only reported.
    print(f"Found {error_logs} error log entries for invalid directories")

    # Clean up
    node.query("drop table test.dist_invalid sync")
    node.query("drop table test.local_invalid sync")
0 commit comments