@@ -22,8 +22,8 @@ def create_buckets_s3(cluster, files=1000):
     s3_data = []

     for file_number in range(files):
-        file_name = f"data/generated/file_{file_number}.csv"
-        os.makedirs(os.path.join(SCRIPT_DIR, "data/generated/"), exist_ok=True)
+        file_name = f"data/generated_{files}/file_{file_number}.csv"
+        os.makedirs(os.path.join(SCRIPT_DIR, f"data/generated_{files}/"), exist_ok=True)
         s3_data.append(file_name)
         with open(os.path.join(SCRIPT_DIR, file_name), "w+", encoding="utf-8") as f:
             # a String, b UInt64
@@ -69,15 +69,17 @@ def started_cluster():
         logging.info("Cluster started")

         create_buckets_s3(cluster)
+        create_buckets_s3(cluster, files=3)

         yield cluster
     finally:
-        shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated/"), ignore_errors=True)
+        shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated_1000/"), ignore_errors=True)
+        shutil.rmtree(os.path.join(SCRIPT_DIR, "data/generated_3/"), ignore_errors=True)
         cluster.shutdown()


 def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache,
-                  lock_object_storage_task_distribution_ms):
+                  lock_object_storage_task_distribution_ms, files=1000):
     for host in list(cluster.instances.values()):
         host.query("SYSTEM DROP FILESYSTEM CACHE 'raw_s3_cache'", ignore_error=True)

@@ -92,7 +94,7 @@ def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second,
     result_first = node.query(
         f"""
         SELECT count(*)
-        FROM s3Cluster('{cluster_first}', 'http://minio1:9001/root/data/generated/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
+        FROM s3Cluster('{cluster_first}', 'http://minio1:9001/root/data/generated_{files}/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
         WHERE b=42
         SETTINGS {",".join(f"{k}={v}" for k, v in settings.items())}
         """,
@@ -103,7 +105,7 @@ def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second,
     result_second = node.query(
         f"""
         SELECT count(*)
-        FROM s3Cluster('{cluster_second}', 'http://minio1:9001/root/data/generated/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
+        FROM s3Cluster('{cluster_second}', 'http://minio1:9001/root/data/generated_{files}/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
         WHERE b=42
         SETTINGS {",".join(f"{k}={v}" for k, v in settings.items())}
         """,
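The SETTINGS line in these queries interpolates the settings dict into the SQL text before the query is sent. A minimal standalone sketch of that rendering; the setting values below are illustrative assumptions, not the test's exact values:

settings = {"enable_filesystem_cache": 0, "lock_object_storage_task_distribution_ms": 30000}
clause = ",".join(f"{k}={v}" for k, v in settings.items())
print(clause)  # enable_filesystem_cache=0,lock_object_storage_task_distribution_ms=30000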
@@ -134,6 +136,40 @@ def check_s3_gets(cluster, node, expected_result, cluster_first, cluster_second,
     return int(s3_get_first), int(s3_get_second)


+def check_s3_gets_by_hosts(cluster, node, expected_result,
+                           lock_object_storage_task_distribution_ms, files=1000):
+    settings = {
+        "enable_filesystem_cache": False,
+    }
+
+    settings["lock_object_storage_task_distribution_ms"] = lock_object_storage_task_distribution_ms
+    query_id = str(uuid.uuid4())
+    result = node.query(
+        f"""
+        SELECT count(*)
+        FROM s3Cluster('{cluster}', 'http://minio1:9001/root/data/generated_{files}/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
+        WHERE b=42
+        SETTINGS {",".join(f"{k}={v}" for k, v in settings.items())}
+        """,
+        query_id=query_id,
+    )
+    assert result == expected_result
+
+    node.query(f"SYSTEM FLUSH LOGS ON CLUSTER {cluster}")
+
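+    # One S3GetObject count per replica, ordered by hostname, taken from the
+    # query_log entries of the distributed query.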
+    s3_get = node.query(
+        f"""
+        SELECT ProfileEvents['S3GetObject']
+        FROM clusterAllReplicas('{cluster}', system.query_log)
+        WHERE type='QueryFinish'
+          AND initial_query_id='{query_id}'
+        ORDER BY hostname
+        """,
+    )
+
+    return [int(events) for events in s3_get.strip().split("\n")]
+
+
 def check_s3_gets_repeat(cluster, node, expected_result, cluster_first, cluster_second, enable_filesystem_cache,
                          lock_object_storage_task_distribution_ms):
     # Repeat test several times to get average result
@@ -154,7 +190,7 @@ def test_cache_locality(started_cluster, lock_object_storage_task_distribution_m
     expected_result = node.query(
         f"""
         SELECT count(*)
-        FROM s3('http://minio1:9001/root/data/generated/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
+        FROM s3('http://minio1:9001/root/data/generated_1000/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
         WHERE b=42
         """
     )
@@ -170,26 +206,57 @@ def test_cache_locality(started_cluster, lock_object_storage_task_distribution_m
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_12345', 1, lock_object_storage_task_distribution_ms)
     assert s3_get_second <= s3_get_first * dispersion

-    # Different nodes order
+    # Different replica order
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_34512', 1, lock_object_storage_task_distribution_ms)
     assert s3_get_second <= s3_get_first * dispersion

-    # No last node
+    # No last replica
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_1234', 1, lock_object_storage_task_distribution_ms)
-    assert s3_get_second <= s3_get_first * (0.211 + dispersion)  # actual value - 24 for 100 files, 211 for 1000
+    assert s3_get_second <= s3_get_first * (0.179 + dispersion)  # actual value: 179 of 1000 files changed replica

-    # No first node
+    # No first replica
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_2345', 1, lock_object_storage_task_distribution_ms)
-    assert s3_get_second <= s3_get_first * (0.189 + dispersion)  # actual value - 12 for 100 files, 189 for 1000
+    assert s3_get_second <= s3_get_first * (0.189 + dispersion)  # actual value: 189 of 1000 files changed replica

-    # No first node, different nodes order
+    # No first replica, different replica order
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_12345', 'cluster_4523', 1, lock_object_storage_task_distribution_ms)
     assert s3_get_second <= s3_get_first * (0.189 + dispersion)

-    # Add new node, different nodes order
+    # Add new replica, different replica order
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_4523', 'cluster_12345', 1, lock_object_storage_task_distribution_ms)
     assert s3_get_second <= s3_get_first * (0.189 + dispersion)

-    # New node and old node, different nodes order
+    # New replica and old replica, different replica order
+    # All files from the removed replica changed replica
+    # Some files from the remaining replicas moved to the new replica
     (s3_get_first, s3_get_second) = check_s3_gets_repeat(started_cluster, node, expected_result, 'cluster_1234', 'cluster_4523', 1, lock_object_storage_task_distribution_ms)
-    assert s3_get_second <= s3_get_first * (0.400 + dispersion)  # actual value - 36 for 100 files, 400 for 1000
+    assert s3_get_second <= s3_get_first * (0.368 + dispersion)  # actual value: 368 of 1000 files changed replica
+
+    if lock_object_storage_task_distribution_ms > 0:
+        s3_get = check_s3_gets_by_hosts('cluster_12345', node, expected_result, lock_object_storage_task_distribution_ms, files=1000)
+        assert s3_get == [189, 210, 220, 202, 179]
+        s3_get = check_s3_gets_by_hosts('cluster_1234', node, expected_result, lock_object_storage_task_distribution_ms, files=1000)
+        assert s3_get == [247, 243, 264, 246]
+        s3_get = check_s3_gets_by_hosts('cluster_2345', node, expected_result, lock_object_storage_task_distribution_ms, files=1000)
+        assert s3_get == [251, 280, 248, 221]
+
+
+def test_cache_locality_few_files(started_cluster):
+    node = started_cluster.instances["clickhouse0"]
+
+    expected_result = node.query(
+        f"""
+        SELECT count(*)
+        FROM s3('http://minio1:9001/root/data/generated_3/*', 'minio', '{minio_secret_key}', 'CSV', 'a String, b UInt64')
+        WHERE b=42
+        """
+    )
+
+    # Rendezvous hashing yields the following distribution:
+    #   file_0 - clickhouse1
+    #   file_1 - clickhouse4
+    #   file_2 - clickhouse3
+    # The same distribution must hold on every query
+    for _ in range(10):
+        s3_get = check_s3_gets_by_hosts('cluster_12345', node, expected_result, lock_object_storage_task_distribution_ms=30000, files=3)
+        assert s3_get == [1, 0, 1, 1, 0]
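The fixed per-host counts asserted above follow from rendezvous (highest-random-weight) hashing: each file is scored against every replica and assigned to the highest-scoring one, so adding or removing a replica only moves the files owned by that replica, roughly 1/N of the total. A minimal standalone sketch of the idea; the SHA-256 scoring here is an assumption for illustration, so the concrete file-to-replica mapping it prints differs from ClickHouse's own hash:

import hashlib

def pick_replica(file_name, replicas):
    # Rendezvous hashing: score every (file, replica) pair and keep the
    # replica with the highest score for this file.
    def score(replica):
        digest = hashlib.sha256(f"{file_name}:{replica}".encode()).digest()
        return int.from_bytes(digest, "big")
    return max(replicas, key=score)

replicas = [f"clickhouse{i}" for i in range(1, 6)]
for name in ("file_0", "file_1", "file_2"):
    print(name, "->", pick_replica(name, replicas))

# Removing one of the five replicas reassigns only the files that scored
# highest on it; every other file keeps its replica, which matches the
# ~179-220 of 1000 files that change replica per topology change above.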