diff --git a/test/clt-tests/replication/test-sst-progress.rec b/test/clt-tests/replication/test-sst-progress.rec
new file mode 100644
index 0000000000..09fb372c7d
--- /dev/null
+++ b/test/clt-tests/replication/test-sst-progress.rec
@@ -0,0 +1,196 @@
+––– comment –––
+Test for SST (State Snapshot Transfer) progress monitoring during table replication
+Issue: https://github.com/manticoresoftware/effyis/issues/390
+
+Problem: When adding a large table to a cluster via ALTER CLUSTER ADD, there was no way
+to track replication progress. Users couldn't determine how much was copied or remaining.
+
+Solution: Added SST progress variables to SHOW STATUS (NAME is the cluster name, sst_test in this test):
+- cluster_NAME_sst_total: Overall progress (0-100)
+- cluster_NAME_sst_stage: Current stage (e.g., "send files")
+- cluster_NAME_sst_stage_total: Progress of current stage (0-100)
+- cluster_NAME_sst_tables: Total tables being transferred
+- cluster_NAME_sst_table: Current table being transferred (e.g., "3 (products)")
+- cluster_NAME_node_state: Node state (donor/joiner/synced)
+
+This test validates that SST progress variables exist in SHOW STATUS output.
+––– input –––
+apt-get update -y > /dev/null; echo $?
+––– output –––
+0
+––– input –––
+apt-get install -y iproute2 procps > /dev/null; echo $?
+––– output –––
+0
+––– comment –––
+Add watchdog = 0 to base config if not already present; WATCHDOG_ADDED records whether this test added it so that cleanup only undoes its own change
+––– input –––
+if grep -q "watchdog" test/clt-tests/base/searchd-with-flexible-ports.conf; then WATCHDOG_ADDED=0; echo "watchdog already exists"; elif sed -i '/^searchd {/,/^}/ s/^\([[:space:]]*\)}$/\1\twatchdog = 0\n\1}/' test/clt-tests/base/searchd-with-flexible-ports.conf; then WATCHDOG_ADDED=1; echo "watchdog added"; fi
+––– output –––
+#!/watchdog already exists|watchdog added/!#
+––– comment –––
+Start node 1
+––– input –––
+export INSTANCE=1
+––– output –––
+––– input –––
+mkdir -p /var/{run,lib,log}/manticore-${INSTANCE}
+––– output –––
+––– input –––
+stdbuf -oL searchd --logreplication -c test/clt-tests/base/searchd-with-flexible-ports.conf > /dev/null
+––– output –––
+––– input –––
+if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore-${INSTANCE}/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore-${INSTANCE}/searchd.log; fi
+––– output –––
+Buddy started!
+––– comment –––
+Create cluster on node 1
+––– input –––
+mysql -h0 -P1306 -e "CREATE CLUSTER sst_test"
+––– output –––
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_status'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_status
+ Value: primary
+––– comment –––
+Create table with MANY fields to make it large on disk
+––– input –––
+mysql -h0 -P1306 -e "CREATE TABLE test_table (id bigint, title text, content text, description text, tags text, author text, category text, summary text, attr1 int, attr2 int, attr3 int, attr4 int, attr5 int, attr6 int, attr7 int, attr8 int, attr9 int, attr10 int, price float, rating float, created bigint, json_data json, str1 string, str2 string, str3 string, str4 string, str5 string)"
+––– output –––
+––– input –––
+mysql -h0 -P1306 -e "ALTER CLUSTER sst_test ADD test_table"
+––– output –––
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_indexes'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_indexes
+ Value: test_table
+––– comment –––
+Insert LARGE amount of data with many fields to create big disk chunks
+Insert 30000 documents in 5000 batches of 6 documents each
+Each document has lots of text fields and attributes to make it large on disk
+––– input –––
+for batch in $(seq 1 5000); do mysql -h0 -P1306 -e "INSERT INTO sst_test:test_table(title,content,description,tags,author,category,summary,attr1,attr2,attr3,attr4,attr5,attr6,attr7,attr8,attr9,attr10,price,rating,created,json_data,str1,str2,str3,str4,str5) VALUES('Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor','Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute irure dolor','Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum','web development programming software engineering technology computer science artificial intelligence machine learning data science cloud computing devops','John Smith','Technology', 'Summary of the article about modern technology and software development practices in the current era',1,2,3,4,5,6,7,8,9,10,99.99,4.5,1234567890,'{\"key\":\"value\",\"items\":[1,2,3],\"metadata\":{\"type\":\"article\"}}','category1','tag1','attribute1','metadata1','value1'),('Consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore','Tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat Duis aute irure dolor in reprehenderit','In reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum sed ut','database architecture cloud computing devops kubernetes docker containers microservices serverless functions','Jane Doe','Engineering','Deep dive into cloud architecture and modern deployment strategies for scalable systems',11,12,13,14,15,16,17,18,19,20,149.99,4.8,1234567891,'{\"type\":\"article\",\"tags\":[\"tech\",\"cloud\"],\"author\":\"Jane\"}','category2','tag2','attribute2','metadata2','value2'),('Sed do eiusmod tempor incididunt ut labore et dolore magna','Et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat Duis aute irure dolor in reprehenderit in voluptate velit esse','Velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum sed ut perspiciatis unde omnis','api rest graphql websockets realtime communication protocols networking infrastructure distributed systems','Bob Johnson','Architecture','Understanding modern API design patterns and best practices for distributed applications',21,22,23,24,25,26,27,28,29,30,199.99,4.2,1234567892,'{\"format\":\"json\",\"version\":1,\"schema\":\"v2\"}','category3','tag3','attribute3','metadata3','value3'),('Magna aliqua Ut enim ad minim veniam quis nostrud','Quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur','Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum sed ut perspiciatis unde omnis iste natus error','microservices architecture patterns best practices design principles solid clean code','Alice Williams','Development','Comprehensive guide to microservices architecture and implementation strategies',31,32,33,34,35,36,37,38,39,40,249.99,4.7,1234567893,'{\"service\":\"microservice\",\"version\":\"1.0\"}','category4','tag4','attribute4','metadata4','value4'),('Exercitation ullamco laboris nisi ut aliquip ex ea','Ex ea commodo consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident','Sunt in culpa qui officia deserunt mollit anim id est laborum sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque','performance optimization monitoring observability metrics logging tracing debugging profiling','Charlie Brown','Performance','Advanced performance optimization techniques for modern web applications',41,42,43,44,45,46,47,48,49,50,179.99,4.6,1234567894,'{\"perf\":\"optimization\",\"tools\":[\"prometheus\"]}','category5','tag5','attribute5','metadata5','value5'),('Commodo consequat Duis aute irure dolor in reprehenderit','Dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit','Anim id est laborum sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium totam rem aperiam eaque','security authentication authorization encryption cryptography oauth jwt tokens session','David Miller','Security','Essential security practices and authentication mechanisms for web services',51,52,53,54,55,56,57,58,59,60,299.99,4.9,1234567895,'{\"security\":\"critical\",\"level\":\"high\"}','category6','tag6','attribute6','metadata6','value6')"; done > /dev/null 2>&1; echo "Insert completed"
+––– output –––
+Insert completed
+––– comment –––
+Verify data was inserted
+––– input –––
+count=$(mysql -h0 -P1306 -sN -e "SELECT COUNT(*) FROM sst_test:test_table" | grep -oE '[0-9]+' | head -1); echo "Documents inserted: $count"
+––– output –––
+#!/Documents inserted: [0-9]+/!#
+––– comment –––
+Wait briefly to allow some data to be written to disk
+––– input –––
+sleep 2
+––– output –––
+––– comment –––
+Start background load to continue inserting during SST to slow it down
+––– input –––
+(for i in $(seq 1 5000); do mysql -h0 -P1306 -e "INSERT INTO sst_test:test_table(title,content,description,attr1,attr2,attr3,str1,str2) VALUES('Background doc $i','Background content with lots of text to make it large','More description text here',$i,$i,$i,'str1','str2')"; done) > /dev/null 2>&1 & LOAD_PID=$!; sleep 1; echo "Background load started: $LOAD_PID"
+––– output –––
+Background load started: %{NUMBER}
+––– comment –––
+Start node 2
+––– input –––
+export INSTANCE=2
+––– output –––
+––– input –––
+mkdir -p /var/{run,lib,log}/manticore-${INSTANCE}
+––– output –––
+––– input –––
+stdbuf -oL searchd --logreplication -c test/clt-tests/base/searchd-with-flexible-ports.conf > /dev/null
+––– output –––
+––– input –––
+if timeout 10 grep -qm1 '\[BUDDY\] started' <(tail -n 1000 -f /var/log/manticore-${INSTANCE}/searchd.log); then echo 'Buddy started!'; else echo 'Timeout or failed!'; cat /var/log/manticore-${INSTANCE}/searchd.log; fi
+––– output –––
+Buddy started!
+––– comment –––
+Join node 2 to cluster in background - this triggers SST
+We check metrics immediately while SST is in progress
+––– input –––
+mysql -h0 -P2306 -e "JOIN CLUSTER sst_test AT '127.0.0.1:1312'" > /dev/null 2>&1 &
+––– output –––
+––– comment –––
+Sleep briefly then check SST metrics WHILE transfer is happening
+––– input –––
+sleep 0.5; mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_node_state'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_node_state
+ Value: #!/(donor|joiner|synced)/!#
+––– comment –––
+Verify all 5 SST progress variables exist (sst_total, sst_stage, sst_stage_total, sst_tables, sst_table)
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_%'\G" | grep -c "Counter:"
+––– output –––
+5
+––– comment –––
+Check that each individual SST variable exists
+Values will be "-" if SST completed, or numbers/stage names if still active
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_total'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_sst_total
+ Value: #!/([0-9]{1,2}|100|-)/!#
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_stage'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_sst_stage
+ Value: #!/([a-z ]+|-)/!#
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_stage_total'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_sst_stage_total
+ Value: #!/([0-9]{1,2}|100|-)/!#
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_tables'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_sst_tables
+ Value: #!/([0-9]+|-)/!#
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_sst_table'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_sst_table
+ Value: #!/([0-9]+ \([a-z_]+\)|-)/!#
+––– comment –––
+Verify both nodes are synced: poll for up to 60 seconds until both report cluster status "primary"
+––– input –––
+bash -c 'end=$((SECONDS+60)); while [ $SECONDS -lt $end ]; do all_synced=true; for port in 1306 2306; do mysql -h0 -P$port -e "SHOW STATUS LIKE '\''cluster_sst_test_status'\''\G" > /tmp/status_$port.log 2>/dev/null && grep -q "Value: primary" /tmp/status_$port.log || { all_synced=false; break; }; done; if $all_synced; then for port in 1306 2306; do echo "Port $port: Node synced"; done; exit 0; fi; sleep 1; done; echo "Timeout waiting for nodes to sync!"; exit 1'
+––– output –––
+Port 1306: Node synced
+Port 2306: Node synced
+––– comment –––
+Verify table exists on both nodes
+––– input –––
+mysql -h0 -P1306 -e "SHOW STATUS LIKE 'cluster_sst_test_indexes'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_indexes
+ Value: test_table
+––– input –––
+mysql -h0 -P2306 -e "SHOW STATUS LIKE 'cluster_sst_test_indexes'\G"
+––– output –––
+*************************** 1. row ***************************
+Counter: cluster_sst_test_indexes
+ Value: test_table
+––– comment –––
+Stop background load process
+––– input –––
+kill "$LOAD_PID" 2>/dev/null; wait "$LOAD_PID" 2>/dev/null; echo "Background load stopped"
+––– output –––
+Background load stopped
+––– comment –––
+Cleanup: Remove watchdog from base config only if this test added it (WATCHDOG_ADDED=1), to avoid affecting other tests
+––– input –––
+if [ "${WATCHDOG_ADDED:-0}" = 1 ]; then sed -i '/watchdog = 0/d' test/clt-tests/base/searchd-with-flexible-ports.conf; fi; echo $?
+––– output –––
+0