Skip to content

Commit 040fce3

Browse files
pct960mmols
andauthored
Use Merkle trees in ACE (#253)
* Added build-mtree to build a merkle tree for a table * Fix node_list population * Also minimise critical section in compare_checksums * Use same block ranges on all nodes * Breakthrough: extending a merkle tree on new inserts works * Sweeping optimisations in table-diff * Use an optimised sql query instead of get_pkey_offsets * Fix pkey offsets query * Added feature to merge blocks on large deletes * Handle blocks splits and merges inside ACE * Fix some rebalancing bugs * Integrate merkle trees into ACE cli * Update mtree only splits blocks * Merges happen only when --rebalance=true is passed * Adding mtree diff * First version of mtree diff ready * Add progress bar for splits and merges * Use always trigger for tracking dirty blocks * Update cryptography to address CVE * No longer using OrderedSet for comparisons * Handle mismatching tree levels * Use generic trigger functions * Initialise mtree objects once per DB node * More cleanup * Tweaks to get all base tests to pass * Use SQL composables; fix rebalance issues * Unify pkey offset computations * Add support for specifying a ranges file * Remove node len check for mtree build * Update leaf hash during mtree update * Temp fix for range boundary issue * Address boundary issues using a lookup table * Use prepared statements * Sunset batches option and async rerun * Add block boundary and repset-diff tests * Remove explicit stmt.close() * Add support for composite keys; use stmt triggers * SQL cleanup * Add mtree init, teardown; fix block size usage * Added merkle tree tests * Use mogrify during repairs * Prior use of executemany would internally call execute multiple times, thereby making repairs slow because of statement triggers from mtrees * Fix write-ranges to use str by default * Add support for non-numeric datatypes in tracking triggers * Fix conn establishment in cleanup * Address codacy issues * Addressed more codacy issues * String literal fix * Codacy fix #4 * Codacy fix #5 * Use nosemgrep * More nosemgreps * Add mtree cli helptext * Fix metadata task type * Add metadata tracking for mtree modules * Move error codes out of config file * Use a separate consts file * Revamp ACE CLI invocation * Group mtree cmds into a sub-cmd * Treat each of table-diff, -repair, etc. as a top-level sub-cmd * Update tests * Fix table names in mtree test * Minor fixes * Help texts mostly fixed * fix fire.py helptext generation for mtree submodule * generate help for ace mtree submodule * Backward compatibility fixes * Rename 'override-block-size' to 'skip-block-size-check' to free up -o * Add back 'block_rows' as an alias of 'block_size' * Add back 'behavior' in rerun and mark as deprecated * Improve merkle tree help text * Fix tests * update generated helptext * Ensure pgcrypto is present * Fix spock diff --------- Co-authored-by: Matthew Mols <matt@pgedge.com>
1 parent cd59123 commit 040fce3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+8210
-1721
lines changed

cli/genHelp.sh

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,26 @@ export nc=../out/posix/pgedge
44
export output_dir=../docs
55

66
modules=(ace cluster db localhost service spock um)
7+
78
commands=(setup upgrade-cli)
89

910
mkdir -p "$output_dir"
1011

12+
13+
get_submodules(){
14+
local module="$1"
15+
16+
if [[ "$module" == "ace" ]]; then
17+
echo "mtree"
18+
fi
19+
}
1120
parse_to_markdown(){
1221
sed -r 's/\x1B\[[0-9;]*[mGKH]//g; /^(SYNOPSIS|POSITIONAL ARGUMENTS|DESCRIPTION|FLAGS|COMMANDS)/s/^/## /'
1322
}
1423

1524
get_module_commands() {
1625
local module="$1"
17-
local module_file="$output_dir/functions/$module.md"
26+
local module_file="$output_dir/functions/${module// /-}.md"
1827
local cmds=()
1928
if [[ -f "$module_file" ]]; then
2029
local in_commands=0
@@ -57,7 +66,7 @@ module_summary() {
5766
write_help() {
5867
# Generate help for a command or module (and its subcommands)
5968
local module="$1"
60-
$nc $module --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/$module.md";
69+
$nc $module --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/${module// /-}.md";
6170

6271
# Parse the generated module help file to extract subcommands (if they exist)
6372
module_commands=($(get_module_commands "$module"))
@@ -69,13 +78,25 @@ write_help() {
6978

7079
# Generate help for each command in the module
7180
for cmd in "${module_commands[@]}"; do
72-
local fname="${module}-$(echo "$cmd" | tr ' ' '-').md"
81+
local fname="${module// /-}-${cmd// /-}.md"
7382
echo "Generating help for module '$module', command '$cmd' -> $fname"
7483

7584
if ! $nc $module $cmd --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/$fname"; then
7685
echo "ERROR: Failed to generate help for module '$module', command '$cmd'" >&2
7786
fi
7887
done
88+
89+
# If the module has submodules, recursively generate help for them
90+
local submodules=($(get_submodules "$module"))
91+
if [ ${#submodules[@]} -gt 0 ]; then
92+
echo "Found submodules for module '$module': ${submodules[*]}"
93+
for submodule in "${submodules[@]}"; do
94+
echo "Generating help for submodule '$submodule' in module '$module'"
95+
write_help "$module $submodule"
96+
done
97+
else
98+
echo "No submodules found for module '$module'"
99+
fi
79100
}
80101

81102
index() {
@@ -132,6 +153,32 @@ index() {
132153
' "$module_file" >> "$index_file"
133154
echo "" >> "$index_file"
134155

156+
for submodule in $(get_submodules "$module"); do
157+
echo "### $module $submodule submodule commands" >> "$index_file"
158+
echo "" >> "$index_file"
159+
echo "| Command | Description |" >> "$index_file"
160+
echo "|---------|-------------|" >> "$index_file"
161+
162+
# Parse the -submodule.md file to extract commands and descriptions
163+
submodule_file="$output_dir/functions/${module// /-}-${submodule// /-}.md"
164+
awk -v module="$module" -v submodule="$submodule" '
165+
BEGIN { in_commands=0 }
166+
/COMMAND is one of the following:/ { in_commands=1; next }
167+
in_commands && /^[[:space:]]*$/ { exit }
168+
in_commands && /^[[:space:]]*[^[:space:]]/ {
169+
split($0, parts, "#")
170+
cmd=parts[1]
171+
gsub(/^[ \t]+|[ \t]+$/, "", cmd)
172+
desc=parts[2]
173+
gsub(/^[ \t]+|[ \t]+$/, "", desc)
174+
if (cmd != "") {
175+
printf "| [%s %s](functions/%s-%s-%s.md) | %s |\n", module, submodule, module, submodule, cmd, desc
176+
}
177+
}
178+
' "$submodule_file" >> "$index_file"
179+
echo "" >> "$index_file"
180+
done
181+
135182
done
136183
}
137184

@@ -146,7 +193,7 @@ if [ "$m" == "all" ]; then
146193
echo "Generating help for all modules..."
147194
echo "Removing existing help files..."
148195
rm -f $output_dir/functions/*
149-
196+
150197
# Loop through all modules and generate help
151198
for module in "${modules[@]}"; do
152199
write_help "$module"

cli/scripts/ace-tests/conftest.py

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import test_config
1010
from test_simple_base import TestSimpleBase
1111
from test_simple import TestSimple
12+
from test_merkle_trees_simple import TestMerkleTreesSimple
1213

1314
# Set up paths
1415
os.environ["PGEDGE_HOME"] = test_config.PGEDGE_HOME
@@ -49,7 +50,12 @@ def set_run_dir():
4950

5051
@pytest.fixture(scope="session")
5152
def cli():
52-
return load_mod("ace_cli")
53+
return load_mod("ace_cli").AceCLI()
54+
55+
56+
@pytest.fixture(scope="session")
57+
def mtree_cli(cli):
58+
return cli.mtree()
5359

5460

5561
@pytest.fixture(scope="session")
@@ -226,13 +232,24 @@ def prepare_spock(node):
226232
sleep(5)
227233

228234

235+
def pytest_addoption(parser):
236+
parser.addoption(
237+
"--skip-cleanup", action="store_true", help="Skip DB cleanup fixture"
238+
)
239+
240+
229241
@pytest.fixture(scope="session", autouse=True)
230-
def cleanup_databases(nodes):
242+
def cleanup_databases(request, nodes):
231243
"""Cleanup all databases after running tests"""
232244

233245
# Yield to let the tests run first
234246
yield
235247

248+
skip = request.config.getoption("--skip-cleanup")
249+
250+
if skip:
251+
pytest.skip("Skipping DB cleanup")
252+
236253
# Cleanup code that runs after all tests complete
237254
drop_customers_sql = "DROP TABLE IF EXISTS customers CASCADE;"
238255

@@ -335,22 +352,43 @@ def pytest_configure(config):
335352

336353

337354
def pytest_collection_modifyitems(items):
338-
"""Skip tests marked as abstract_base if they are in the base class."""
355+
"""
356+
Skips tests from TestSimpleBase as they should not be run directly.
357+
"""
339358
for item in items:
340-
if item.get_closest_marker("abstract_base"):
341-
# Skip only if the test is in TestSimpleBase class directly
342-
# or if the test method is not overridden in the child class
343-
if item.cls and (
344-
(
345-
item.cls.__name__ == "TestSimpleBase"
346-
and (
347-
issubclass(item.cls, TestSimpleBase)
348-
and item.function.__qualname__.startswith("TestSimpleBase.")
349-
)
350-
)
351-
or (
352-
issubclass(item.cls, TestSimple)
353-
and item.function.__qualname__.startswith("TestSimple.")
359+
if (
360+
item.cls
361+
and issubclass(item.cls, TestSimpleBase)
362+
and item.function.__qualname__.startswith("TestSimpleBase.")
363+
):
364+
item.add_marker(
365+
pytest.mark.skip(
366+
reason="TestSimpleBase tests are not meant to be run directly"
354367
)
355-
):
356-
item.add_marker(pytest.mark.skip(reason="Abstract base class"))
368+
)
369+
370+
371+
def pytest_runtest_setup(item):
372+
"""
373+
Skip parent class tests if a child class test is also in the run.
374+
"""
375+
if not item.get_closest_marker("abstract_base"):
376+
return
377+
378+
if item.cls is TestMerkleTreesSimple:
379+
is_child_running = any(
380+
i.cls
381+
and issubclass(i.cls, TestMerkleTreesSimple)
382+
and i.cls is not TestMerkleTreesSimple
383+
for i in item.session.items
384+
)
385+
if is_child_running:
386+
pytest.skip("Skipping parent class")
387+
388+
if item.cls is TestSimple:
389+
is_child_running = any(
390+
i.cls and issubclass(i.cls, TestSimple) and i.cls is not TestSimple
391+
for i in item.session.items
392+
)
393+
if is_child_running:
394+
pytest.skip("Skipping parent class")

cli/scripts/ace-tests/test_api.py

Lines changed: 8 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from test_simple_base import TestSimpleBase
99

1010

11+
# @pytest.mark.skip(reason="Skipping API tests")
1112
@pytest.mark.usefixtures("prepare_databases")
1213
class TestAPI(TestSimpleBase):
1314
@pytest.fixture(scope="class", autouse=True)
@@ -52,7 +53,7 @@ def test_simple_table_diff(self, cli, capsys, ace_conf, table_name):
5253
"cluster_name": "eqn-t9da",
5354
"table_name": table_name,
5455
"dbname": "demo",
55-
"block_rows": 10000,
56+
"block_size": 10000,
5657
"max_cpu_ratio": 0.6,
5758
"output": "json",
5859
"nodes": "all",
@@ -137,7 +138,7 @@ def test_table_diff_with_differences(
137138
"cluster_name": "eqn-t9da",
138139
"table_name": table_name,
139140
"dbname": "demo",
140-
"block_rows": 10000,
141+
"block_size": 10000,
141142
"max_cpu_ratio": 0.6,
142143
"output": "json",
143144
"nodes": "all",
@@ -307,7 +308,7 @@ def test_table_rerun_temptable(
307308
"cluster_name": "eqn-t9da",
308309
"table_name": table_name,
309310
"dbname": "demo",
310-
"block_rows": 10000,
311+
"block_size": 10000,
311312
"max_cpu_ratio": 0.6,
312313
"output": "json",
313314
"nodes": "all",
@@ -358,7 +359,6 @@ def test_table_rerun_temptable(
358359
"diff_file": diff_file_path.path,
359360
"table_name": table_name,
360361
"dbname": "demo",
361-
"behavior": "hostdb",
362362
"quiet": False,
363363
}
364364

@@ -417,93 +417,11 @@ def test_table_rerun_temptable(
417417
"-modified"
418418
), f"Modified row {diff[key_column]} doesn't have expected suffix"
419419

420-
except Exception as e:
421-
pytest.fail(f"Test failed: {str(e)}")
422-
423-
@pytest.mark.parametrize("table_name", ["public.customers"])
424-
def test_table_rerun_multiprocessing(
425-
self,
426-
cli,
427-
capsys,
428-
ace_conf,
429-
table_name,
430-
diff_file_path,
431-
):
432-
"""Test table rerun API (multiprocessing mode) on cluster eqn-t9da"""
433-
max_retries = 30
434-
retry_count = 0
435-
task_completed = False
436-
cert_config = self._get_cert_config(ace_conf)
437-
438-
try:
439-
rerun_payload = {
440-
"cluster_name": "eqn-t9da",
441-
"diff_file": diff_file_path.path,
442-
"table_name": table_name,
443-
"dbname": "demo",
444-
"behavior": "multiprocessing",
445-
"quiet": False,
446-
}
447-
448-
rerun_response = requests.post(
449-
f"{self._get_api_base_url()}/table-rerun",
450-
json=rerun_payload,
451-
**cert_config,
452-
)
453-
454-
assert rerun_response.status_code == 200
455-
rerun_task_id = rerun_response.json()["task_id"]
456-
457-
# Wait for rerun to complete
458-
retry_count = 0
459-
task_completed = False
460-
461-
while retry_count < max_retries and not task_completed:
462-
status_response = requests.get(
463-
f"{self._get_api_base_url()}/task-status",
464-
params={"task_id": rerun_task_id},
465-
**cert_config,
466-
)
467-
468-
assert status_response.status_code == 200
469-
status_data = status_response.json()
470-
471-
if status_data["task_status"] == "COMPLETED":
472-
task_completed = True
473-
elif status_data["task_status"] == "FAILED":
474-
error_msg = status_data.get("error_message", "Unknown error")
475-
pytest.fail(f"Rerun task failed: {error_msg}")
476-
else:
477-
time.sleep(1)
478-
retry_count += 1
479-
480-
assert task_completed, "Rerun task did not complete within timeout period"
481-
482-
# Verify the diff file contains 50 differences
483-
with open(diff_file_path.path, "r") as f:
484-
diff_data = json.load(f)
485-
486-
assert (
487-
len(diff_data["diffs"]["n1/n2"]["n2"]) == 50
488-
), "Expected 50 differences"
489-
490-
# Verify the differences are correctly reported
491-
for diff in diff_data["diffs"]["n1/n2"]["n2"]:
492-
assert diff["first_name"].endswith(
493-
"-modified"
494-
), f"Modified row {diff['index']} doesn't have expected suffix"
495-
496-
# Verify the control rows are not modified
497-
for diff in diff_data["diffs"]["n1/n2"]["n1"]:
498-
assert not diff["first_name"].endswith(
499-
"-modified"
500-
), f"Control row {diff['index']} shouldn't have modification suffix"
501-
502420
# Repair to restore state
503-
cli.table_repair_cli(
504-
"eqn-t9da",
505-
table_name,
506-
diff_file_path.path,
421+
cli.table_repair(
422+
cluster_name="eqn-t9da",
423+
diff_file=diff_file_path.path,
424+
table_name=table_name,
507425
source_of_truth="n1",
508426
)
509427

0 commit comments

Comments
 (0)