Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
00429d5
Added build-mtree to build a merkle tree for a table
pct960 Jan 24, 2025
d4105a4
Merge branch 'REL25_01' into ace/merkle-trees
pct960 Jan 28, 2025
3f052e8
Fix node_list population
pct960 Jan 29, 2025
9a30c1c
Use same block ranges on all nodes
pct960 Jan 29, 2025
48f8422
Breakthrough: extending a merkle tree on new inserts works
pct960 Jan 31, 2025
1a2313a
Sweeping optimisations in table-diff
pct960 Feb 2, 2025
8930207
Use an optimised sql query instead of get_pkey_offsets
pct960 Feb 2, 2025
c0d0730
Fix pkey offsets query
pct960 Feb 2, 2025
8cea399
Added feature to merge blocks on large deletes
pct960 Feb 3, 2025
bc8ac75
Handle blocks splits and merges inside ACE
pct960 Feb 5, 2025
eaa8681
Fix some rebalancing bugs
pct960 Feb 5, 2025
88c85aa
Integrate merkle trees into ACE cli
pct960 Feb 7, 2025
84e918d
Update mtree only splits blocks
pct960 Feb 14, 2025
0aef4b1
Adding mtree diff
pct960 Feb 19, 2025
57df142
First version of mtree diff ready
pct960 Feb 20, 2025
b957ce2
Add progress bar for splits and merges
pct960 Feb 20, 2025
4761cf6
Use always trigger for tracking dirty blocks
pct960 Feb 21, 2025
50929a1
Update cryptography to address CVE
pct960 Feb 21, 2025
3f61bf7
No longer using OrderedSet for comparisons
pct960 Feb 25, 2025
fd52656
Handle mismatching tree levels
pct960 Feb 25, 2025
1d435df
Use generic trigger functions
pct960 Mar 3, 2025
901c4a9
Initialise mtree objects once per DB node
pct960 Mar 4, 2025
bd4ffe6
More cleanup
pct960 Mar 5, 2025
fe41a0a
Merge branch 'REL25_01' into ace/merkle-trees
pct960 Mar 5, 2025
09edea3
Tweaks to get all base tests to pass
pct960 Mar 10, 2025
9577803
Use SQL composables; fix rebalance issues
pct960 Mar 11, 2025
5306791
Unify pkey offset computations
pct960 Mar 12, 2025
1c2f40a
Add support for specifying a ranges file
pct960 Mar 13, 2025
bcf0734
Remove node len check for mtree build
pct960 Mar 13, 2025
b0b2e7d
Update leaf hash during mtree update
pct960 Mar 13, 2025
37e85a6
Temp fix for range boundary issue
pct960 Mar 13, 2025
36aae5c
Address boundary issues using a lookup table
pct960 Mar 14, 2025
0c4aab7
Use prepared statements
pct960 Mar 18, 2025
be026de
Sunset batches option and async rerun
pct960 Mar 18, 2025
07ed616
Add block boundary and repset-diff tests
pct960 Mar 19, 2025
85b91c3
Remove explicit stmt.close()
pct960 Mar 25, 2025
fd49596
Add support for composite keys; use stmt triggers
pct960 Mar 29, 2025
d4ebfbc
SQL cleanup
pct960 Mar 30, 2025
5a9dd2c
Add mtree init, teardown; fix block size usage
pct960 Mar 30, 2025
b04272e
Added merkle tree tests
pct960 Mar 31, 2025
a68c388
Use mogrify during repairs
pct960 Apr 7, 2025
d53b5e6
Fix write-ranges to use str by default
pct960 Apr 9, 2025
a1de782
Merge branch 'main' into ace/merkle-trees
pct960 Jun 5, 2025
c8c042f
Add support for non-numeric datatypes in tracking triggers
pct960 Jun 9, 2025
e87664a
Fix conn establishment in cleanup
pct960 Jun 10, 2025
e0a665f
Address codacy issues
pct960 Jun 10, 2025
34d6899
Addressed more codacy issues
pct960 Jun 10, 2025
9255639
String literal fix
pct960 Jun 10, 2025
ac34dc1
Codacy fix #4
pct960 Jun 10, 2025
bc35e9d
Codacy fix #5
pct960 Jun 10, 2025
8bee8af
Use nosemgrep
pct960 Jun 10, 2025
569e862
More nosemgreps
pct960 Jun 10, 2025
ed2da90
Add mtree cli helptext
pct960 Jun 15, 2025
352c50a
Fix metadata task type
pct960 Jun 15, 2025
471812b
Add metadata tracking for mtree modules
pct960 Jun 16, 2025
af4f293
Move error codes out of config file
pct960 Jun 16, 2025
226f04b
Use a separate consts file
pct960 Jun 16, 2025
d372ef4
Revamp ACE CLI invocation
pct960 Jun 18, 2025
4bd273f
Update tests
pct960 Jun 18, 2025
ce8524b
Fix table names in mtree test
pct960 Jun 18, 2025
c9162e1
Minor fixes
pct960 Jun 18, 2025
f9ad9d1
Help texts mostly fixed
pct960 Jun 18, 2025
daa8089
fix fire.py helptext generation for mtree submodule
mmols Jun 19, 2025
66b72ce
generate help for ace mtree submodule
mmols Jun 19, 2025
4314aa4
Backward compatibility fixes
pct960 Jun 23, 2025
7de65fd
Fix tests
pct960 Jun 23, 2025
f15ffdc
update generated helptext
mmols Jun 24, 2025
1d863fe
Ensure pgcrypto is present
pct960 Jun 25, 2025
fd59fdc
Fix spock diff
pct960 Jun 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 51 additions & 4 deletions cli/genHelp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,26 @@ export nc=../out/posix/pgedge
export output_dir=../docs

modules=(ace cluster db localhost service spock um)

commands=(setup upgrade-cli)

mkdir -p "$output_dir"


# Print the names of the submodules nested under a module, one per line.
# Arguments: $1 - module name
# Outputs:   "mtree" when the module is "ace"; nothing for any other module
get_submodules() {
  local parent="$1"

  case "$parent" in
    ace) echo "mtree" ;;
  esac
}
# Filter stdin to stdout, turning raw CLI help text into markdown.
#   1. Remove ANSI escape sequences ending in m, G, K or H.
#   2. Prefix lines that begin with a known section title with "## ".
parse_to_markdown() {
  sed -r \
    -e 's/\x1B\[[0-9;]*[mGKH]//g' \
    -e '/^(SYNOPSIS|POSITIONAL ARGUMENTS|DESCRIPTION|FLAGS|COMMANDS)/s/^/## /'
}

get_module_commands() {
local module="$1"
local module_file="$output_dir/functions/$module.md"
local module_file="$output_dir/functions/${module// /-}.md"
local cmds=()
if [[ -f "$module_file" ]]; then
local in_commands=0
Expand Down Expand Up @@ -57,7 +66,7 @@ module_summary() {
write_help() {
# Generate help for a command or module (and its subcommands)
local module="$1"
$nc $module --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/$module.md";
$nc $module --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/${module// /-}.md";

# Parse the generated module help file to extract subcommands (if they exist)
module_commands=($(get_module_commands "$module"))
Expand All @@ -69,13 +78,25 @@ write_help() {

# Generate help for each command in the module
for cmd in "${module_commands[@]}"; do
local fname="${module}-$(echo "$cmd" | tr ' ' '-').md"
local fname="${module// /-}-${cmd// /-}.md"
echo "Generating help for module '$module', command '$cmd' -> $fname"

if ! $nc $module $cmd --help 2>/dev/null | parse_to_markdown > "$output_dir/functions/$fname"; then
echo "ERROR: Failed to generate help for module '$module', command '$cmd'" >&2
fi
done

# If the module has submodules, recursively generate help for them
local submodules=($(get_submodules "$module"))
if [ ${#submodules[@]} -gt 0 ]; then
echo "Found submodules for module '$module': ${submodules[*]}"
for submodule in "${submodules[@]}"; do
echo "Generating help for submodule '$submodule' in module '$module'"
write_help "$module $submodule"
done
else
echo "No submodules found for module '$module'"
fi
}

index() {
Expand Down Expand Up @@ -132,6 +153,32 @@ index() {
' "$module_file" >> "$index_file"
echo "" >> "$index_file"

for submodule in $(get_submodules "$module"); do
echo "### $module $submodule submodule commands" >> "$index_file"
echo "" >> "$index_file"
echo "| Command | Description |" >> "$index_file"
echo "|---------|-------------|" >> "$index_file"

# Parse the -submodule.md file to extract commands and descriptions
submodule_file="$output_dir/functions/${module// /-}-${submodule// /-}.md"
awk -v module="$module" -v submodule="$submodule" '
BEGIN { in_commands=0 }
/COMMAND is one of the following:/ { in_commands=1; next }
in_commands && /^[[:space:]]*$/ { exit }
in_commands && /^[[:space:]]*[^[:space:]]/ {
split($0, parts, "#")
cmd=parts[1]
gsub(/^[ \t]+|[ \t]+$/, "", cmd)
desc=parts[2]
gsub(/^[ \t]+|[ \t]+$/, "", desc)
if (cmd != "") {
printf "| [%s %s](functions/%s-%s-%s.md) | %s |\n", module, submodule, module, submodule, cmd, desc
}
}
' "$submodule_file" >> "$index_file"
echo "" >> "$index_file"
done

done
}

Expand All @@ -146,7 +193,7 @@ if [ "$m" == "all" ]; then
echo "Generating help for all modules..."
echo "Removing existing help files..."
rm -f $output_dir/functions/*

# Loop through all modules and generate help
for module in "${modules[@]}"; do
write_help "$module"
Expand Down
76 changes: 57 additions & 19 deletions cli/scripts/ace-tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import test_config
from test_simple_base import TestSimpleBase
from test_simple import TestSimple
from test_merkle_trees_simple import TestMerkleTreesSimple

# Set up paths
os.environ["PGEDGE_HOME"] = test_config.PGEDGE_HOME
Expand Down Expand Up @@ -49,7 +50,12 @@ def set_run_dir():

@pytest.fixture(scope="session")
def cli():
return load_mod("ace_cli")
return load_mod("ace_cli").AceCLI()


@pytest.fixture(scope="session")
def mtree_cli(cli):
    """Session-scoped fixture exposing whatever ``cli.mtree()`` returns."""
    mtree = cli.mtree()
    return mtree


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -226,13 +232,24 @@ def prepare_spock(node):
sleep(5)


def pytest_addoption(parser):
    """Register the ``--skip-cleanup`` boolean command-line flag."""
    flag_settings = {
        "action": "store_true",
        "help": "Skip DB cleanup fixture",
    }
    parser.addoption("--skip-cleanup", **flag_settings)


@pytest.fixture(scope="session", autouse=True)
def cleanup_databases(nodes):
def cleanup_databases(request, nodes):
"""Cleanup all databases after running tests"""

# Yield to let the tests run first
yield

skip = request.config.getoption("--skip-cleanup")

if skip:
pytest.skip("Skipping DB cleanup")

# Cleanup code that runs after all tests complete
drop_customers_sql = "DROP TABLE IF EXISTS customers CASCADE;"

Expand Down Expand Up @@ -335,22 +352,43 @@ def pytest_configure(config):


def pytest_collection_modifyitems(items):
"""Skip tests marked as abstract_base if they are in the base class."""
"""
Skips tests from TestSimpleBase as they should not be run directly.
"""
for item in items:
if item.get_closest_marker("abstract_base"):
# Skip only if the test is in TestSimpleBase class directly
# or if the test method is not overridden in the child class
if item.cls and (
(
item.cls.__name__ == "TestSimpleBase"
and (
issubclass(item.cls, TestSimpleBase)
and item.function.__qualname__.startswith("TestSimpleBase.")
)
)
or (
issubclass(item.cls, TestSimple)
and item.function.__qualname__.startswith("TestSimple.")
if (
item.cls
and issubclass(item.cls, TestSimpleBase)
and item.function.__qualname__.startswith("TestSimpleBase.")
):
item.add_marker(
pytest.mark.skip(
reason="TestSimpleBase tests are not meant to be run directly"
)
):
item.add_marker(pytest.mark.skip(reason="Abstract base class"))
)


def pytest_runtest_setup(item):
    """
    Skip an ``abstract_base``-marked test of a parent class when the session
    also contains tests from a strict subclass of that parent.

    Only tests whose class is exactly ``TestMerkleTreesSimple`` or exactly
    ``TestSimple`` are considered; everything else runs untouched.
    """
    marker = item.get_closest_marker("abstract_base")
    if not marker:
        return

    for parent_cls in (TestMerkleTreesSimple, TestSimple):
        if item.cls is not parent_cls:
            continue

        # Look through every collected item for one that belongs to a
        # strict subclass of the parent.
        for other in item.session.items:
            if (
                other.cls
                and other.cls is not parent_cls
                and issubclass(other.cls, parent_cls)
            ):
                pytest.skip("Skipping parent class")
98 changes: 8 additions & 90 deletions cli/scripts/ace-tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from test_simple_base import TestSimpleBase


# @pytest.mark.skip(reason="Skipping API tests")
@pytest.mark.usefixtures("prepare_databases")
class TestAPI(TestSimpleBase):
@pytest.fixture(scope="class", autouse=True)
Expand Down Expand Up @@ -52,7 +53,7 @@ def test_simple_table_diff(self, cli, capsys, ace_conf, table_name):
"cluster_name": "eqn-t9da",
"table_name": table_name,
"dbname": "demo",
"block_rows": 10000,
"block_size": 10000,
"max_cpu_ratio": 0.6,
"output": "json",
"nodes": "all",
Expand Down Expand Up @@ -137,7 +138,7 @@ def test_table_diff_with_differences(
"cluster_name": "eqn-t9da",
"table_name": table_name,
"dbname": "demo",
"block_rows": 10000,
"block_size": 10000,
"max_cpu_ratio": 0.6,
"output": "json",
"nodes": "all",
Expand Down Expand Up @@ -307,7 +308,7 @@ def test_table_rerun_temptable(
"cluster_name": "eqn-t9da",
"table_name": table_name,
"dbname": "demo",
"block_rows": 10000,
"block_size": 10000,
"max_cpu_ratio": 0.6,
"output": "json",
"nodes": "all",
Expand Down Expand Up @@ -358,7 +359,6 @@ def test_table_rerun_temptable(
"diff_file": diff_file_path.path,
"table_name": table_name,
"dbname": "demo",
"behavior": "hostdb",
"quiet": False,
}

Expand Down Expand Up @@ -417,93 +417,11 @@ def test_table_rerun_temptable(
"-modified"
), f"Modified row {diff[key_column]} doesn't have expected suffix"

except Exception as e:
pytest.fail(f"Test failed: {str(e)}")

@pytest.mark.parametrize("table_name", ["public.customers"])
def test_table_rerun_multiprocessing(
self,
cli,
capsys,
ace_conf,
table_name,
diff_file_path,
):
"""Test table rerun API (multiprocessing mode) on cluster eqn-t9da"""
max_retries = 30
retry_count = 0
task_completed = False
cert_config = self._get_cert_config(ace_conf)

try:
rerun_payload = {
"cluster_name": "eqn-t9da",
"diff_file": diff_file_path.path,
"table_name": table_name,
"dbname": "demo",
"behavior": "multiprocessing",
"quiet": False,
}

rerun_response = requests.post(
f"{self._get_api_base_url()}/table-rerun",
json=rerun_payload,
**cert_config,
)

assert rerun_response.status_code == 200
rerun_task_id = rerun_response.json()["task_id"]

# Wait for rerun to complete
retry_count = 0
task_completed = False

while retry_count < max_retries and not task_completed:
status_response = requests.get(
f"{self._get_api_base_url()}/task-status",
params={"task_id": rerun_task_id},
**cert_config,
)

assert status_response.status_code == 200
status_data = status_response.json()

if status_data["task_status"] == "COMPLETED":
task_completed = True
elif status_data["task_status"] == "FAILED":
error_msg = status_data.get("error_message", "Unknown error")
pytest.fail(f"Rerun task failed: {error_msg}")
else:
time.sleep(1)
retry_count += 1

assert task_completed, "Rerun task did not complete within timeout period"

# Verify the diff file contains 50 differences
with open(diff_file_path.path, "r") as f:
diff_data = json.load(f)

assert (
len(diff_data["diffs"]["n1/n2"]["n2"]) == 50
), "Expected 50 differences"

# Verify the differences are correctly reported
for diff in diff_data["diffs"]["n1/n2"]["n2"]:
assert diff["first_name"].endswith(
"-modified"
), f"Modified row {diff['index']} doesn't have expected suffix"

# Verify the control rows are not modified
for diff in diff_data["diffs"]["n1/n2"]["n1"]:
assert not diff["first_name"].endswith(
"-modified"
), f"Control row {diff['index']} shouldn't have modification suffix"

# Repair to restore state
cli.table_repair_cli(
"eqn-t9da",
table_name,
diff_file_path.path,
cli.table_repair(
cluster_name="eqn-t9da",
diff_file=diff_file_path.path,
table_name=table_name,
source_of_truth="n1",
)

Expand Down
Loading