From fcaa56298660224537988108118329e3212f92b5 Mon Sep 17 00:00:00 2001 From: CarlosCoelhoSL Date: Mon, 3 Mar 2025 18:11:39 +0000 Subject: [PATCH 1/4] adds incremental override and resource folder to state.json --- digital_land/cli.py | 55 +++++++++++++++++-- digital_land/commands.py | 29 +++++++++- digital_land/state.py | 10 +++- tests/data/state/resource/example_resource | 1 + .../state/resource_diff/different_resource | 1 + tests/e2e/test_state.py | 36 ++++++++++++ tests/integration/test_state.py | 4 ++ 7 files changed, 129 insertions(+), 7 deletions(-) create mode 100644 tests/data/state/resource/example_resource create mode 100644 tests/data/state/resource_diff/different_resource diff --git a/digital_land/cli.py b/digital_land/cli.py index ca92ef3f4..6aa15884c 100644 --- a/digital_land/cli.py +++ b/digital_land/cli.py @@ -642,6 +642,13 @@ def config_load_cmd(ctx, config_path): default="pipeline", help="directory containing the pipeline", ) +@click.option( + "--resource-dir", + type=click.Path(), + default="collection/resource", + help="directory containing resources", +) +@click.option("--incremental-override", type=click.BOOL, default=False) @click.option( "--output-path", "-o", @@ -649,8 +656,22 @@ def config_load_cmd(ctx, config_path): default="state.json", help="path of the output state file", ) -def save_state_cmd(specification_dir, collection_dir, pipeline_dir, output_path): - save_state(specification_dir, collection_dir, pipeline_dir, output_path) +def save_state_cmd( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + output_path, +): + save_state( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + output_path, + ) @cli.command( @@ -675,17 +696,43 @@ def save_state_cmd(specification_dir, collection_dir, pipeline_dir, output_path) default="pipeline", help="directory containing the pipeline", ) +@click.option( + "--resource-dir", + type=click.Path(), + default="collection/resource", + help="directory containing resources", +) +@click.option("--incremental-override", type=click.BOOL, default=False) @click.option( "--state-path", type=click.Path(), default="state.json", help="path of the output state file", ) -def check_state_cmd(specification_dir, collection_dir, pipeline_dir, state_path): +def check_state_cmd( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + state_path, +): # If the state isn't the same, use a non-zero return code so scripts can # detect this, and print a message. If it is the same, exit silenty wirh a # 0 retun code. - diffs = compare_state(specification_dir, collection_dir, pipeline_dir, state_path) + + if incremental_override: + print("State comparison skipped as incremental override enabled") + sys.exit(1) + + diffs = compare_state( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + state_path, + ) if diffs: print(f"State differs from {state_path} - {', '.join(diffs)}") sys.exit(1) diff --git a/digital_land/commands.py b/digital_land/commands.py index 870eeab9c..b4a6f6fa0 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -1405,29 +1405,54 @@ def organisation_check(**kwargs): package.check(lpa_path, output_path) -def save_state(specification_dir, collection_dir, pipeline_dir, output_path): +def save_state( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + output_path, +): state = State.build( specification_dir=specification_dir, collection_dir=collection_dir, pipeline_dir=pipeline_dir, + resource_dir=resource_dir, + incremental_override=incremental_override, ) + # if we include incremental override for this state.build what do we do about the others? state.save( output_path=output_path, ) -def compare_state(specification_dir, collection_dir, pipeline_dir, state_path): +def compare_state( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + state_path, +): """Compares the current state against the one in state_path. Returns a list of different elements, or None if they are the same.""" current = State.build( specification_dir=specification_dir, collection_dir=collection_dir, pipeline_dir=pipeline_dir, + resource_dir=resource_dir, + incremental_override=incremental_override, ) + # in here current incremental override must be false compare = State.load(state_path) + # we don't want to include whether the previous state was an incremental override in comparison + current.pop("incremental_override") + compare.pop("incremental_override") if current == compare: + # if current override = false and compare override = true we need to update state.json on s3 but we don't need to process everything + # removing the incremental override from the comparison should mean we don't process everything unless spec/code has changed return None return [i for i in current.keys() if current[i] != compare[i]] diff --git a/digital_land/state.py b/digital_land/state.py index 81f4fcc32..380eb99e3 100644 --- a/digital_land/state.py +++ b/digital_land/state.py @@ -12,7 +12,13 @@ def __init__(self, data): for k, v in data.items(): self.__setitem__(k, v) - def build(specification_dir, collection_dir, pipeline_dir): + def build( + specification_dir, + collection_dir, + pipeline_dir, + resource_dir, + incremental_override, + ): """Build a state object from the current configuration and code""" return State( { @@ -21,7 +27,9 @@ def build(specification_dir, collection_dir, pipeline_dir): "collection": State.get_dir_hash( collection_dir, ["log/", "log.csv", "pipeline.mk", "resource/"] ), + "resource": State.get_dir_hash(resource_dir), "pipeline": State.get_dir_hash(pipeline_dir), + "incremental_override": incremental_override, } ) diff --git a/tests/data/state/resource/example_resource b/tests/data/state/resource/example_resource new file mode 100644 index 000000000..4d400557e --- /dev/null +++ b/tests/data/state/resource/example_resource @@ -0,0 +1 @@ +resource here \ No newline at end of file diff --git a/tests/data/state/resource_diff/different_resource b/tests/data/state/resource_diff/different_resource new file mode 100644 index 000000000..34fa90ce9 --- /dev/null +++ b/tests/data/state/resource_diff/different_resource @@ -0,0 +1 @@ +this resource is different \ No newline at end of file diff --git a/tests/e2e/test_state.py b/tests/e2e/test_state.py index 2e38d37f9..51ee5bd0a 100644 --- a/tests/e2e/test_state.py +++ b/tests/e2e/test_state.py @@ -7,6 +7,7 @@ specification_hash = "ebe620f5228d01170b1857bad3e738aa432f5fd6" collection_hash = "ed4c5979268ad880f7edbdc2047cfcfa6b9ee3b4" pipeline_hash = "4a5a778d678db812e4f3d498a5aaa6f39af38d10" +resource_hash = "063e908c6695671063dee27c534bf3471aa3f5d5" def get_code_hash(): @@ -26,6 +27,8 @@ def test_state(tmp_path): specification_dir=os.path.join(test_data_dir, "specification"), collection_dir=os.path.join(test_data_dir, "collection"), pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource"), + incremental_override=True, output_path=state_path, ) @@ -36,18 +39,24 @@ def test_state(tmp_path): "code", "specification", "collection", + "resource", "pipeline", + "incremental_override", ] assert state_data["code"] == get_code_hash() assert state_data["specification"] == specification_hash assert state_data["collection"] == collection_hash assert state_data["pipeline"] == pipeline_hash + assert state_data["resource"] == resource_hash + assert state_data["incremental_override"] assert ( compare_state( specification_dir=os.path.join(test_data_dir, "specification"), collection_dir=os.path.join(test_data_dir, "collection"), pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource"), + incremental_override=True, state_path=state_path, ) is None @@ -58,6 +67,8 @@ def test_state(tmp_path): specification_dir=os.path.join(test_data_dir, "specification"), collection_dir=os.path.join(test_data_dir, "collection_exclude"), pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource"), + incremental_override=True, state_path=state_path, ) is None @@ -67,5 +78,30 @@ def test_state(tmp_path): specification_dir=os.path.join(test_data_dir, "specification"), collection_dir=os.path.join(test_data_dir, "collection_blank"), pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource"), + incremental_override=True, state_path=state_path, ) == ["collection"] + + assert compare_state( + specification_dir=os.path.join(test_data_dir, "specification"), + collection_dir=os.path.join(test_data_dir, "collection"), + pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource_diff"), + incremental_override=True, + state_path=state_path, + ) == ["resource"] + + # we shouldn't include the incremental override value in state comparison + # so test it isn't flagged if different + assert ( + compare_state( + specification_dir=os.path.join(test_data_dir, "specification"), + collection_dir=os.path.join(test_data_dir, "collection"), + pipeline_dir=os.path.join(test_data_dir, "pipeline"), + resource_dir=os.path.join(test_data_dir, "resource"), + incremental_override=False, + state_path=state_path, + ) + is None + ) diff --git a/tests/integration/test_state.py b/tests/integration/test_state.py index 5e7be1e85..500175708 100644 --- a/tests/integration/test_state.py +++ b/tests/integration/test_state.py @@ -65,6 +65,8 @@ def test_state_build_persist(tmp_path): os.path.join(test_dir, "specification"), os.path.join(test_dir, "collection"), os.path.join(test_dir, "pipeline"), + os.path.join(test_dir, "resource"), + True, ) state_1.save(tmp_json) @@ -77,6 +79,8 @@ def test_state_build_persist(tmp_path): os.path.join(test_dir, "specification"), os.path.join(test_dir, "collection_blank"), os.path.join(test_dir, "pipeline"), + os.path.join(test_dir, "resource"), + True, ) # Check that's different from the first one From cd0085462bdad081d75e7b26dc13eb4cbca5fea7 Mon Sep 17 00:00:00 2001 From: CarlosCoelhoSL Date: Wed, 5 Mar 2025 12:33:21 +0000 Subject: [PATCH 2/4] handles cases when state doesn't have incremental override --- digital_land/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/digital_land/commands.py b/digital_land/commands.py index b4a6f6fa0..61b5324d2 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -1447,8 +1447,8 @@ def compare_state( compare = State.load(state_path) # we don't want to include whether the previous state was an incremental override in comparison - current.pop("incremental_override") - compare.pop("incremental_override") + current.pop("incremental_override", None) + compare.pop("incremental_override", None) if current == compare: # if current override = false and compare override = true we need to update state.json on s3 but we don't need to process everything From 4328279c51e4e266faf5c64c2452ceb8f4f78099 Mon Sep 17 00:00:00 2001 From: CarlosCoelhoSL Date: Wed, 5 Mar 2025 12:38:52 +0000 Subject: [PATCH 3/4] uses .get(i) in comparison --- digital_land/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/digital_land/commands.py b/digital_land/commands.py index 61b5324d2..4ecc00f61 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -1455,4 +1455,4 @@ def compare_state( # removing the incremental override from the comparison should mean we don't process everything unless spec/code has changed return None - return [i for i in current.keys() if current[i] != compare[i]] + return [i for i in current.keys() if current[i] != compare.get(i, "")] From d0024375f60e13d7b7e2df3ae55c78e713a0f319 Mon Sep 17 00:00:00 2001 From: CarlosCoelhoSL Date: Wed, 5 Mar 2025 16:06:22 +0000 Subject: [PATCH 4/4] tidies --- digital_land/commands.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/digital_land/commands.py b/digital_land/commands.py index 4ecc00f61..eb26a4035 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -1420,7 +1420,6 @@ def save_state( resource_dir=resource_dir, incremental_override=incremental_override, ) - # if we include incremental override for this state.build what do we do about the others? state.save( output_path=output_path, ) @@ -1451,8 +1450,6 @@ def compare_state( compare.pop("incremental_override", None) if current == compare: - # if current override = false and compare override = true we need to update state.json on s3 but we don't need to process everything - # removing the incremental override from the comparison should mean we don't process everything unless spec/code has changed return None return [i for i in current.keys() if current[i] != compare.get(i, "")]