Skip to content

Commit 34335fc

Browse files
authored
feat(config): improve merge conflict behavior (#638)
1 parent b2292de commit 34335fc

File tree

8 files changed

+163
-72
lines changed

8 files changed

+163
-72
lines changed

.github/workflows/test_schema.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111

1212
steps:
1313
- uses: actions/checkout@v4
14-
- run: pip install pydantic pyyaml yamlcore
14+
- run: pip install pydantic pydantic-partial pyyaml yamlcore
1515
- run: python3 src/penguin/penguin_config/gen_docs.py docs > schema_doc.md
1616
# Ensure generated schema_doc.md matches the one in the repo at docs/schema_doc.md
1717
- run: |

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
233233
lz4 \
234234
openai \
235235
pydantic \
236+
pydantic-partial \
236237
pyelftools \
237238
pyyaml \
238239
pyvis \

docs/schema_doc.md

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ intel64
7979
|||
8080
|-|-|
8181
|__Type__|string or null|
82-
|__Default__|`./base/fs.tar.gz`|
82+
|__Default__|`null`|
8383

8484

8585
```yaml
@@ -317,7 +317,8 @@ true
317317

318318
|||
319319
|-|-|
320-
|__Type__|string or null|
320+
|__Type__|string|
321+
|__Patch merge behavior__|Concatenate strings separated by `' '`|
321322
|__Default__|`null`|
322323

323324
A list of additional QEMU command-line arguments to use when booting the guest
@@ -852,7 +853,7 @@ NVRAM values to add to the guest
852853
|||
853854
|-|-|
854855
|__Type__|list of string|
855-
|__Default__|`null`|
856+
|__Default__|`[]`|
856857

857858
Names for guest network interfaces
858859

@@ -888,7 +889,7 @@ Value of the U-Boot environment variable
888889
|||
889890
|-|-|
890891
|__Type__|list of integer|
891-
|__Default__|`null`|
892+
|__Default__|`[]`|
892893

893894
Signals numbers to block within the guest. Supported values are 6 (SIGABRT), 9 (SIGKILL), 15 (SIGTERM), and 17 (SIGCHLD).
894895

@@ -934,7 +935,8 @@ nvram_init
934935

935936
|||
936937
|-|-|
937-
|__Type__|string or null|
938+
|__Type__|string|
939+
|__Patch merge behavior__|Concatenate strings separated by `'\n'`|
938940
|__Default__|`null`|
939941

940942
Custom source code for library functions to intercept and model
@@ -1241,7 +1243,7 @@ thumb
12411243

12421244
|||
12431245
|-|-|
1244-
|__Type__|string|
1246+
|__Type__|string or null|
12451247
|__Default__|`null`|
12461248

12471249

@@ -1264,6 +1266,10 @@ Whether to enable this plugin (default depends on plugin)
12641266

12651267
## `network` Network Configuration
12661268

1269+
|||
1270+
|-|-|
1271+
|__Default__|`null`|
1272+
12671273
Configuration for networks to attach to guest
12681274

12691275
### `network.external` Set up NAT for outgoing connections

src/penguin/common.py

Lines changed: 79 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import hashlib
22
import logging
33
import re
4-
from pathlib import Path
54
import coloredlogs
65
import yaml
76
from os.path import join, isfile
8-
from yamlcore import CoreLoader, CoreDumper
7+
from yamlcore import CoreDumper
98

109

1110
# Hex integers
@@ -52,44 +51,87 @@ def hash_yaml(section_to_hash):
5251
return hash_digest
5352

5453

55-
def patch_config(base_config, patch):
56-
# Merge configs.
57-
def _recursive_update(base, new):
58-
for k, v in new.items():
59-
if isinstance(v, dict):
60-
base[k] = _recursive_update(base.get(k, {}), v)
61-
elif isinstance(v, list):
62-
# Append
63-
base[k] = base.get(k, []) + v
64-
else:
65-
base[k] = v
66-
return base
67-
68-
if issubclass(type(patch), Path):
69-
with open(patch, "r") as f:
70-
patch = yaml.load(f, Loader=CoreLoader)
54+
def patch_config(logger, base_config, patch):
7155
if not patch:
7256
# Empty patch, possibly an empty file or one with all comments
7357
return base_config
74-
for key, value in patch.items():
75-
# Check if the key already exists in the base_config
76-
if key in base_config:
77-
# If the value is a dictionary, update subfields
78-
if isinstance(value, dict):
79-
# Recursive update to handle nested dictionaries
80-
base_config[key] = _recursive_update(base_config.get(key, {}), value)
81-
elif isinstance(value, list):
82-
# Merge lists
83-
seen = set()
84-
combined = base_config[key] + value
85-
base_config[key] = [x for x in combined if not (x in seen or seen.add(x))]
86-
else:
87-
# Replace the base value with the incoming value
88-
base_config[key] = value
89-
else:
90-
# New key, add all data directly
91-
base_config[key] = value
92-
return base_config
58+
59+
# Merge configs.
60+
def _recursive_update(base, new, config_option):
61+
if base is None:
62+
return new
63+
if new is None:
64+
return base
65+
66+
assert type(base) is type(new)
67+
68+
if hasattr(base, "merge"):
69+
return base.merge(new)
70+
71+
if hasattr(base, "model_fields_set"):
72+
result = dict()
73+
for base_key in base.model_fields_set:
74+
result[base_key] = getattr(base, base_key)
75+
if base.model_extra is not None:
76+
for base_key, base_value in base.model_extra.items():
77+
result[base_key] = base_value
78+
for new_key in new.model_fields_set:
79+
new_value = getattr(new, new_key)
80+
if new_key in result:
81+
result[new_key] = _recursive_update(
82+
result[new_key],
83+
new_value,
84+
f"{config_option}.{new_key}" if config_option else new_key,
85+
)
86+
else:
87+
result[new_key] = new_value
88+
if new.model_extra is not None:
89+
for new_key, new_value in new.model_extra.items():
90+
if new_key in result:
91+
result[new_key] = _recursive_update(
92+
result[new_key],
93+
new_value,
94+
f"{config_option}.{new_key}" if config_option else new_key,
95+
)
96+
else:
97+
result[new_key] = new_value
98+
return type(base)(**result)
99+
100+
if isinstance(base, list):
101+
return base + new
102+
103+
if isinstance(base, dict):
104+
result = dict()
105+
for key, base_value in base.items():
106+
if key in new:
107+
new_value = new[key]
108+
result[key] = _recursive_update(
109+
base_value,
110+
new_value,
111+
f"{config_option}.{key}" if config_option else key,
112+
)
113+
else:
114+
result[key] = base_value
115+
for new_key, new_value in new.items():
116+
if new_key not in base:
117+
result[new_key] = new_value
118+
return result
119+
120+
if base == new:
121+
return base
122+
123+
base_str = yaml.dump(base).strip().removesuffix("...").strip()
124+
new_str = yaml.dump(new).strip().removesuffix("...").strip()
125+
change_str = (
126+
f"\n```\n{base_str}\n```↓\n```\n{new_str}\n```"
127+
if "\n" in base_str + new_str
128+
else f"`{base_str}` → `{new_str}`"
129+
)
130+
logger.warning(f"patch conflict: {config_option}: {change_str}")
131+
132+
return new
133+
134+
return _recursive_update(base_config, patch, None)
93135

94136

95137
class PathHighlightingFormatter(coloredlogs.ColoredFormatter):

src/penguin/genetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ def get_patched_config(self, config: ConfigChromosome):
509509
for p in patched_config.get("patches", []):
510510
# kinda funny how the names wound up...
511511
p_yaml = load_unpatched_config(os.path.join(self.proj_dir, p))
512-
patched_config = patch_config(patched_config, p_yaml)
512+
patched_config = patch_config(self.logger, patched_config, p_yaml)
513513
return patched_config
514514

515515
def run_config(self, config: ConfigChromosome, run_index: int) -> Tuple[List[Failure], float]:

src/penguin/penguin_config/__init__.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -191,26 +191,31 @@ def load_config(proj_dir, path, validate=True):
191191
"""Load penguin config from path"""
192192
with open(path, "r") as f:
193193
config = yaml.load(f, Loader=CoreLoader)
194+
config = structure.Patch(**config)
194195
# look for files called patch_*.yaml in the same directory as the config file
195-
if config["core"].get("auto_patching", False) is True:
196+
if config.core.auto_patching:
196197
patch_files = list(Path(proj_dir).glob("patch_*.yaml"))
197198
patches_dir = Path(proj_dir, "patches")
198199
if patches_dir.exists():
199200
patch_files += list(patches_dir.glob("*.yaml"))
200201
if patch_files:
201-
if config.get("patches", None) is None:
202-
config["patches"] = []
202+
if config.patches.root is None:
203+
config.patches.root = []
203204
for patch_file in patch_files:
204-
config["patches"].append(str(patch_file))
205-
if config.get("patches", None) is not None:
206-
patch_list = config["patches"]
205+
config.patches.root.append(str(patch_file))
206+
if config.patches.root is not None:
207+
patch_list = config.patches.root
207208
for patch in patch_list:
208209
# patches are loaded relative to the main config file
209210
patch_relocated = Path(proj_dir, patch)
210211
if patch_relocated.exists():
211212
# TODO: If we're missing a patch we should warn, but this happens 3-4x
212213
# and that's too verbose.
213-
config = patch_config(config, patch_relocated)
214+
with open(patch_relocated, "r") as f:
215+
patch = yaml.load(f, Loader=CoreLoader)
216+
patch = structure.Patch(**patch)
217+
config = patch_config(logger, config, patch)
218+
config = config.model_dump()
214219
if config["core"].get("guest_cmd", False) is True:
215220
config["static_files"]["/igloo/utils/guesthopper"] = dict(
216221
type="host_file",

src/penguin/penguin_config/gen_docs.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def gen_docs_field(path, docs_field, include_type=True):
7575
out += "|-|-|\n"
7676
if include_type:
7777
out += f"|__Type__|{gen_docs_type_name(docs_field.type_)}|\n"
78+
if docs_field.merge_behavior is not None:
79+
out += f"|__Patch merge behavior__|{docs_field.merge_behavior}|\n"
7880
if include_docs:
7981
out += f"|__Default__|`{gen_docs_yaml_dump(docs_field.default)}`|\n"
8082
out += "\n"
@@ -94,6 +96,7 @@ class DocsField:
9496
"""Information about a field of the config, for generating docs"""
9597

9698
type_: type
99+
merge_behavior: Optional[str]
97100
title: Optional[str]
98101
description: Optional[str]
99102
default: Union[PydanticUndefinedType, Any]
@@ -112,7 +115,11 @@ def from_type(type_: type) -> "DocsField":
112115

113116
if hasattr(type_, "model_config"):
114117
# Inherits BaseModel or RootModel
115-
title = type_.model_config["title"]
118+
try:
119+
merge_behavior = type_.merge_behavior()
120+
except AttributeError:
121+
merge_behavior = None
122+
title = type_.model_config.get("title")
116123
description = type_.__doc__
117124
try:
118125
default = type_.model_config["default"]
@@ -124,16 +131,17 @@ def from_type(type_: type) -> "DocsField":
124131
examples = []
125132
else:
126133
# Doesn't inherit BaseModel or RootModel, so make all values empty
127-
title = description = None
134+
merge_behavior = title = description = None
128135
default = PydanticUndefined
129136
examples = []
130-
return DocsField(type_, title, description, default, examples)
137+
return DocsField(type_, merge_behavior, title, description, default, examples)
131138

132139
def from_field(field) -> "DocsField":
133140
"""Create a `DocsField` from a Pydantic `Field`"""
134141

135142
return DocsField(
136143
field.annotation,
144+
None,
137145
field.title,
138146
field.description,
139147
field.default,
@@ -144,10 +152,15 @@ def merge(self, other: "DocsField") -> "DocsField":
144152
"""Create a `DocsField` by combining two `DocsField`s, using the second to fill in gaps in the first"""
145153
return DocsField(
146154
self.type_,
155+
self.merge_behavior or other.merge_behavior,
147156
self.title or other.title,
148157
self.description or other.description,
149158
other.default if self.default is PydanticUndefined else self.default,
150-
self.examples + other.examples,
159+
(
160+
self.examples
161+
if self.examples == other.examples
162+
else self.examples + other.examples
163+
),
151164
)
152165

153166

@@ -229,7 +242,7 @@ def gen_docs(path=[], docs_field=DocsField.from_type(structure.Main)):
229242
# The type is `Optional[T]`. Try again with just `T`.
230243
out += gen_docs(
231244
path=path,
232-
docs_field=DocsField.from_type(first_model_arg),
245+
docs_field=DocsField.from_type(first_model_arg).merge(docs_field),
233246
)
234247
else:
235248
# The type does not inherit from `BaseModel` and it doesn't have an argument that does.

0 commit comments

Comments
 (0)