Skip to content

Commit 210d185

Browse files
committed
Deduplicate module streams when merging
Signed-off-by: Stephen Gallagher <[email protected]>
1 parent 2719fb4 commit 210d185

File tree

4 files changed

+13796
-22
lines changed

4 files changed

+13796
-22
lines changed

modulemd/v1/modulemd-prioritizer.c

Lines changed: 139 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ modulemd_prioritizer_class_init (ModulemdPrioritizerClass *klass)
6060
object_class->set_property = NULL;
6161
}
6262

63+
static GPtrArray *
64+
_deduplicate_module_streams (const GPtrArray *first,
65+
const GPtrArray *second,
66+
GError **error);
67+
6368
static void
6469
_modulemd_ptr_array_unref (gpointer ptr)
6570
{
@@ -82,7 +87,10 @@ modulemd_prioritizer_add (ModulemdPrioritizer *self,
8287
{
8388
GPtrArray *current_objects = NULL;
8489
g_autoptr (GPtrArray) concat_objects = NULL;
90+
g_autoptr (GPtrArray) deduped_objects = NULL;
8591
g_autoptr (GPtrArray) merged_objects = NULL;
92+
g_autoptr (ModulemdSimpleSet) nsvcs = modulemd_simpleset_new ();
93+
g_autofree gchar *nsvc = NULL;
8694
gint64 *prio = NULL;
8795
gsize i;
8896

@@ -142,14 +150,17 @@ modulemd_prioritizer_add (ModulemdPrioritizer *self,
142150
}
143151
}
144152

145-
for (i = 0; i < objects->len; i++)
153+
deduped_objects =
154+
_deduplicate_module_streams (concat_objects, objects, error);
155+
if (!deduped_objects)
146156
{
147-
g_ptr_array_add (concat_objects,
148-
g_object_ref (g_ptr_array_index (objects, i)));
157+
/* Something went wrong. Return the error here. */
158+
g_clear_pointer (&prio, g_free);
159+
return FALSE;
149160
}
150161

151162
merged_objects =
152-
modulemd_merge_defaults (concat_objects, NULL, FALSE, error);
163+
modulemd_merge_defaults (deduped_objects, NULL, FALSE, error);
153164
if (!merged_objects)
154165
{
155166
/* Something went wrong. Return the error here. */
@@ -188,18 +199,18 @@ GPtrArray *
188199
modulemd_prioritizer_resolve (ModulemdPrioritizer *self, GError **error)
189200
{
190201
g_autoptr (GPtrArray) current = NULL;
191-
g_autoptr (GPtrArray) next = NULL;
202+
g_autoptr (GPtrArray) prev = NULL;
192203
g_autoptr (GPtrArray) tmp = NULL;
204+
g_autoptr (GPtrArray) deduped_objects = NULL;
193205
g_autoptr (GList) priority_levels = NULL;
194206
GList *current_level = NULL;
195207

196208
g_return_val_if_fail (MODULEMD_IS_PRIORITIZER (self), FALSE);
197209
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
198210

199-
current_level = priority_levels =
200-
_modulemd_ordered_int64_keys (self->priorities);
211+
priority_levels = _modulemd_ordered_int64_keys (self->priorities);
201212

202-
if (!current_level)
213+
if (!priority_levels)
203214
{
204215
/* Nothing has been added to the resolver. */
205216
g_set_error (error,
@@ -210,32 +221,144 @@ modulemd_prioritizer_resolve (ModulemdPrioritizer *self, GError **error)
210221
return NULL;
211222
}
212223

224+
/* Go through the merge from highest priority down to lowest. */
225+
current_level = g_list_last (priority_levels);
226+
213227
current = g_ptr_array_ref (
214-
g_hash_table_lookup (self->priorities, priority_levels->data));
228+
g_hash_table_lookup (self->priorities, current_level->data));
215229

216-
while (current_level->next)
230+
while (current_level->prev)
217231
{
218-
next = g_ptr_array_ref (
219-
g_hash_table_lookup (self->priorities, current_level->next->data));
232+
prev = g_ptr_array_ref (
233+
g_hash_table_lookup (self->priorities, current_level->prev->data));
220234

221235
/* Merge the values, replacing any conflicts */
222-
tmp = modulemd_merge_defaults (current, next, TRUE, error);
236+
tmp = modulemd_merge_defaults (prev, current, TRUE, error);
223237
if (!tmp)
224238
{
225239
/* Something went wrong with the merge. Return the error */
226240
return NULL;
227241
}
242+
243+
/* Deduplicate after the merge */
244+
deduped_objects = _deduplicate_module_streams (tmp, NULL, error);
245+
if (!deduped_objects)
246+
{
247+
/* Something went wrong. Return the error here. */
248+
return NULL;
249+
}
250+
228251
g_clear_pointer (&current, g_ptr_array_unref);
229-
current = g_ptr_array_ref (tmp);
252+
current = g_ptr_array_ref (deduped_objects);
230253
g_clear_pointer (&tmp, g_ptr_array_unref);
231-
g_clear_pointer (&next, g_ptr_array_unref);
254+
g_clear_pointer (&deduped_objects, g_ptr_array_unref);
255+
g_clear_pointer (&prev, g_ptr_array_unref);
232256

233-
current_level = current_level->next;
257+
current_level = current_level->prev;
234258
}
235259

236260
return g_ptr_array_ref (current);
237261
}
238262

263+
static GPtrArray *
264+
_deduplicate_module_streams (const GPtrArray *first,
265+
const GPtrArray *second,
266+
GError **error)
267+
{
268+
GObject *object = NULL;
269+
g_autoptr (GPtrArray) deduplicated = NULL;
270+
g_autoptr (ModulemdSimpleSet) nsvcs = modulemd_simpleset_new ();
271+
g_autofree gchar *nsvc = NULL;
272+
gssize reserved_size, i;
273+
274+
g_return_val_if_fail (first, NULL);
275+
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
276+
277+
/* Assume the common case that there are no duplicates and preallocate
278+
* space to hold the entire set.
279+
*/
280+
reserved_size = first->len;
281+
if (second)
282+
{
283+
reserved_size += second->len;
284+
}
285+
286+
deduplicated = g_ptr_array_new_full (reserved_size, g_object_unref);
287+
288+
/* We check the second list here as a preventative measure. In a proper
289+
* implementation of this, we'd do a full check of the module stream entries
290+
* to ensure that they don't have the same NSVC but different content. For
291+
* now, however, we'll just assume that the second list has the right data
292+
* since it's likely to be newer.
293+
*/
294+
if (second)
295+
for (i = 0; i < second->len; i++)
296+
{
297+
object = g_ptr_array_index (second, i);
298+
299+
if (MODULEMD_IS_MODULE (object))
300+
{
301+
nsvc = modulemd_module_dup_nsvc (MODULEMD_MODULE (object));
302+
}
303+
else if (MODULEMD_IS_MODULESTREAM (object))
304+
{
305+
nsvc =
306+
modulemd_modulestream_get_nsvc (MODULEMD_MODULESTREAM (object));
307+
}
308+
if (nsvc)
309+
{
310+
if (modulemd_simpleset_contains (nsvcs, nsvc))
311+
{
312+
/* We've seen this NSVC before; skip it */
313+
continue;
314+
}
315+
316+
/* Save this NSVC so we don't add it twice */
317+
modulemd_simpleset_add (nsvcs, nsvc);
318+
g_clear_pointer (&nsvc, g_free);
319+
}
320+
321+
g_ptr_array_add (deduplicated, g_object_ref (object));
322+
}
323+
324+
/* For the 'first' list, go through in reverse order, because this may be
325+
* called after the modulemd_merge_defaults() routine has already
326+
* concatenated the higher-priority list. This will ensure that the
327+
* other list wins any merge disputes.
328+
*/
329+
for (i = (gsize)first->len - 1; i >= 0; i--)
330+
{
331+
object = g_ptr_array_index (first, i);
332+
333+
if (MODULEMD_IS_MODULE (object))
334+
{
335+
nsvc = modulemd_module_dup_nsvc (MODULEMD_MODULE (object));
336+
}
337+
else if (MODULEMD_IS_MODULESTREAM (object))
338+
{
339+
nsvc =
340+
modulemd_modulestream_get_nsvc (MODULEMD_MODULESTREAM (object));
341+
}
342+
343+
if (nsvc)
344+
{
345+
if (modulemd_simpleset_contains (nsvcs, nsvc))
346+
{
347+
/* We've seen this NSVC before; skip it */
348+
continue;
349+
}
350+
351+
/* Save this NSVC so we don't add it twice */
352+
modulemd_simpleset_add (nsvcs, nsvc);
353+
g_clear_pointer (&nsvc, g_free);
354+
}
355+
356+
g_ptr_array_add (deduplicated, g_object_ref (object));
357+
}
358+
359+
return g_ptr_array_ref (deduplicated);
360+
}
361+
239362

240363
GHashTable *
241364
modulemd_prioritizer_resolve_index (ModulemdPrioritizer *self, GError **error)

modulemd/v1/tests/test-modulemd-defaults.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ modulemd_defaults_test_prioritizer (DefaultsFixture *fixture,
718718

719719

720720
/* HTTPD */
721-
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 0));
721+
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 2));
722722
g_assert_cmpstr (modulemd_defaults_peek_module_name (defaults), ==, "httpd");
723723
g_assert_cmpstr (
724724
modulemd_defaults_peek_default_stream (defaults), ==, "2.4");
@@ -760,7 +760,7 @@ modulemd_defaults_test_prioritizer (DefaultsFixture *fixture,
760760
g_hash_table_lookup (htable, "9.0"), "supermegaultra"));
761761

762762
/* POSTGRESQL */
763-
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 2));
763+
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 0));
764764
g_assert_cmpstr (
765765
modulemd_defaults_peek_module_name (defaults), ==, "postgresql");
766766
g_assert_cmpstr (
@@ -900,7 +900,7 @@ modulemd_defaults_test_index_prioritizer (DefaultsFixture *fixture,
900900

901901

902902
/* HTTPD */
903-
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 0));
903+
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 2));
904904
g_assert_cmpstr (modulemd_defaults_peek_module_name (defaults), ==, "httpd");
905905
g_assert_cmpstr (
906906
modulemd_defaults_peek_default_stream (defaults), ==, "2.4");
@@ -942,7 +942,7 @@ modulemd_defaults_test_index_prioritizer (DefaultsFixture *fixture,
942942
g_hash_table_lookup (htable, "9.0"), "supermegaultra"));
943943

944944
/* POSTGRESQL */
945-
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 2));
945+
defaults = MODULEMD_DEFAULTS (g_ptr_array_index (merged_objects, 0));
946946
g_assert_cmpstr (
947947
modulemd_defaults_peek_module_name (defaults), ==, "postgresql");
948948
g_assert_cmpstr (

modulemd/v1/tests/test-modulemd-python.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ def test_issue25(self):
175175
assert mmd.peek_rpm_buildopts() != {}
176176
assert mmd.peek_rpm_buildopts()['macros'] == '%my_macro 1'
177177
dumped = mmd.dumps()
178-
print("YAML:\n%s" % dumped, file=sys.stderr)
179178
mmd2 = Modulemd.Module.new_from_string(dumped)
180179
assert mmd2.peek_rpm_buildopts() != {}
181180
assert mmd2.peek_rpm_buildopts()['macros'] == '%my_macro 1'
@@ -186,7 +185,7 @@ def test_issue33(self):
186185
# double-freeing memory.
187186
defs = Modulemd.Module.new_all_from_file_ext(
188187
"%s/mod-defaults/spec.v1.yaml" % os.getenv('MESON_SOURCE_ROOT'))
189-
print(defs)
188+
assert defs
190189

191190
def test_issue77(self):
192191
# This would crash on a type constraint accepting a signed value
@@ -266,6 +265,36 @@ def test_issue85(self):
266265
assert mmd3.get_module_components(
267266
)['testmodule'].peek_ref() == 'private-x'
268267

268+
def test_issue88(self):
269+
# Here, load the same file twice.
270+
# All entries in these two lists should be duplicates of each other.
271+
objects_from_repo_a = Modulemd.objects_from_file(
272+
'%s/test_data/issue88.yaml' % os.getenv('MESON_SOURCE_ROOT'))
273+
objects_from_repo_b = Modulemd.objects_from_file(
274+
'%s/test_data/issue88.yaml' % os.getenv('MESON_SOURCE_ROOT'))
275+
276+
# Test at the same priority level
277+
prioritizer = Modulemd.Prioritizer()
278+
prioritizer.add(objects_from_repo_a, 0)
279+
prioritizer.add(objects_from_repo_b, 0)
280+
supposedly_merged_objects = prioritizer.resolve()
281+
282+
# I would expect all three of these to be the same length... but they
283+
# are not.
284+
assert len(objects_from_repo_a) == len(objects_from_repo_b)
285+
assert len(objects_from_repo_a) == len(supposedly_merged_objects)
286+
287+
# Test at different priorities
288+
prioritizer = Modulemd.Prioritizer()
289+
prioritizer.add(objects_from_repo_a, 0)
290+
prioritizer.add(objects_from_repo_b, 1)
291+
supposedly_merged_objects = prioritizer.resolve()
292+
293+
# I would expect all three of these to be the same length... but they
294+
# are not.
295+
assert len(objects_from_repo_a) == len(objects_from_repo_b)
296+
assert len(objects_from_repo_a) == len(supposedly_merged_objects)
297+
269298

270299
class TestIntent(unittest.TestCase):
271300

0 commit comments

Comments
 (0)