Skip to content

Commit 60afdf4

Browse files
committed
Added --deterministic switch to create a deterministic schema
A deterministic schema takes the top-level rng:interleave elements and replaces them with a choice of zero or more XMP properties
1 parent 9cadb6b commit 60afdf4

File tree

1 file changed

+99
-53
lines changed

1 file changed

+99
-53
lines changed

XMP_RNG_Suite/RNGMerger/RNGMerger.cs

Lines changed: 99 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using Microsoft.Extensions.Configuration.Ini;
22
using Mono.Options;
3+
using System.Diagnostics;
34
using System.Text;
45
using System.Xml;
56
using System.Xml.Linq;
@@ -18,15 +19,16 @@ static int Main(string[] args)
1819
{
1920
string? inputPath = null;
2021
string? outputDir = null;
21-
bool dropDescriptions = false;
22+
RNGSchemaFlags flags = RNGSchemaFlags.None;
2223
var presets = new List<string>();
2324

2425
var options = new OptionSet()
2526
{
2627
{ "i|input=", "The input RELAX NG schema", (i) => inputPath = i },
2728
{ "o|outdir=", "The output directory", (o) => outputDir = o },
2829
{ "p|preset=", "Condition preset for the schema generation", presets.Add },
29-
{ "dropdesc", "Drop descritions", (_) => dropDescriptions = true },
30+
{ "dropdesc", "Drop descriptions", (_) => flags |= RNGSchemaFlags.DropDescriptions },
31+
{ "deterministic", "Make the schema deterministic", (_) => flags |= RNGSchemaFlags.Deterministic },
3032
};
3133

3234
_ = options.Parse(args);
@@ -38,7 +40,7 @@ static int Main(string[] args)
3840

3941
try
4042
{
41-
MakeSchemas(inputPath, outputDir, presets, dropDescriptions);
43+
MakeSchemas(inputPath, outputDir, presets, flags);
4244
return 0;
4345
}
4446
catch (Exception ex)
@@ -48,31 +50,30 @@ static int Main(string[] args)
4850
}
4951
}
5052

51-
public static void MakeSchemas(string rngFilePath, string outputPath, IReadOnlyList<string> presets, bool dropDescriptions = false)
53+
public static void MakeSchemas(string rngFilePath, string outputPath,
54+
IReadOnlyList<string> presets, RNGSchemaFlags flags = RNGSchemaFlags.None)
5255
{
5356
Console.WriteLine($"Merging RELAX NG schema from: {rngFilePath} ...");
5457
var mainDoc = XDocument.Load(rngFilePath);
5558
string schemaBaseDir = Path.GetDirectoryName(Path.GetFullPath(rngFilePath))!;
5659

5760
var namespaces = new Dictionary<string, string>();
58-
processIncludes(mainDoc.Root!, mainDoc.Root!, schemaBaseDir,
59-
namespaces, new HashSet<string>(), dropDescriptions);
61+
processIncludes(mainDoc.Root ?? throw new Exception("Invalid document"), mainDoc.Root, schemaBaseDir,
62+
namespaces, new HashSet<string>(), flags.HasFlag(RNGSchemaFlags.DropDescriptions));
6063

6164
foreach (var (prefix, uri) in namespaces)
6265
{
6366
XName nsAttrName = XNamespace.Xmlns + prefix;
6467

65-
if (mainDoc.Root!.Attribute(nsAttrName) == null)
66-
mainDoc.Root!.SetAttributeValue(nsAttrName, uri);
68+
if (mainDoc.Root.Attribute(nsAttrName) == null)
69+
mainDoc.Root.SetAttributeValue(nsAttrName, uri);
6770
}
6871

69-
validateSchema(mainDoc.Root!);
72+
validateSchema(mainDoc.Root);
7073
saveDoc(mainDoc, Path.Combine(outputPath, "Merged_XMP_Packet.rng"), skipIndent: true);
7174

7275
foreach (var preset in presets)
73-
{
74-
makeSchema(mainDoc, preset, outputPath);
75-
}
76+
makeSchema(mainDoc, preset, outputPath, flags.HasFlag(RNGSchemaFlags.Deterministic));
7677
}
7778

7879
static void ShowHelp(OptionSet options)
@@ -90,10 +91,8 @@ static void processIncludes(XElement rootElement, XElement element, string baseP
9091
removeDescendantElements(element, UiNs);
9192

9293
// Remove all comments recursively in the included schema
93-
element.DescendantNodes()
94-
.OfType<XComment>()
95-
.ToList()
96-
.ForEach(c => c.Remove());
94+
foreach (var comment in element.DescendantNodes().OfType<XComment>().ToList())
95+
comment.Remove();
9796

9897
foreach (var include in element.Elements(RngNs + "include").ToList())
9998
{
@@ -148,31 +147,49 @@ static void processIncludes(XElement rootElement, XElement element, string baseP
148147
}
149148
}
150149

151-
static void validateSchema(XElement element)
150+
static void validateSchema(XElement root)
152151
{
153-
var defines = new HashSet<string>();
154-
foreach (var define in element.Descendants(RngNs + "define").Select((XElement r) => r.Attribute("name")?.Value ?? throw new Exception("Missing name attribute")))
152+
var defines = new Dictionary<string, XElement>();
153+
foreach (var define in root.Descendants(RngNs + "define"))
155154
{
156-
if (string.IsNullOrEmpty(define) || define.Contains(':'))
155+
var name = define.Attribute("name")?.Value ?? throw new Exception("Missing name attribute");
156+
if (string.IsNullOrEmpty(name) || name.Contains(':'))
157157
throw new Exception($"Invalid element \"{define}\"");
158158

159-
if (!defines.Add(define))
160-
throw new Exception($"Element \"{define}\" is already defined");
159+
if (!defines.TryAdd(name, define))
160+
throw new Exception($"Define \"{name}\" is already present");
161161
}
162162

163-
var references = element.Descendants(RngNs + "ref").Select((XElement r) => r.Attribute("name")?.Value ?? throw new Exception("Missing name attribute")).ToHashSet();
163+
var references = root.Descendants(RngNs + "ref").Select((XElement r) => r.Attribute("name")?.Value ?? throw new Exception("Missing name attribute")).ToHashSet();
164164
foreach (var reference in references)
165165
{
166166
if (string.IsNullOrEmpty(reference) || reference.Contains(':'))
167167
throw new Exception($"Invalid reference \"{reference}\"");
168168

169-
if (!defines.Contains(reference))
169+
if (!defines.ContainsKey(reference))
170170
throw new Exception($"Reference \"{reference}\" is not defined");
171171
}
172+
173+
// Find main <rng:interleave> element
174+
var interleave = root.Descendants(RngNs + "interleave").First();
175+
var refs = interleave.Descendants(RngNs + "ref").ToList();
176+
foreach (var reference in refs)
177+
{
178+
var name = reference.Attribute("name")!.Value!;
179+
var define = defines[name];
180+
interleave = define.Descendants(RngNs + "interleave").First();
181+
foreach (var child in interleave.Elements())
182+
{
183+
// Ensure the second level interleave elements
184+
// are <rng:optional> elements with a single child
185+
if (child.Name.LocalName != "optional" || child.Elements().Count() != 1)
186+
throw new Exception("Invalid interleaved element");
187+
}
188+
}
172189
}
173190

174191
static void makeSchema(XDocument document, string presetPath,
175-
string outDir)
192+
string outDir, bool wantDeterministic)
176193
{
177194
if (!File.Exists(presetPath))
178195
throw new Exception($"Preset not fount at \"{presetPath}\"");
@@ -185,8 +202,30 @@ static void makeSchema(XDocument document, string presetPath,
185202
var processed = new XDocument(document);
186203
preprocess(processed.Root!, new BoolDictionaryXsltContext(readPreset(presetPath)));
187204

205+
var defineMap = new Dictionary<string, XElement>();
206+
foreach (var define in processed.Root!.Descendants(RngNs + "define"))
207+
{
208+
var name = define.Attribute("name")?.Value ?? throw new Exception("Missing name attribute");
209+
defineMap[name] = define;
210+
}
211+
212+
var start = processed.Root!.Descendants(RngNs + "start").First();
213+
if (wantDeterministic)
214+
makeDeterministic(start, defineMap);
215+
188216
Console.WriteLine($"Collecting schema garbage...");
189-
collectGarbage(processed);
217+
var visitedDefines = new HashSet<string>();
218+
collectGarbage(start, visitedDefines, defineMap);
219+
220+
foreach (var pair in defineMap)
221+
{
222+
if (!visitedDefines.Contains(pair.Key))
223+
{
224+
logElementRemoval(pair.Value, RemovalReason.Collected);
225+
pair.Value.Remove();
226+
}
227+
}
228+
190229
var outputPath = Path.Combine(outDir, filename);
191230
saveDoc(processed, outputPath);
192231
Console.WriteLine($"Merged schema saved to: {outputPath}");
@@ -225,33 +264,6 @@ bool evaluate(string? expr)
225264
}
226265
}
227266

228-
/// <summary>
229-
/// Remove unreferenced defines
230-
/// </summary>
231-
static void collectGarbage(XDocument document)
232-
{
233-
var defineMap = new Dictionary<string, XElement>();
234-
foreach (var define in document.Root!.Descendants(RngNs + "define"))
235-
{
236-
var name = define.Attribute("name")?.Value ?? throw new Exception("Missing name attribute");
237-
defineMap[name] = define;
238-
}
239-
240-
var start = document.Root!.Descendants(RngNs + "start").FirstOrDefault() ??
241-
throw new Exception("Missing start element");
242-
var visitedDefines = new HashSet<string>();
243-
collectGarbage(start, visitedDefines, defineMap);
244-
245-
foreach (var pair in defineMap)
246-
{
247-
if (!visitedDefines.Contains(pair.Key))
248-
{
249-
logElementRemoval(pair.Value, RemovalReason.Collected);
250-
pair.Value.Remove();
251-
}
252-
}
253-
}
254-
255267
static void collectGarbage(XElement element, HashSet<string> visitedDefines, Dictionary<string, XElement> defineMap)
256268
{
257269
foreach (var reference in element.Descendants(RngNs + "ref"))
@@ -300,6 +312,32 @@ static void logElementRemoval(XElement child, RemovalReason reason)
300312
}
301313
}
302314

315+
/// <summary>
316+
/// This erases the top-level rng:interleave elements and places a
317+
/// big choice of zero or more XMP properties
318+
/// </summary>
319+
private static void makeDeterministic(XElement start, Dictionary<string, XElement> defineMap)
320+
{
321+
var interleave = start.Descendants(RngNs + "interleave").First();
322+
var zeroOrMore = new XElement(RngNs + "zeroOrMore");
323+
interleave.Parent!.Add(zeroOrMore);
324+
var choice = new XElement(RngNs + "choice");
325+
zeroOrMore.Add(choice);
326+
var refs = interleave.Descendants(RngNs + "ref").ToList();
327+
interleave.Remove();
328+
foreach (var reference in refs)
329+
{
330+
var name = reference.Attribute("name")!.Value!;
331+
var define = defineMap[name];
332+
interleave = define.Descendants(RngNs + "interleave").First();
333+
foreach (var child in interleave.Elements())
334+
{
335+
Debug.Assert(child.Name.LocalName == "optional");
336+
choice.Add(child.Elements().First());
337+
}
338+
}
339+
}
340+
303341
static void saveDoc(XDocument doc, string filepath, bool skipIndent = false)
304342
{
305343
using (var writer = XmlWriter.Create(filepath,
@@ -384,3 +422,11 @@ enum RemovalReason
384422
Collected,
385423
}
386424
}
425+
426+
[Flags]
427+
public enum RNGSchemaFlags
428+
{
429+
None = 0,
430+
DropDescriptions = 1,
431+
Deterministic = 2,
432+
}

0 commit comments

Comments
 (0)