Skip to content

Commit a29edf2

Browse files
committed
New changelog parser + typed representation of the changelog structure.
1 parent 0612345 commit a29edf2

File tree

4 files changed

+920
-67
lines changed

4 files changed

+920
-67
lines changed

app/lib/shared/changelog.dart

Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// The library provides support for parsing `CHANGELOG.md` files formatted
6+
/// with Markdown. It converts the file's content into a structured [Changelog]
7+
/// object, which encapsulates individual [Release] entries.
8+
9+
/// The [ChangelogParser] accommodates various formatting styles. It can
10+
/// effectively parse changelogs with inconsistent header levels or those
11+
/// that include additional information beyond just the version number in
12+
/// the release header.
13+
///
14+
/// The parser is designed to support the widely adopted "Keep a Changelog"
15+
/// format (see https://keepachangelog.com/en/1.1.0/ for details).
16+
/// Additionally, it has been tested with a diverse set of changelog files
17+
/// available as part of the packages on https://pub.dev/.
18+
library;
19+
20+
import 'package:collection/collection.dart';
21+
import 'package:html/dom.dart' as html;
22+
import 'package:html/parser.dart' as html_parser;
23+
import 'package:markdown/markdown.dart' as m;
24+
import 'package:pub_semver/pub_semver.dart';
25+
26+
/// The typed representation of a complete changelog document.
///
/// Holds the optional [title] and [description] of the document together
/// with its [releases].
class Changelog {
  Changelog({
    required this.releases,
    this.description,
    this.title,
  });

  /// The main title of the changelog (e.g., 'Changelog'), if there is one.
  final String? title;

  /// The introductory description of the changelog, if there is one.
  final Content? description;

  /// The release entries, typically with the most recent release first.
  final List<Release> releases;
}
43+
44+
/// A single version entry of the changelog, for example
/// '[1.2.0] - 2025-07-10' or the 'Unreleased' section.
class Release {
  Release({
    required this.version,
    required this.content,
    this.anchor,
    this.label,
    this.date,
    this.note,
  });

  /// The version string or the section title (e.g., '1.2.0', 'Unreleased').
  final String version;

  /// The HTML anchor value (the `id` attribute), if there is one.
  final String? anchor;

  /// The text of the header that follows the version, if there is any.
  final String? label;

  /// The release date of this version.
  ///
  /// It is `null` for the 'Unreleased' section, or when the date is missing.
  final DateTime? date;

  /// The additional text of the [label], without the [date] part
  /// (when a date is present).
  final String? note;

  /// The content of the release.
  final Content content;
}
75+
76+
/// Describes an arbitrary content (e.g. a changelog description or the body
/// of a release entry).
///
/// The content is created either from HTML text or from already parsed HTML
/// nodes. It is stored in the form it was given, and the other form is
/// computed only when it is first requested (and cached afterwards).
class Content {
  /// The HTML source, set only by [Content.fromHtmlText].
  final String? _asText;

  /// The fragment that holds the parsed nodes, set only by
  /// [Content.fromParsedHtml].
  final html.DocumentFragment? _asNode;

  /// Creates the content from the serialized HTML [text].
  Content.fromHtmlText(String text)
      : _asText = text,
        _asNode = null;

  /// Creates the content from the parsed HTML [nodes].
  ///
  /// The nodes are appended, in order, to a new [html.DocumentFragment].
  /// NOTE(review): appending is expected to re-parent the nodes (detaching
  /// them from their previous parent) - confirm against `package:html`.
  Content.fromParsedHtml(List<html.Node> nodes)
      : _asText = null,
        _asNode = html.DocumentFragment() {
    final fragment = _asNode!;
    for (final node in nodes) {
      fragment.append(node);
    }
  }

  /// The content as HTML text.
  ///
  /// Returns the original text when it was given, otherwise the nodes are
  /// serialized on first access.
  ///
  /// The fragment is serialized directly: the earlier implementation applied
  /// a cascade to the whole conditional expression, which appended the
  /// fragment to itself before serializing it.
  late final String asHtmlText = _asText ?? _asNode!.outerHtml;

  /// The content as a parsed HTML node.
  ///
  /// Returns the fragment of the original nodes when they were given,
  /// otherwise the text is parsed as an HTML fragment on first access.
  late final html.Node asHtmlNode =
      _asNode ?? html_parser.parseFragment(_asText!);
}
106+
107+
/// Parses changelog documents into a [Changelog] with pre-configured options.
class ChangelogParser {
  /// The header tags that are inspected as a possible title or release
  /// header. The index of the tag in this list is used as the header level.
  static const _acceptedHeaderTags = ['h1', 'h2', 'h3', 'h4'];

  /// The lower-cased header texts (or header prefixes) that identify the
  /// section of the unreleased changes.
  static const _unreleasedTexts = ['unreleased', 'next release'];

  /// The recognized date formats (all of them start with the year).
  static final _yyyymmddDateFormats = <RegExp>[
    RegExp(r'^(\d{4})-(\d{2})-(\d{2})$'), // 2025-07-10
    RegExp(r'^(\d{4})/(\d{2})/(\d{2})$'), // 2025/07/10
  ];

  final bool _strictLevels;
  final int _partOfLevelThreshold;

  /// Creates a parser with the given options.
  ///
  /// When [strictLevels] is set, after the first release header only the
  /// headers with the same tag are considered as release headers, the other
  /// headers are kept as content.
  ///
  /// A version-like header that is at least [partOfLevelThreshold] levels
  /// deeper than the header of the current release is kept as part of the
  /// current release instead of starting a new one.
  ChangelogParser({
    bool strictLevels = false,
    int partOfLevelThreshold = 2,
  })  : _strictLevels = strictLevels,
        _partOfLevelThreshold = partOfLevelThreshold;

  /// Parses the Markdown [input] into a [Changelog] structure.
  ///
  /// The input is rendered to HTML with the GitHub web extension set, and
  /// the nodes of the parsed HTML are passed to [parseHtmlNodes].
  Changelog parseMarkdown(String input) {
    final nodes =
        m.Document(extensionSet: m.ExtensionSet.gitHubWeb).parse(input);
    final rawHtml = m.renderToHtml(nodes);
    final root = html_parser.parseFragment(rawHtml);
    return parseHtmlNodes(root.nodes);
  }

  /// Parses HTML nodes into a [Changelog] structure.
  ///
  /// The nodes before the first release header become the description
  /// (except the first `h1`, which becomes the title), and the nodes after a
  /// release header become the content of that release. A header that
  /// repeats an already seen version does not start a new release.
  ///
  /// The nodes of [input] are appended to the [Content] of the result.
  Changelog parseHtmlNodes(List<html.Node> input) {
    String? title;
    Content? description;
    final releases = <Release>[];
    // The versions of `releases` and of `current`.
    final seenVersions = <String>{};

    String? firstReleaseLocalName;
    _ParsedHeader? current;
    var pending = <html.Node>[];

    // Closes the section that is being collected: either the description
    // (before the first release) or the content of the current release.
    void finalizeNodes() {
      final content = Content.fromParsedHtml(pending);
      pending = <html.Node>[];
      final header = current;
      if (header == null) {
        description = content.asHtmlText.trim().isEmpty ? null : content;
        return;
      }
      releases.add(Release(
        version: header.version,
        anchor: header.anchor,
        label: header.label,
        date: header.date,
        note: header.note,
        content: content,
      ));
    }

    // Iterates over a copy: presumably appending the nodes to the content
    // fragment detaches them from their parent, which would modify `input`
    // when it is a live node list (as in `parseMarkdown`).
    for (final node in [...input]) {
      if (node is html.Element &&
          _acceptedHeaderTags.contains(node.localName)) {
        // In strict mode only the level of the first release may start a new
        // release; headers on other levels are regular content and must not
        // be dropped from the output.
        final isOffLevel = _strictLevels &&
            firstReleaseLocalName != null &&
            node.localName != firstReleaseLocalName;
        if (!isOffLevel) {
          final headerText = _extractText(node).trim();

          // Check if this looks like a version header first.
          final parsed = _tryParseAsHeader(node, headerText);
          if (parsed != null && !seenVersions.contains(parsed.version)) {
            final active = current;
            final isPartOfCurrent = active != null &&
                active.level + _partOfLevelThreshold <= parsed.level;
            if (!isPartOfCurrent) {
              firstReleaseLocalName ??= node.localName;
              finalizeNodes();
              current = parsed;
              seenVersions.add(parsed.version);
              continue;
            }
          }

          // Only consider it as the title if it is an `h1` and no version
          // has been found yet.
          if (node.localName == 'h1' && title == null && current == null) {
            title = headerText;
            continue;
          }
        }
      }

      // Collect the node for the description (before any version) or for
      // the current release.
      pending.add(node);
    }

    // Complete the last section.
    finalizeNodes();

    return Changelog(
      title: title,
      description: description,
      releases: releases,
    );
  }

  /// Returns the concatenated text of [node] and its descendants.
  String _extractText(html.Node node) {
    if (node is html.Text) {
      return node.text;
    }
    if (node is html.Element) {
      return node.nodes.map(_extractText).join();
    }
    return node.text ?? '';
  }

  /// Parses the release header line or returns `null` when no version part
  /// was recognized.
  ///
  /// The [input] is the text of the header element [elem].
  ///
  /// Handles some of the common formats:
  /// - `1.2.0`
  /// - `v1.2.0`
  /// - `[1.2.0] - 2025-07-14`
  /// - `unreleased`
  /// - `next release (...)`
  _ParsedHeader? _tryParseAsHeader(html.Element elem, String input) {
    final level = _acceptedHeaderTags.indexOf(elem.localName!);
    final anchor = elem.attributes['id'];

    // Trimmed once, so that the offsets of the lower-cased text and of the
    // original text are the same.
    final text = input.trim();

    // Special case: unreleased.
    final lowerCased = text.toLowerCase();
    for (final unreleasedText in _unreleasedTexts) {
      if (lowerCased == unreleasedText) {
        return _ParsedHeader(level, 'Unreleased', null, null, anchor, null);
      }
      if (lowerCased.startsWith('$unreleasedText ')) {
        final rest = text.substring(unreleasedText.length + 1).trim();
        return _ParsedHeader(level, 'Unreleased', rest.isEmpty ? null : rest,
            null, anchor, null);
      }
    }

    // Extract the version: the first non-empty, space-separated part.
    final versionPart = text.split(' ').firstWhereOrNull((e) => e.isNotEmpty);
    if (versionPart == null) {
      return null;
    }
    final version = _parseVersionPart(versionPart.trim());
    if (version == null) {
      return null;
    }

    // The rest of the release header, without the leading dash separator.
    var rest =
        text.substring(text.indexOf(versionPart) + versionPart.length).trim();
    if (rest.startsWith('- ')) {
      rest = rest.substring(2).trim();
    }
    final label = rest.isEmpty ? null : rest;

    DateTime? date;
    String? note;
    if (label != null) {
      final parts = label.split(' ');
      date = _parseDatePart(parts.first.trim());
      // The note is the label without the recognized date part. It is
      // trimmed, as repeated spaces after the date would otherwise leave
      // leading whitespace in it.
      final noteText = (date == null ? parts : parts.skip(1)).join(' ').trim();
      note = noteText.isEmpty ? null : noteText;
    }

    // Without an `id` attribute the anchor is the version without the dots.
    return _ParsedHeader(level, version, label, date,
        anchor ?? version.replaceAll('.', ''), note);
  }

  /// Parses the version part of a release title.
  ///
  /// Returns the extracted version string (without the surrounding brackets
  /// and the leading `v`), or `null` if no version was recognized.
  String? _parseVersionPart(String input) {
    // Remove the brackets or the 'v' if present.
    if (input.startsWith('[') && input.endsWith(']')) {
      input = input.substring(1, input.length - 1).trim();
    }
    if (input.startsWith('v')) {
      input = input.substring(1).trim();
    }

    // Sanity check: accept it only if it is a valid semantic version.
    try {
      Version.parse(input);
      return input;
    } on FormatException {
      return null;
    }
  }

  /// Parses the date part of a release title.
  ///
  /// Returns the parsed date or `null` if no date was recognized. The date
  /// may be surrounded by parentheses.
  ///
  /// Note: currently only date formats that start with a year are recognized.
  DateTime? _parseDatePart(String input) {
    if (input.startsWith('(') && input.endsWith(')')) {
      input = input.substring(1, input.length - 1);
    }
    for (final format in _yyyymmddDateFormats) {
      final match = format.firstMatch(input);
      if (match == null) continue;
      final year = int.parse(match.group(1)!);
      final month = int.parse(match.group(2)!);
      final day = int.parse(match.group(3)!);
      final date = DateTime(year, month, day);
      // Sanity check: `DateTime` rolls over the overflowing values
      // (e.g. a month of 13), such values are not accepted as a date.
      if (date.year != year || date.month != month || date.day != day) {
        continue;
      }
      return date;
    }

    return null;
  }
}
334+
335+
/// The values that were recognized in a release header.
class _ParsedHeader {
  _ParsedHeader(
    this.level,
    this.version,
    this.label,
    this.date,
    this.anchor,
    this.note,
  );

  /// The level of the header element.
  final int level;

  /// The version string or the section title of the release.
  final String version;

  /// The text of the header after the version, if there is any.
  final String? label;

  /// The date of the release, if there is one.
  final DateTime? date;

  /// The HTML anchor value, if there is one.
  final String? anchor;

  /// The additional text of the header, if there is any.
  final String? note;
}

0 commit comments

Comments
 (0)