Skip to content

Commit 26c990a

Browse files
authored
New changelog parser + typed representation of the changelog structure. (#8856)
1 parent cb0c03b commit 26c990a

File tree

4 files changed

+901
-67
lines changed

4 files changed

+901
-67
lines changed

app/lib/shared/changelog.dart

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// The library provides support for parsing `CHANGELOG.md` files formatted
6+
/// with Markdown. It converts the file's content into a structured [Changelog]
7+
/// object, which encapsulates individual [Release] entries.
8+
9+
/// The [ChangelogParser] accommodates various formatting styles. It can
10+
/// effectively parse changelogs with inconsistent header levels or those
11+
/// that include additional information beyond just the version number in
12+
/// the release header.
13+
///
14+
/// The parser is designed to support the widely adopted "Keep a Changelog"
15+
/// format (see https://keepachangelog.com/en/1.1.0/ for details).
16+
/// Additionally, it has been tested with a diverse set of changelog files
17+
/// available as part of the packages on https://pub.dev/.
18+
library;
19+
20+
import 'package:collection/collection.dart';
21+
import 'package:html/dom.dart' as html;
22+
import 'package:html/parser.dart' as html_parser;
23+
import 'package:pub_semver/pub_semver.dart';
24+
25+
/// The parsed form of a whole changelog file: an optional title, an optional
/// introduction and the list of releases found in it.
class Changelog {
  Changelog({
    this.title,
    this.description,
    required this.releases,
  });

  /// The main title of the changelog (e.g., 'Changelog'), if there is one.
  final String? title;

  /// The introductory content of the changelog, if there is any.
  final Content? description;

  /// The releases of the changelog, typically in reverse chronological order.
  final List<Release> releases;
}
42+
43+
/// A single version entry of the changelog, such as '[1.2.0] - 2025-07-10'
/// or the 'Unreleased' section.
class Release {
  Release({
    required this.version,
    this.anchor,
    this.label,
    this.date,
    this.note,
    required this.content,
  });

  /// The version string or section title (e.g., '1.2.0', 'Unreleased').
  final String version;

  /// The HTML anchor value (`id` attribute) of the release header.
  final String? anchor;

  /// The text of the release header that follows the version.
  final String? label;

  /// The release date of this version.
  ///
  /// `null` for the 'Unreleased' section or when the header has no date.
  final DateTime? date;

  /// The remaining text of the [label] once the [date] part (if present) is
  /// taken out.
  final String? note;

  /// The content of the release.
  final Content content;
}
74+
75+
/// Describes an arbitrary piece of content (e.g. the description of a single version).
76+
///
77+
/// If the content is specified as parsed HTML nodes, the class will store it as-is,
78+
/// and serialize them only when needed.
79+
class Content {
80+
String? _asText;
81+
html.Node? _asNode;
82+
83+
Content.fromHtmlText(String text) : _asText = text;
84+
Content.fromParsedHtml(List<html.Node> nodes) {
85+
_asNode = html.DocumentFragment();
86+
for (final node in nodes) {
87+
_asNode!.append(node);
88+
}
89+
}
90+
91+
late final asHtmlText = () {
92+
if (_asText != null) return _asText!;
93+
final root = _asNode is html.DocumentFragment
94+
? _asNode as html.DocumentFragment
95+
: html.DocumentFragment()
96+
..append(_asNode!);
97+
return root.outerHtml;
98+
}();
99+
100+
late final asHtmlNode = () {
101+
if (_asNode != null) return _asNode!;
102+
return html_parser.parseFragment(_asText!);
103+
}();
104+
}
105+
106+
/// Parses the changelog with pre-configured options.
107+
class ChangelogParser {
108+
final _acceptedHeaderTags = ['h1', 'h2', 'h3', 'h4'];
109+
final bool _strictLevels;
110+
final int _partOfLevelThreshold;
111+
112+
ChangelogParser({
113+
bool strictLevels = false,
114+
int partOfLevelThreshold = 2,
115+
}) : _strictLevels = strictLevels,
116+
_partOfLevelThreshold = partOfLevelThreshold;
117+
118+
/// Parses markdown nodes into a [Changelog] structure.
119+
Changelog parseHtmlNodes(List<html.Node> input) {
120+
String? title;
121+
Content? description;
122+
final releases = <Release>[];
123+
124+
String? firstReleaseLocalName;
125+
_ParsedHeader? current;
126+
127+
var nodes = <html.Node>[];
128+
void finalizeNodes() {
129+
if (current == null) {
130+
description = Content.fromParsedHtml(nodes);
131+
if (description!.asHtmlText.trim().isEmpty) {
132+
description = null;
133+
}
134+
} else {
135+
releases.add(Release(
136+
version: current.version,
137+
anchor: current.anchor,
138+
label: current.label,
139+
date: current.date,
140+
note: current.note,
141+
content: Content.fromParsedHtml(nodes),
142+
));
143+
}
144+
nodes = <html.Node>[];
145+
}
146+
147+
for (final node in [...input]) {
148+
if (node is html.Element &&
149+
_acceptedHeaderTags.contains(node.localName)) {
150+
if (_strictLevels &&
151+
firstReleaseLocalName != null &&
152+
node.localName != firstReleaseLocalName) {
153+
continue;
154+
}
155+
final headerText = node.text.trim();
156+
157+
// Check if this looks like a version header first
158+
final parsed = _tryParseAsHeader(node, headerText);
159+
160+
final isNewVersion = parsed != null &&
161+
releases.every((r) => r.version != parsed.version) &&
162+
current?.version != parsed.version;
163+
final isPartOfCurrent = current != null &&
164+
parsed != null &&
165+
current.level + _partOfLevelThreshold <= parsed.level;
166+
if (isNewVersion && !isPartOfCurrent) {
167+
firstReleaseLocalName ??= node.localName!;
168+
finalizeNodes();
169+
current = parsed;
170+
continue;
171+
}
172+
173+
// only consider as title if it's h1 and we haven't found any versions yet
174+
if (node.localName == 'h1' && title == null && current == null) {
175+
title = headerText;
176+
continue;
177+
}
178+
}
179+
180+
// collect nodes for description (before any version) or current release
181+
nodes.add(node);
182+
}
183+
184+
// complete last section
185+
finalizeNodes();
186+
187+
return Changelog(
188+
title: title,
189+
description: description,
190+
releases: releases,
191+
);
192+
}
193+
194+
/// Parses the release header line or return `null` when no version part was recognized.
195+
///
196+
/// Handles some of the common formats:
197+
/// - `1.2.0`
198+
/// - `v1.2.0`
199+
/// - `[1.2.0] - 2025-07-14`
200+
/// - `unreleased`
201+
/// - `next release (...)`
202+
_ParsedHeader? _tryParseAsHeader(html.Element elem, String input) {
203+
final level = _acceptedHeaderTags.indexOf(elem.localName!);
204+
205+
final anchor = elem.attributes['id'];
206+
// special case: unreleased
207+
final inputLowerCase = input.toLowerCase().trim();
208+
final unreleasedTexts = ['unreleased', 'next release'];
209+
for (final unreleasedText in unreleasedTexts) {
210+
if (inputLowerCase == unreleasedText) {
211+
return _ParsedHeader(level, 'Unreleased', null, null, anchor, null);
212+
}
213+
if (inputLowerCase.startsWith('$unreleasedText ')) {
214+
String? label = input.substring(unreleasedText.length + 1).trim();
215+
if (label.isEmpty) {
216+
label = null;
217+
}
218+
return _ParsedHeader(level, 'Unreleased', label, null, anchor, null);
219+
}
220+
}
221+
222+
// extract version
223+
final versionPart = input.split(' ').firstWhereOrNull((e) => e.isNotEmpty);
224+
if (versionPart == null) {
225+
return null;
226+
}
227+
final version = _parseVersionPart(versionPart.trim());
228+
if (version == null) {
229+
return null;
230+
}
231+
232+
// rest of the release header
233+
String? label =
234+
input.substring(input.indexOf(versionPart) + versionPart.length).trim();
235+
if (label.startsWith('- ')) {
236+
label = label.substring(2).trim();
237+
}
238+
if (label.isEmpty) {
239+
label = null;
240+
}
241+
242+
DateTime? date;
243+
String? note;
244+
245+
if (label != null) {
246+
final parts = label.split(' ');
247+
date = _parseDatePart(parts[0].trim());
248+
if (date != null) {
249+
parts.removeAt(0);
250+
}
251+
252+
if (parts.isNotEmpty) {
253+
note = parts.join(' ');
254+
}
255+
}
256+
257+
return _ParsedHeader(level, version, label, date,
258+
anchor ?? version.replaceAll('.', ''), note);
259+
}
260+
261+
/// Parses the version part of a release title.
262+
///
263+
/// Returns the extracted version string, or null if no version was recognized.
264+
String? _parseVersionPart(String input) {
265+
// remove brackets or 'v' if present
266+
if (input.startsWith('[') && input.endsWith(']')) {
267+
input = input.substring(1, input.length - 1).trim();
268+
}
269+
if (input.startsWith('v')) {
270+
input = input.substring(1).trim();
271+
}
272+
273+
// sanity check if it's a valid semantic version
274+
try {
275+
final version = Version.parse(input);
276+
if (!version.isEmpty && !version.isAny) {
277+
return input;
278+
}
279+
} on FormatException catch (_) {}
280+
281+
return null;
282+
}
283+
284+
final _yyyymmddDateFormats = <RegExp>[
285+
RegExp(r'^(\d{4})-(\d{2})-(\d{2})$'), // 2025-07-10
286+
RegExp(r'^(\d{4})/(\d{2})/(\d{2})$'), // 2025/07/10
287+
];
288+
289+
/// Parses the date part of a release title.
290+
///
291+
/// Returns the parsed date or null if no date was recognized.
292+
///
293+
/// Note: currently only date formats that start with a year are recognized.
294+
DateTime? _parseDatePart(String input) {
295+
if (input.startsWith('(') && input.endsWith(')')) {
296+
input = input.substring(1, input.length - 1);
297+
}
298+
for (final format in _yyyymmddDateFormats) {
299+
final match = format.matchAsPrefix(input);
300+
if (match == null) continue;
301+
final year = int.parse(match.group(1)!);
302+
final month = int.parse(match.group(2)!);
303+
final day = int.parse(match.group(3)!);
304+
final date = DateTime(year, month, day);
305+
// sanity check for overflow dates
306+
if (date.year != year || date.month != month || date.day != day) {
307+
continue;
308+
}
309+
return date;
310+
}
311+
312+
return null;
313+
}
314+
}
315+
316+
/// The values extracted from a heading that was recognized as a release
/// header.
class _ParsedHeader {
  _ParsedHeader(
    this.level,
    this.version,
    this.label,
    this.date,
    this.anchor,
    this.note,
  );

  /// The level of the heading.
  final int level;

  /// The version of the release.
  final String version;

  /// The text of the header after the version, if any.
  final String? label;

  /// The date found in the header, if any.
  final DateTime? date;

  /// The anchor of the header, if any.
  final String? anchor;

  /// The additional text of the header, if any.
  final String? note;
}

0 commit comments

Comments
 (0)