-
Notifications
You must be signed in to change notification settings - Fork 164
New changelog parser + typed representation of the changelog structure. #8856
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,345 @@ | ||
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
/// The library provides support for parsing `CHANGELOG.md` files formatted | ||
/// with Markdown. It converts the file's content into a structured [Changelog] | ||
/// object, which encapsulates individual [Release] entries. | ||
/// The [ChangelogParser] accommodates various formatting styles. It can | ||
/// effectively parse changelogs with inconsistent header levels or those | ||
/// that include additional information beyond just the version number in | ||
/// the release header. | ||
/// | ||
/// The parser is designed to support the widely adopted "Keep a Changelog" | ||
/// format (see https://keepachangelog.com/en/1.1.0/ for details). | ||
/// Additionally, it has been tested with a diverse set of changelog files | ||
/// available as part of the packages on https://pub.dev/. | ||
library; | ||
|
||
import 'package:collection/collection.dart'; | ||
import 'package:html/dom.dart' as html; | ||
import 'package:html/parser.dart' as html_parser; | ||
import 'package:markdown/markdown.dart' as m; | ||
import 'package:pub_semver/pub_semver.dart'; | ||
|
||
/// The structured form of a whole changelog document.
///
/// Groups the optional document [title] and [description] with the list of
/// individual [releases].
class Changelog {
  /// The document's main title (e.g., 'Changelog'), when one is present.
  final String? title;

  /// Optional introductory content of the changelog.
  final Content? description;

  /// The release entries, typically ordered from newest to oldest.
  final List<Release> releases;

  /// Creates a changelog; only [releases] is mandatory.
  Changelog({
    this.title,
    this.description,
    required this.releases,
  });
}
|
||
/// One version entry of a changelog, for example the section introduced by
/// '[1.2.0] - 2025-07-10' or by an 'Unreleased' header.
class Release {
  /// The version string or the section title (e.g., '1.2.0', 'Unreleased').
  final String version;

  /// The value of the header's HTML `id` attribute, usable as a link anchor.
  final String? anchor;

  /// The header text that follows the version.
  final String? label;

  /// The release date of this version.
  ///
  /// `null` for the 'Unreleased' section, or when the header has no date.
  final DateTime? date;

  /// Whatever remains of [label] once the [date] part (if any) is removed.
  final String? note;

  /// The body of the release entry.
  final Content content;

  /// Creates a release entry; [version] and [content] are mandatory.
  Release({
    required this.version,
    this.anchor,
    this.label,
    this.date,
    this.note,
    required this.content,
  });
}
|
||
/// Describes arbitrary content (e.g. a changelog description or the body of a release entry). | ||
isoos marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// | ||
/// If the content is specified as parsed HTML nodes, the class stores them as-is | ||
/// and serializes them only when needed. | ||
class Content {
  /// The HTML source text; non-null only when created via
  /// [Content.fromHtmlText].
  final String? _asText;

  /// The parsed nodes gathered under one fragment; non-null only when created
  /// via [Content.fromParsedHtml].
  final html.DocumentFragment? _asNode;

  /// Creates content from HTML source [text].
  ///
  /// The text is parsed only when [asHtmlNode] is first read.
  Content.fromHtmlText(String text)
      : _asText = text,
        _asNode = null;

  /// Creates content from already parsed [nodes].
  ///
  /// The nodes are appended, in order, to a newly created
  /// [html.DocumentFragment]; they are serialized only when [asHtmlText] is
  /// first read.
  Content.fromParsedHtml(List<html.Node> nodes)
      : _asText = null,
        _asNode = _collect(nodes);

  /// Appends every item of [nodes] to a fresh fragment and returns it.
  static html.DocumentFragment _collect(List<html.Node> nodes) {
    final fragment = html.DocumentFragment();
    for (final node in nodes) {
      fragment.append(node);
    }
    return fragment;
  }

  /// The content as HTML text, computed on first access and then cached.
  ///
  /// Note: the previous implementation wrapped the node with
  /// `cond ? a : b..append(node)`. A cascade binds looser than `?:`, so the
  /// `append` was applied to the selected branch, i.e. the fragment was
  /// appended to itself. The stored node is always a fragment, so it can be
  /// serialized directly.
  late final String asHtmlText = _asText ?? _asNode!.outerHtml;

  /// The content as a parsed HTML node, computed on first access and then
  /// cached.
  ///
  /// Exactly one of the two backing fields is set by the constructors, so the
  /// `!` below cannot fail.
  late final html.Node asHtmlNode =
      _asNode ?? html_parser.parseFragment(_asText!);
}
|
||
/// Parses the changelog with pre-configured options.
///
/// Use [parseMarkdown] for Markdown source, or [parseHtmlNodes] when the
/// content has already been rendered and parsed into HTML nodes.
class ChangelogParser {
  /// Header tags that may act as the title or as a release header.
  ///
  /// The index of a tag in this list is used as the header's level
  /// (`h1` is level 0).
  static const _acceptedHeaderTags = ['h1', 'h2', 'h3', 'h4'];

  /// Date formats recognized in release headers; all start with the year.
  static final _yyyymmddDateFormats = <RegExp>[
    RegExp(r'^(\d{4})-(\d{2})-(\d{2})$'), // 2025-07-10
    RegExp(r'^(\d{4})/(\d{2})/(\d{2})$'), // 2025/07/10
  ];

  /// When `true`, once the first release header is found, only headers that
  /// use the same tag are considered as release headers.
  final bool _strictLevels;

  /// A version-like header that is at least this many levels below the
  /// current release header is treated as part of the current release.
  final int _partOfLevelThreshold;

  ChangelogParser({
    bool strictLevels = false,
    int partOfLevelThreshold = 2,
  })  : _strictLevels = strictLevels,
        _partOfLevelThreshold = partOfLevelThreshold;

  /// Renders the Markdown [input] to HTML (GitHub-flavored extension set),
  /// parses the HTML and converts the resulting nodes into a [Changelog].
  ///
  /// The output is not sanitized here.
  /// NOTE(review): per the review discussion on this method, sanitization is
  /// expected to be the last step of the pipeline, applied to whatever this
  /// parser produces - confirm at the call site.
  Changelog parseMarkdown(String input) {
    final document = m.Document(extensionSet: m.ExtensionSet.gitHubWeb);
    final rawHtml = m.renderToHtml(document.parse(input));
    final root = html_parser.parseFragment(rawHtml);
    return parseHtmlNodes(root.nodes);
  }

  /// Parses top-level HTML nodes into a [Changelog] structure.
  ///
  /// Nodes before the first release header become the changelog description
  /// (the first `h1` among them becomes the title). Every recognized release
  /// header starts a new [Release] that collects the nodes following it.
  Changelog parseHtmlNodes(List<html.Node> input) {
    String? title;
    Content? description;
    final releases = <Release>[];

    // Versions that have already opened a release section. A header that
    // repeats one of them is kept as content of the current section.
    final seenVersions = <String>{};

    String? firstReleaseLocalName;
    _ParsedHeader? current;

    var nodes = <html.Node>[];

    // Closes the section collected so far: the description when no release
    // header has been seen yet, otherwise the current release.
    void finalizeNodes() {
      final content = Content.fromParsedHtml(nodes);
      final header = current;
      if (header == null) {
        description = content.asHtmlText.trim().isEmpty ? null : content;
      } else {
        releases.add(Release(
          version: header.version,
          anchor: header.anchor,
          label: header.label,
          date: header.date,
          note: header.note,
          content: content,
        ));
      }
      nodes = <html.Node>[];
    }

    // Iterate over a snapshot of the list. Presumably `input` can be a live
    // child list (parseMarkdown passes `root.nodes`) that changes when the
    // collected nodes are appended to a new fragment - TODO confirm.
    for (final node in [...input]) {
      if (node is html.Element &&
          _acceptedHeaderTags.contains(node.localName)) {
        // In strict mode, a header on a different level than the first
        // release header can never start a release. It is kept as regular
        // content (it used to be dropped, losing e.g. `### Added` headings).
        final isOffLevel = _strictLevels &&
            firstReleaseLocalName != null &&
            node.localName != firstReleaseLocalName;

        if (!isOffLevel) {
          final headerText = _extractText(node).trim();

          // Check if this looks like a version header first.
          final parsed = _tryParseAsHeader(node, headerText);
          if (parsed != null) {
            final isNewVersion = !seenVersions.contains(parsed.version);
            final isPartOfCurrent = current != null &&
                current.level + _partOfLevelThreshold <= parsed.level;
            if (isNewVersion && !isPartOfCurrent) {
              firstReleaseLocalName ??= node.localName!;
              finalizeNodes();
              seenVersions.add(parsed.version);
              current = parsed;
              continue;
            }
          }

          // Only consider as title if it's h1 and no version was found yet.
          if (node.localName == 'h1' && title == null && current == null) {
            title = headerText;
            continue;
          }
        }
      }

      // Collect nodes for the description (before any version) or for the
      // current release.
      nodes.add(node);
    }

    // Complete the last section.
    finalizeNodes();

    return Changelog(
      title: title,
      description: description,
      releases: releases,
    );
  }

  /// Returns the concatenated text of [node] and its descendants, in
  /// document order.
  ///
  /// Elements contribute the text of their children; any other node
  /// contributes its own `text` (or nothing when that is `null`).
  ///
  /// The traversal uses an explicit stack instead of recursion, so the depth
  /// of the input cannot exhaust the call stack (raised in code review).
  String _extractText(html.Node node) {
    final buffer = StringBuffer();
    final pending = <html.Node>[node];
    while (pending.isNotEmpty) {
      final next = pending.removeLast();
      if (next is html.Element) {
        // Push in reverse so that the first child is processed first.
        pending.addAll(next.nodes.reversed);
      } else {
        buffer.write(next.text ?? '');
      }
    }
    return buffer.toString();
  }

  /// Parses the release header line or returns `null` when no version part
  /// was recognized.
  ///
  /// [elem] is the header element (its tag gives the level and its `id`
  /// attribute the anchor), [input] is the text of the header.
  ///
  /// Handles some of the common formats:
  /// - `1.2.0`
  /// - `v1.2.0`
  /// - `[1.2.0] - 2025-07-14`
  /// - `unreleased`
  /// - `next release (...)`
  _ParsedHeader? _tryParseAsHeader(html.Element elem, String input) {
    final level = _acceptedHeaderTags.indexOf(elem.localName!);
    final anchor = elem.attributes['id'];

    // Trim once and use the same text for matching and for slicing, so that
    // the offsets below are always relative to the matched text.
    final text = input.trim();

    // Special case: unreleased.
    final lowered = text.toLowerCase();
    for (final unreleasedText in const ['unreleased', 'next release']) {
      if (lowered == unreleasedText) {
        return _ParsedHeader(level, 'Unreleased', null, null, anchor, null);
      }
      if (lowered.startsWith('$unreleasedText ')) {
        final rest = text.substring(unreleasedText.length + 1).trim();
        return _ParsedHeader(level, 'Unreleased', rest.isEmpty ? null : rest,
            null, anchor, null);
      }
    }

    // Extract the version: the first non-empty space-separated token.
    final versionPart = text.split(' ').firstWhereOrNull((e) => e.isNotEmpty);
    if (versionPart == null) {
      return null;
    }
    final version = _parseVersionPart(versionPart.trim());
    if (version == null) {
      return null;
    }

    // The rest of the release header, without a leading `- ` separator.
    var rest =
        text.substring(text.indexOf(versionPart) + versionPart.length).trim();
    if (rest.startsWith('- ')) {
      rest = rest.substring(2).trim();
    }
    final label = rest.isEmpty ? null : rest;

    DateTime? date;
    String? note;
    if (label != null) {
      final parts = label.split(' ');
      date = _parseDatePart(parts.first.trim());
      // The note is the label without the date token; trimmed, so that extra
      // spaces after the date do not leak into it.
      final noteText = (date == null ? parts : parts.skip(1)).join(' ').trim();
      if (noteText.isNotEmpty) {
        note = noteText;
      }
    }

    // Without an `id` attribute, fall back to the version without the dots.
    return _ParsedHeader(level, version, label, date,
        anchor ?? version.replaceAll('.', ''), note);
  }

  /// Parses the version part of a release title.
  ///
  /// Surrounding `[` `]` brackets and a leading `v` are removed before the
  /// check. Returns the extracted version string, or `null` if the remaining
  /// text is not accepted by [Version.parse].
  String? _parseVersionPart(String input) {
    var candidate = input;
    if (candidate.startsWith('[') && candidate.endsWith(']')) {
      candidate = candidate.substring(1, candidate.length - 1).trim();
    }
    if (candidate.startsWith('v')) {
      candidate = candidate.substring(1).trim();
    }

    // Sanity check if it's a valid semantic version.
    try {
      final version = Version.parse(candidate);
      if (!version.isEmpty && !version.isAny) {
        return candidate;
      }
    } on FormatException {
      // Not a version: fall through to the `null` result.
    }
    return null;
  }

  /// Parses the date part of a release title.
  ///
  /// The date may be wrapped in parentheses. Returns the parsed date or
  /// `null` if no date was recognized.
  ///
  /// Note: currently only date formats that start with a year are recognized.
  DateTime? _parseDatePart(String input) {
    var candidate = input;
    if (candidate.startsWith('(') && candidate.endsWith(')')) {
      candidate = candidate.substring(1, candidate.length - 1);
    }
    for (final format in _yyyymmddDateFormats) {
      final match = format.firstMatch(candidate);
      if (match == null) continue;
      final year = int.parse(match.group(1)!);
      final month = int.parse(match.group(2)!);
      final day = int.parse(match.group(3)!);
      final date = DateTime(year, month, day);
      // `DateTime` normalizes overflowing values (e.g. month 13) instead of
      // failing; accept the date only when it round-trips unchanged.
      if (date.year == year && date.month == month && date.day == day) {
        return date;
      }
    }
    return null;
  }
}
|
||
/// The values extracted from a header element that was recognized as a
/// release header.
class _ParsedHeader {
  /// The header level: the index of the header's tag in the parser's list of
  /// accepted header tags.
  final int level;

  /// The extracted version string, or 'Unreleased'.
  final String version;

  /// The header text after the version part, if any.
  final String? label;

  /// The date recognized at the start of [label], if any.
  final DateTime? date;

  /// The anchor (`id` value) associated with the header, if any.
  final String? anchor;

  /// The text of [label] without the [date] part, if any.
  final String? note;

  _ParsedHeader(
    this.level,
    this.version,
    this.label,
    this.date,
    this.anchor,
    this.note,
  );
}
Uh oh!
There was an error while loading. Please reload this page.