|
| 1 | +// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file |
| 2 | +// for details. All rights reserved. Use of this source code is governed by a |
| 3 | +// BSD-style license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +/// The library provides support for parsing `CHANGELOG.md` files formatted |
| 6 | +/// with Markdown. It converts the file's content into a structured [Changelog] |
| 7 | +/// object, which encapsulates individual [Release] entries. |
| 8 | +
|
| 9 | +/// The [ChangelogParser] accommodates various formatting styles. It can |
| 10 | +/// effectively parse changelogs with inconsistent header levels or those |
| 11 | +/// that include additional information beyond just the version number in |
| 12 | +/// the release header. |
| 13 | +/// |
| 14 | +/// The parser is designed to support the widely adopted "Keep a Changelog" |
| 15 | +/// format (see https://keepachangelog.com/en/1.1.0/ for details). |
| 16 | +/// Additionally, it has been tested with a diverse set of changelog files |
| 17 | +/// available as part of the packages on https://pub.dev/. |
| 18 | +library; |
| 19 | + |
| 20 | +import 'package:collection/collection.dart'; |
| 21 | +import 'package:html/dom.dart' as html; |
| 22 | +import 'package:html/parser.dart' as html_parser; |
| 23 | +import 'package:pub_semver/pub_semver.dart'; |
| 24 | + |
/// A parsed changelog: an optional title and description, followed by the
/// individual [Release] entries.
class Changelog {
  Changelog({
    this.title,
    this.description,
    required this.releases,
  });

  /// The main heading of the changelog (e.g. 'Changelog'), if there is one.
  final String? title;

  /// Optional introductory content of the changelog.
  final Content? description;

  /// The release entries, typically ordered from newest to oldest.
  final List<Release> releases;
}
| 42 | + |
/// One version entry of the changelog, for example the section that starts
/// with '[1.2.0] - 2025-07-10', or the 'Unreleased' section.
class Release {
  Release({
    required this.version,
    this.anchor,
    this.label,
    this.date,
    this.note,
    required this.content,
  });

  /// The version string or the section title (e.g. '1.2.0', 'Unreleased').
  final String version;

  /// The value of the HTML `id` attribute used as the anchor of the release.
  final String? anchor;

  /// The part of the header text that follows the version.
  final String? label;

  /// The date of the release.
  ///
  /// `null` for the 'Unreleased' section or when the header has no date.
  final DateTime? date;

  /// The remaining text of the [label] once the [date] part (if present) is
  /// left out.
  final String? note;

  /// The body of the release entry.
  final Content content;
}
| 74 | + |
/// Describes an arbitrary piece of content (e.g. the description of a single
/// version).
///
/// The content is created either from HTML text or from already parsed HTML
/// nodes. The form that was not provided is computed on first access and then
/// cached, so parsed nodes are serialized only when needed.
class Content {
  /// The HTML text, when the content was created from text.
  final String? _asText;

  /// The fragment holding the nodes, when the content was created from nodes.
  final html.DocumentFragment? _asNode;

  /// Creates the content from serialized HTML [text].
  Content.fromHtmlText(String text)
      : _asText = text,
        _asNode = null;

  /// Creates the content from already parsed HTML [nodes].
  ///
  /// The nodes are appended to a newly created [html.DocumentFragment].
  Content.fromParsedHtml(List<html.Node> nodes)
      : _asText = null,
        _asNode = _wrapInFragment(nodes);

  /// Collects [nodes] under a new fragment, keeping their order.
  static html.DocumentFragment _wrapInFragment(List<html.Node> nodes) {
    final fragment = html.DocumentFragment();
    // Iterate over a snapshot: appending a node presumably detaches it from
    // its previous parent, which would modify [nodes] mid-iteration if the
    // caller passed a live child list.
    for (final node in List.of(nodes)) {
      fragment.append(node);
    }
    return fragment;
  }

  /// The content serialized as HTML text.
  ///
  /// The fragment is serialized directly. The earlier implementation applied
  /// a cascade to the whole conditional expression and thereby appended the
  /// fragment to itself before serializing it.
  late final String asHtmlText = _asText ?? _asNode!.outerHtml;

  /// The content as a parsed HTML node.
  ///
  /// When the content was created from text, the text is parsed as an HTML
  /// fragment.
  late final html.Node asHtmlNode =
      _asNode ?? html_parser.parseFragment(_asText!);
}
| 105 | + |
/// Parses the changelog with pre-configured options.
///
/// The input is the list of HTML nodes of a rendered changelog. Header
/// elements (`h1` to `h4`) whose text starts with a version (or with an
/// "unreleased" marker) start a new [Release]; every other node is collected
/// as the content of the section it appears in.
class ChangelogParser {
  /// Header tags that may start a release. The index of a tag in this list is
  /// used as the level of the header.
  static const _acceptedHeaderTags = ['h1', 'h2', 'h3', 'h4'];

  /// Lower-case header texts that mark the not-yet-released section.
  static const _unreleasedTexts = ['unreleased', 'next release'];

  /// Recognized date formats, all starting with the year.
  static final _yyyymmddDateFormats = <RegExp>[
    RegExp(r'^(\d{4})-(\d{2})-(\d{2})$'), // 2025-07-10
    RegExp(r'^(\d{4})/(\d{2})/(\d{2})$'), // 2025/07/10
  ];

  final bool _strictLevels;
  final int _partOfLevelThreshold;

  /// Creates a parser.
  ///
  /// When [strictLevels] is set, only headers that use the same tag as the
  /// first release header are considered as release headers.
  ///
  /// A version-like header that is at least [partOfLevelThreshold] levels
  /// below the header of the current release is kept as part of that
  /// release's content instead of starting a new release.
  ChangelogParser({
    bool strictLevels = false,
    int partOfLevelThreshold = 2,
  })  : _strictLevels = strictLevels,
        _partOfLevelThreshold = partOfLevelThreshold;

  /// Parses markdown nodes into a [Changelog] structure.
  ///
  /// Nodes before the first release header become the description (or the
  /// title, for the first `h1`). A header that repeats an already seen version
  /// is treated as regular content.
  Changelog parseHtmlNodes(List<html.Node> input) {
    String? title;
    Content? description;
    final releases = <Release>[];
    // Versions that already started a release, including the current one.
    final seenVersions = <String>{};

    String? firstReleaseLocalName;
    _ParsedHeader? current;

    var nodes = <html.Node>[];

    // Turns the nodes collected so far into the description (when no release
    // header was seen yet) or into the release of the current header.
    void finalizeNodes() {
      // Copy to a local: a captured variable that is assigned outside of the
      // closure cannot be promoted inside of it.
      final header = current;
      final content = Content.fromParsedHtml(nodes);
      if (header == null) {
        description = content.asHtmlText.trim().isEmpty ? null : content;
      } else {
        releases.add(Release(
          version: header.version,
          anchor: header.anchor,
          label: header.label,
          date: header.date,
          note: header.note,
          content: content,
        ));
      }
      nodes = <html.Node>[];
    }

    // Iterate over a copy: collecting nodes into a [Content] moves them under
    // a new fragment, which presumably would modify a live child list.
    for (final node in [...input]) {
      if (node is html.Element &&
          _acceptedHeaderTags.contains(node.localName)) {
        // In strict mode a header on another level than the first release
        // header is never a release header. It still belongs to the content
        // of the current section, so it must not be dropped.
        final isOffLevel = _strictLevels &&
            firstReleaseLocalName != null &&
            node.localName != firstReleaseLocalName;

        if (!isOffLevel) {
          final headerText = node.text.trim();

          // Check if this looks like a version header first
          final parsed = _tryParseAsHeader(node, headerText);
          if (parsed != null) {
            final active = current;
            final isNewVersion = !seenVersions.contains(parsed.version);
            final isPartOfCurrent = active != null &&
                active.level + _partOfLevelThreshold <= parsed.level;
            if (isNewVersion && !isPartOfCurrent) {
              firstReleaseLocalName ??= node.localName;
              finalizeNodes();
              current = parsed;
              seenVersions.add(parsed.version);
              continue;
            }
          }

          // only consider as title if it's h1 and we haven't found any
          // versions yet
          if (node.localName == 'h1' && title == null && current == null) {
            title = headerText;
            continue;
          }
        }
      }

      // collect nodes for description (before any version) or current release
      nodes.add(node);
    }

    // complete last section
    finalizeNodes();

    return Changelog(
      title: title,
      description: description,
      releases: releases,
    );
  }

  /// Parses the release header line or returns `null` when no version part
  /// was recognized.
  ///
  /// Handles some of the common formats:
  /// - `1.2.0`
  /// - `v1.2.0`
  /// - `[1.2.0] - 2025-07-14`
  /// - `unreleased`
  /// - `next release (...)`
  ///
  /// The anchor is the `id` attribute of [elem]; for versioned headers without
  /// an `id` it falls back to the version with the dots removed.
  _ParsedHeader? _tryParseAsHeader(html.Element elem, String input) {
    final level = _acceptedHeaderTags.indexOf(elem.localName!);
    final anchor = elem.attributes['id'];

    // All offsets below are relative to the trimmed text.
    final text = input.trim();

    // special case: unreleased
    final textLowerCase = text.toLowerCase();
    for (final unreleasedText in _unreleasedTexts) {
      if (textLowerCase == unreleasedText) {
        return _ParsedHeader(level, 'Unreleased', null, null, anchor, null);
      }
      if (textLowerCase.startsWith('$unreleasedText ')) {
        final rest = text.substring(unreleasedText.length + 1).trim();
        return _ParsedHeader(
            level, 'Unreleased', rest.isEmpty ? null : rest, null, anchor, null);
      }
    }

    // extract version
    final versionPart = text.split(' ').firstWhereOrNull((e) => e.isNotEmpty);
    if (versionPart == null) {
      return null;
    }
    final version = _parseVersionPart(versionPart.trim());
    if (version == null) {
      return null;
    }

    // rest of the release header
    var rest =
        text.substring(text.indexOf(versionPart) + versionPart.length).trim();
    if (rest.startsWith('- ')) {
      rest = rest.substring(2).trim();
    }
    final label = rest.isEmpty ? null : rest;

    DateTime? date;
    String? note;

    if (label != null) {
      final firstPart = label.split(' ').first;
      date = _parseDatePart(firstPart.trim());
      // Without a date the whole label is the note. With a date the note is
      // the text after it, trimmed so that extra spaces following the date do
      // not end up at the start of the note.
      final remainder =
          date == null ? label : label.substring(firstPart.length).trim();
      note = remainder.isEmpty ? null : remainder;
    }

    return _ParsedHeader(level, version, label, date,
        anchor ?? version.replaceAll('.', ''), note);
  }

  /// Parses the version part of a release title.
  ///
  /// Surrounding brackets and a leading `v` are removed before the check.
  ///
  /// Returns the extracted version string, or null if no version was
  /// recognized.
  String? _parseVersionPart(String input) {
    var candidate = input;
    // remove brackets or 'v' if present
    if (candidate.startsWith('[') && candidate.endsWith(']')) {
      candidate = candidate.substring(1, candidate.length - 1).trim();
    }
    if (candidate.startsWith('v')) {
      candidate = candidate.substring(1).trim();
    }

    // sanity check if it's a valid semantic version
    try {
      final version = Version.parse(candidate);
      if (!version.isEmpty && !version.isAny) {
        return candidate;
      }
    } on FormatException {
      // Not a version: reported to the caller as `null` below.
    }

    return null;
  }

  /// Parses the date part of a release title.
  ///
  /// Surrounding parentheses are removed before the check.
  ///
  /// Returns the parsed date or null if no date was recognized.
  ///
  /// Note: currently only date formats that start with a year are recognized.
  DateTime? _parseDatePart(String input) {
    var candidate = input;
    if (candidate.startsWith('(') && candidate.endsWith(')')) {
      candidate = candidate.substring(1, candidate.length - 1);
    }
    for (final format in _yyyymmddDateFormats) {
      final match = format.matchAsPrefix(candidate);
      if (match == null) continue;
      final year = int.parse(match.group(1)!);
      final month = int.parse(match.group(2)!);
      final day = int.parse(match.group(3)!);
      final date = DateTime(year, month, day);
      // sanity check for overflow dates (e.g. month 13 or day 32 would
      // otherwise roll over into another valid date)
      if (date.year != year || date.month != month || date.day != day) {
        continue;
      }
      return date;
    }

    return null;
  }
}
| 315 | + |
/// The values extracted from a header element that was recognized as the
/// start of a release section.
class _ParsedHeader {
  _ParsedHeader(
    this.level,
    this.version,
    this.label,
    this.date,
    this.anchor,
    this.note,
  );

  /// The level of the header; larger values are deeper headers.
  final int level;

  /// The version string, or 'Unreleased' for the unreleased section.
  final String version;

  /// The header text after the version, if any.
  final String? label;

  /// The date found in the header, if any.
  final DateTime? date;

  /// The anchor of the header, if any.
  final String? anchor;

  /// The header text after the version and the date, if any.
  final String? note;
}
0 commit comments