Skip to content

Commit 76c5443

Browse files
authored
Fixing license coverage calculation by using LCS token sequence on the final match. (#1485)
1 parent 3f59d0e commit 76c5443

20 files changed

+163
-419
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
- Fixed small ambiguity in the order of license match evaluation.
44
- Resolve `analysis_options.yaml` dev dependency to expose transitive formatter options.
5+
- Fixed license coverage calculation.
56

67
## 0.22.23
78

lib/src/license.dart

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import 'package:meta/meta.dart';
1010
import 'package:path/path.dart' as p;
1111
import 'package:source_span/source_span.dart';
1212

13+
import 'license_detection/lcs.dart';
1314
import 'license_detection/license_detector.dart' hide License, Range;
1415
import 'model.dart';
1516

@@ -42,11 +43,6 @@ Future<List<License>> detectLicenseInFile(
4243
return licenses;
4344
}
4445

45-
/// Characters and expression that are accepted as non-relevant range gaps,
46-
/// and consecutive [Range] values can be merged if they have only these
47-
/// between.
48-
final _rangeMergeRegexp = RegExp(r'^[\s\.\-\(\)\*]+$');
49-
5046
/// Returns the license(s) detected from the [SPDX-corpus][1].
5147
///
5248
/// [1]: https://spdx.org/licenses/
@@ -62,34 +58,31 @@ Future<List<License>> detectLicenseInContent(
6258
}
6359

6460
List<int> buildCoverages(LicenseMatch match) {
65-
final ranges = <({int start, int end})>[];
66-
// ignore: invalid_use_of_visible_for_testing_member
67-
for (final token in match.tokens) {
61+
final common = longestCommonSubsequence(
62+
// ignore: invalid_use_of_visible_for_testing_member
63+
unknown: match.tokens,
64+
// ignore: invalid_use_of_visible_for_testing_member
65+
known: match.license.tokens,
66+
);
67+
68+
final ranges = <({int index, int start, int end})>[];
69+
for (final token in common) {
6870
// check to merge into last range
6971
final last = ranges.lastOrNull;
70-
if (last != null) {
71-
var mergeWithLast = false;
72-
if (last.end == token.span.start.offset) {
73-
mergeWithLast = true;
74-
} else {
75-
final textBetween = content.substring(
76-
last.end,
77-
token.span.start.offset,
78-
);
79-
if (_rangeMergeRegexp.matchAsPrefix(textBetween) != null) {
80-
mergeWithLast = true;
81-
}
82-
}
83-
if (mergeWithLast) {
84-
ranges[ranges.length - 1] = (
85-
start: last.start,
86-
end: token.span.end.offset,
87-
);
88-
continue;
89-
}
72+
if (last != null && last.index + 1 == token.unknown.index) {
73+
ranges[ranges.length - 1] = (
74+
index: token.unknown.index,
75+
start: last.start,
76+
end: token.unknown.span.end.offset,
77+
);
78+
continue;
9079
}
9180
// fallback: start a new range
92-
ranges.add((start: token.span.start.offset, end: token.span.end.offset));
81+
ranges.add((
82+
index: token.unknown.index,
83+
start: token.unknown.span.start.offset,
84+
end: token.unknown.span.end.offset,
85+
));
9386
}
9487
return ranges.expand((e) => [e.start, e.end]).toList();
9588
}

lib/src/license_detection/confidence.dart

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ LicenseMatch licenseMatch(
3535
"Can't compare the licenses due to different granularity",
3636
);
3737
}
38+
// TODO: use the token-based LCS algorithm from `lcs.dart` (https://github.com/dart-lang/pana/issues/1487)
3839
final diffs = getDiffs(
3940
unknownLicense.tokens,
4041
knownLicense.tokens,

lib/src/license_detection/lcs.dart

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
// ignore_for_file: invalid_use_of_visible_for_testing_member
6+
7+
import 'dart:math';
8+
9+
import 'license_detector.dart' show Token;
10+
11+
/// A pair of matching tokens: one from the unknown sequence and one from the known sequence.
12+
typedef TokenPair = ({Token unknown, Token known});
13+
14+
/// Computes the longest common subsequence (LCS) of the [unknown] and [known]
/// token lists.
///
/// Two tokens are considered equal when their normalized [Token.value]s are
/// equal. The result lists the matched tokens in sequence order, each as a
/// [TokenPair] that references both the unknown and the known token.
///
/// Returns an empty list when either input is empty.
List<TokenPair> longestCommonSubsequence({
  required List<Token> unknown,
  required List<Token> known,
}) {
  final shorter = min(unknown.length, known.length);
  if (shorter == 0) {
    return [];
  }

  // Count the leading tokens that are pairwise equal in both lists.
  var prefixLength = 0;
  while (prefixLength < shorter &&
      unknown[prefixLength].value == known[prefixLength].value) {
    prefixLength++;
  }

  // Count the trailing tokens that are pairwise equal. The limit keeps the
  // suffix from overlapping the already matched prefix in the shorter list.
  final suffixLimit = shorter - prefixLength;
  var suffixLength = 0;
  while (suffixLength < suffixLimit &&
      unknown[unknown.length - 1 - suffixLength].value ==
          known[known.length - 1 - suffixLength].value) {
    suffixLength++;
  }

  final unknownSuffixStart = unknown.length - suffixLength;
  final knownSuffixStart = known.length - suffixLength;

  // Only the middle part that remains after trimming the shared prefix and
  // suffix is handed to the table-based algorithm.
  return [
    for (var i = 0; i < prefixLength; i++)
      (unknown: unknown[i], known: known[i]),
    ..._dynamicLcs(
      unknown: unknown.sublist(prefixLength, unknownSuffixStart),
      known: known.sublist(prefixLength, knownSuffixStart),
    ),
    for (var i = 0; i < suffixLength; i++)
      (
        unknown: unknown[unknownSuffixStart + i],
        known: known[knownSuffixStart + i],
      ),
  ];
}
71+
72+
/// Computes the longest common subsequence of [unknown] and [known] using a
/// dynamic-programming table, comparing tokens by their `value`.
///
/// The table holds `(unknown.length + 1) * (known.length + 1)` entries.
/// Returns the matched pairs in sequence order, or an empty result when either
/// list is empty.
Iterable<TokenPair> _dynamicLcs({
  required List<Token> unknown,
  required List<Token> known,
}) {
  if (unknown.isEmpty || known.isEmpty) {
    return [];
  }
  final rows = unknown.length;
  final cols = known.length;

  // lengths[r][c] is the LCS length of the first `r` unknown tokens and the
  // first `c` known tokens; row 0 and column 0 stay at zero.
  final lengths = [
    for (var r = 0; r <= rows; r++) List.filled(cols + 1, 0),
  ];
  for (var r = 1; r <= rows; r++) {
    final current = lengths[r];
    final previous = lengths[r - 1];
    for (var c = 1; c <= cols; c++) {
      current[c] = unknown[r - 1].value == known[c - 1].value
          ? previous[c - 1] + 1
          : max(previous[c], current[c - 1]);
    }
  }

  // Walk back from the bottom-right corner of the table. Pairs are collected
  // from last to first, hence the reversal on return.
  final collected = <TokenPair>[];
  var r = rows;
  var c = cols;
  while (r > 0 && c > 0) {
    final utoken = unknown[r - 1];
    final ktoken = known[c - 1];
    if (utoken.value == ktoken.value) {
      collected.add((unknown: utoken, known: ktoken));
      r--;
      c--;
    } else if (lengths[r - 1][c] > lengths[r][c - 1]) {
      r--;
    } else {
      c--;
    }
  }
  return collected.reversed;
}

test/goldens/end2end/_dummy_pkg-1.0.0-null-safety.1.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737
},
3838
"coverages": [
3939
63,
40-
836,
41-
838,
40+
596,
41+
609,
4242
1523
4343
]
4444
}
@@ -116,8 +116,8 @@
116116
},
117117
"coverages": [
118118
63,
119-
836,
120-
838,
119+
596,
120+
609,
121121
1523
122122
]
123123
}

test/goldens/end2end/async-2.11.0.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@
4747
},
4848
"coverages": [
4949
44,
50-
816,
51-
818,
50+
577,
51+
589,
5252
1503
5353
]
5454
}
@@ -154,8 +154,8 @@
154154
},
155155
"coverages": [
156156
44,
157-
816,
158-
818,
157+
577,
158+
589,
159159
1503
160160
]
161161
}

test/goldens/end2end/audio_service-0.18.17.json

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,6 @@
8787
},
8888
"coverages": [
8989
14,
90-
24,
91-
25,
92-
27,
93-
28,
94-
34,
95-
1011,
96-
1019,
97-
1020,
98-
1022,
99-
1023,
10090
1036
10191
]
10292
}
@@ -242,16 +232,6 @@
242232
},
243233
"coverages": [
244234
14,
245-
24,
246-
25,
247-
27,
248-
28,
249-
34,
250-
1011,
251-
1019,
252-
1020,
253-
1022,
254-
1023,
255235
1036
256236
]
257237
}

test/goldens/end2end/bulma_min-0.7.4.json

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,6 @@
3838
},
3939
"coverages": [
4040
24,
41-
34,
42-
35,
43-
37,
44-
38,
45-
44,
46-
1015,
47-
1029,
48-
1030,
49-
1032,
50-
1033,
5141
1046
5242
]
5343
}
@@ -132,16 +122,6 @@
132122
},
133123
"coverages": [
134124
24,
135-
34,
136-
35,
137-
37,
138-
38,
139-
44,
140-
1015,
141-
1029,
142-
1030,
143-
1032,
144-
1033,
145125
1046
146126
]
147127
}

test/goldens/end2end/dnd-2.0.1.json

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,6 @@
4343
},
4444
"coverages": [
4545
59,
46-
193,
47-
195,
48-
655,
49-
657,
5046
1097
5147
]
5248
}
@@ -140,10 +136,6 @@
140136
},
141137
"coverages": [
142138
59,
143-
193,
144-
195,
145-
655,
146-
657,
147139
1097
148140
]
149141
}

test/goldens/end2end/gg-1.0.12.json

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,6 @@
6969
},
7070
"coverages": [
7171
14,
72-
24,
73-
25,
74-
27,
75-
28,
76-
34,
77-
1020,
78-
1022,
79-
1023,
80-
1026,
81-
1027,
8272
1036
8373
]
8474
}
@@ -221,16 +211,6 @@
221211
},
222212
"coverages": [
223213
14,
224-
24,
225-
25,
226-
27,
227-
28,
228-
34,
229-
1020,
230-
1022,
231-
1023,
232-
1026,
233-
1027,
234214
1036
235215
]
236216
}

0 commit comments

Comments
 (0)