Skip to content

Commit 2dff8fe

Browse files
authored
Update the trend scoring function (#8831)
1 parent 95b4920 commit 2dff8fe

File tree

3 files changed

+149
-147
lines changed

3 files changed

+149
-147
lines changed

app/lib/service/download_counts/computations.dart

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Future<Map<String, double>> computeTrend() async {
3131
(await downloadCountsBackend.lookupDownloadCountData(name))
3232
?.totalCounts ??
3333
[0];
34+
3435
res[name] = computeTrendScore(downloads);
3536
}
3637
return res;

app/lib/service/download_counts/package_trends.dart

Lines changed: 79 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,61 +6,36 @@ import 'dart:math';
66

77
const analysisWindowDays = 30;
88
const totalTrendWindowDays = 330;
9-
const minThirtyDaysDownloadThreshold = 30000;
109

11-
/// Calculates the relative daily growth rate of a package's downloads.
10+
/// The total download count over 30 days at which a package is considered
11+
/// moderately popular.
12+
const popularityMidpoint = 30000.0;
13+
14+
/// Calculates the exponential growth rate of a package's downloads.
1215
///
13-
/// Given a list with total daily downloads ([totalDownloads]), where the most
14-
/// recent day's data is at index 0, this function analyzes the downloads trend
15-
/// over the last ([analysisWindowDays]) days to determine how fast a package is
16-
/// growing relative to its own current download volume.
16+
/// Given a list with total daily downloads ([downloads]), where the most
17+
/// recent day's data is at index 0, this function performs a
18+
/// linear regression on the log-transformed download counts over the last
19+
/// [analysisWindowDays] days.
1720
///
18-
/// A positive value indicates an upward trend in downloads, while a negative
19-
/// value indicates a downward trend. The magnitude represents the growth (or
20-
/// decline) rate normalized by the average daily downloads, allowing for
21-
/// comparison across packages of different popularity. For example, a slope of
22-
/// +10 downloads/day is more significant for a package with 100 average daily
23-
/// downloads (10% relative growth) than for a package with 10000 average daily
24-
/// downloads (0.1% relative growth).
25-
double computeRelativeGrowthRate(List<int> totalDownloads) {
26-
if (totalDownloads.isEmpty) {
21+
/// The resulting slope represents the continuous daily growth rate. A positive
22+
/// slope indicates exponential growth, while a negative slope indicates
23+
/// exponential decline. For example, a slope of `0.1` corresponds to a growth
24+
/// of approximately 10.5% per day.
25+
double computeRelativeGrowthRate(List<int> downloads) {
26+
if (downloads.length < 2) {
2727
return 0;
2828
}
29-
final List<int> data;
30-
if (totalDownloads.length < analysisWindowDays) {
31-
data = [
32-
...totalDownloads,
33-
...List.filled(analysisWindowDays - totalDownloads.length, 0)
34-
];
35-
} else {
36-
data = totalDownloads;
37-
}
38-
39-
final recentDownloads = data.sublist(0, analysisWindowDays);
40-
41-
final averageRecentDownloads =
42-
recentDownloads.reduce((prev, element) => prev + element) /
43-
recentDownloads.length;
44-
45-
final m = min(totalDownloads.length, totalTrendWindowDays);
46-
final averageTotalDownloads =
47-
totalDownloads.sublist(0, m).reduce((prev, element) => prev + element) /
48-
m;
4929

50-
if (averageRecentDownloads == 0 || averageTotalDownloads == 0) {
51-
return 0;
52-
}
30+
final analysisData = downloads.length > analysisWindowDays
31+
? downloads.sublist(0, analysisWindowDays)
32+
: downloads;
5333

5434
// We reverse the log-transformed list for regression, since the first entry
5535
// is the newest point in time. By reversing, we pass the data in
5636
// chronological order.
57-
final growthRate =
58-
calculateLinearRegressionSlope(recentDownloads.reversed.toList());
59-
60-
// Normalize slope by average downloads to represent relative growth.
61-
// This measures how much the download count is growing relative to its
62-
// current volume.
63-
return growthRate / averageTotalDownloads;
37+
return calculateLinearRegressionSlope(
38+
safeLogTransform(analysisData).reversed.toList());
6439
}
6540

6641
/// Computes the slope of the best-fit line for a given list of data points
@@ -102,20 +77,67 @@ double calculateLinearRegressionSlope(List<num> yValues) {
10277
return (n * sumXY - sumX * sumY) / denominator;
10378
}
10479

105-
/// Computes a trend score for a package, factoring in both its recent
106-
/// relative growth rate and its overall download volume.
80+
/// Computes a trend score for a package, factoring in both its recent relative
81+
/// growth rate and its overall download volume.
10782
///
108-
/// This score is designed to balance how quickly a package is growing
109-
/// ([computeRelativeGrowthRate]) against its existing popularity. Popularity is
110-
/// assessed by comparing the sum of its downloads over the available history
111-
/// (up to [analysisWindowDays]) against a [minThirtyDaysDownloadThreshold].
83+
/// This function sanitizes the download history by trimming any trailing zeros
84+
/// in [totalDownloads], which represent the time before the package was
85+
/// published. This ensures the trend is only calculated over the package's
86+
/// active lifetime.
87+
///
88+
/// The final score is designed to balance how quickly a package is growing
89+
/// against its existing popularity. Popularity is assessed using a sigmoid
90+
/// function where the midpoint is defined by [popularityMidpoint].
11291
double computeTrendScore(List<int> totalDownloads) {
113-
final n = min(analysisWindowDays, totalDownloads.length);
114-
final thirtydaySum = totalDownloads.isEmpty
92+
final lastNonZeroIndex = totalDownloads.lastIndexWhere((e) => e != 0);
93+
94+
// We trim trailing zeros to ensure an accurate calculation of the trend. The
95+
// zeros represent the time before the package was published. Leaving them in
96+
// would artificially flatten the calculated growth rate.
97+
final downloads = lastNonZeroIndex >= 0
98+
? totalDownloads.sublist(0, lastNonZeroIndex + 1)
99+
: <int>[];
100+
101+
final n = min(analysisWindowDays, downloads.length);
102+
final thirtydaySum = downloads.isEmpty
115103
? 0
116-
: totalDownloads.sublist(0, n).reduce((prev, element) => prev + element);
117-
final dampening = min(thirtydaySum / minThirtyDaysDownloadThreshold, 1.0);
118-
final relativGrowth = computeRelativeGrowthRate(totalDownloads);
104+
: downloads.sublist(0, n).reduce((prev, element) => prev + element);
105+
final sigmoid = calculateSigmoidScaleScore(total30Downloads: thirtydaySum);
106+
107+
return computeRelativeGrowthRate(downloads) * sigmoid;
108+
}
109+
110+
/// Transforms a list of numbers to their natural logarithm.
111+
///
112+
/// Non-positive numbers (<= 0) are treated as 1 before the logarithm is taken,
113+
/// resulting in a log value of 0.0.
114+
List<double> safeLogTransform(List<int> numbers) {
115+
double myLog(int number) {
116+
if (number <= 0) {
117+
return 0.0;
118+
}
119+
return log(number);
120+
}
121+
122+
return numbers.map(myLog).toList();
123+
}
119124

120-
return relativGrowth * dampening * dampening;
125+
/// Calculates a dampening score between 0.0 and 1.0 based on download volume.
126+
///
127+
/// This uses a sigmoid function to create a smooth "S"-shaped curve. Packages
128+
/// with very low download counts get a score near 0, while packages with high
129+
/// download counts get a score near 1.
130+
///
131+
/// The function takes the total number of downloads in the last 30 days
132+
/// ([total30Downloads]). The [midpoint] parameter (defaults to
133+
/// [popularityMidpoint]) is the download count at which the score is exactly
134+
/// 0.5, and [steepness] controls how quickly the score moves from 0 to 1.
135+
/// Higher values create a steeper, more sudden transition.
136+
double calculateSigmoidScaleScore({
137+
required int total30Downloads,
138+
double midpoint = popularityMidpoint,
139+
double steepness = 0.00015,
140+
}) {
141+
final double exponent = -steepness * (total30Downloads - midpoint);
142+
return 1 / (1 + exp(exponent));
121143
}

app/test/service/download_counts/package_trends_test.dart

Lines changed: 69 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
// for details. All rights reserved. Use of this source code is governed by a
33
// BSD-style license that can be found in the LICENSE file.
44

5-
import 'dart:math';
6-
75
import 'package:pub_dev/service/download_counts/package_trends.dart';
86
import 'package:test/test.dart';
97

@@ -35,39 +33,33 @@ void main() {
3533

3634
test('calculates positive relative growth rate for positive trend', () {
3735
// Input list (newest first): [1645, 1635, ..., 1355] (30 values)
38-
// Average = 1500 for the first 30 values. Slope: 10.
39-
final downloads = <int>[
40-
...List<int>.generate(analysisWindowDays * 2, (i) => 1645 - (i * 10)),
41-
...List.filled(300, 0)
42-
];
43-
final avg = downloads.reduce((prev, element) => prev + element) / 330;
44-
final expectedRate = 10.0 / avg;
45-
expect(computeRelativeGrowthRate(downloads), expectedRate);
36+
final downloads =
37+
List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
38+
final expectedRate = 0.0066800225103267686;
39+
expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
4640
});
4741

4842
test('calculates negative relative growth rate for negative trend', () {
4943
// Input list (newest first): [1355, 1365, ..., 1645]
50-
// Average = 1500. Slope: -10.
5144
final downloads =
5245
List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
53-
final expectedRate = -10.0 / 1500.0;
54-
expect(computeRelativeGrowthRate(downloads), expectedRate);
46+
final expectedRate = -0.0066800225103267686;
47+
expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
5548
});
5649

5750
test(
5851
'calculates positive relative growth for data barely meeting threshold',
5952
() {
6053
// Input list (newest first): [1016, 1015, ..., 987]
61-
// Average: 1001.5. Slope: 1.
6254
final downloads =
6355
List<int>.generate(analysisWindowDays, (i) => 1016 - i * 1);
64-
final expectedRate = 1.0 / 1001.5;
56+
final expectedRate = 0.000998546932871653;
6557
expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
6658
});
6759

6860
test('should handle fluctuating data with a slight positive overall trend',
6961
() {
70-
// Newest first. Average 1135.
62+
// Newest first.
7163
final downloads = <int>[
7264
1300,
7365
1250,
@@ -100,91 +92,78 @@ void main() {
10092
1020,
10193
970
10294
];
103-
final expectedRate = 683250.0 / 67425.0 / 1135.0;
104-
expect(computeRelativeGrowthRate(downloads), expectedRate);
95+
final expectedRate = 0.008963997580330865;
96+
expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
10597
});
10698
});
10799
group('computeTrendScore', () {
108-
test('Short history, very low sum, positive growth -> heavily dampened',
109-
() {
100+
test('Short history, very low sum, -> heavily dampened', () {
110101
final downloads = [100, 50];
111-
// For relativeGrowth:
112-
// Padded data: [100, 50, 0...0] (28 zeros)
113-
// avg = (100 + 50) / 2 = 75.
114-
// growthRate = 63750 / 67425
115-
final expectedDampening = min(1.0, 150 / 30000);
116-
final expectedRelativeGrowth = (63750 / 67425) / 75;
117-
final expectedScore =
118-
expectedRelativeGrowth * expectedDampening * expectedDampening;
119-
expect(computeTrendScore(downloads), expectedScore);
102+
final totalSum = 150;
103+
104+
final expectedRelativeGrowth = 0.69315;
105+
final expectedDampening =
106+
calculateSigmoidScaleScore(total30Downloads: totalSum);
107+
final expectedScore = expectedRelativeGrowth * expectedDampening;
108+
109+
expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
120110
});
121-
});
122111

123-
test('Full history, sum meets threshold, positive growth -> no dampening',
124-
() {
125-
final downloads =
126-
List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
127-
// For relativeGrowth:
128-
// data: [1645, 1635, ..., 1355]
129-
// avg = 1500,
130-
// growthrate = 10
131-
final expectedDampening = min(1.0, 45000 / 30000);
132-
final expectedRelativeGrowth = 10 / 1500;
133-
final expectedScore =
134-
expectedRelativeGrowth * expectedDampening * expectedDampening;
135-
expect(computeTrendScore(downloads), expectedScore);
136-
});
112+
test('Full history, positive growth -> almost no dampening', () {
113+
final downloads = // [1645, 1635, ..., 1355]
114+
List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
115+
final totalSum = downloads.reduce((a, b) => a + b); // 45000
137116

138-
test('Negative growth, sum meets threshold -> no dampening', () {
139-
final downloads =
140-
List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
141-
// For relativeGrowth:
142-
// data: [1645, 1635, ..., 1355]
143-
// avg = 1500,
144-
// growthrate = -10
145-
final expectedDampening = min(1.0, 45000 / 30000);
146-
final expectedRelativeGrowth = -10.0 / 1500;
147-
final expectedScore =
148-
expectedRelativeGrowth * expectedDampening * expectedDampening;
149-
expect(computeTrendScore(downloads), expectedScore);
150-
});
151-
test('Full history, sum below threshold, positive growth -> dampened', () {
152-
final downloads =
153-
List<int>.generate(analysisWindowDays, (i) => 645 - (i * 10));
154-
// For relativeGrowth:
155-
// data: [645,..., 345, 355]
156-
// avg = 500
157-
// growthrate = 10
158-
final expectedDampening = min(1.0, 15000 / 30000);
159-
final expectedRelativeGrowth = 10.0 / 500.0;
160-
final expectedScore =
161-
expectedRelativeGrowth * expectedDampening * expectedDampening;
162-
163-
expect(computeTrendScore(downloads), expectedScore);
164-
});
117+
final expectedRelativeGrowth = 0.006673;
118+
final expectedDampening =
119+
calculateSigmoidScaleScore(total30Downloads: totalSum);
120+
final expectedScore = expectedRelativeGrowth * expectedDampening;
165121

166-
test('Empty totalDownloads list -> score 0', () {
167-
final downloads = <int>[];
168-
expect(computeTrendScore(downloads), 0);
169-
});
122+
expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
123+
});
170124

171-
test('Full history, all zero downloads -> score 0', () {
172-
final downloads = List<int>.filled(analysisWindowDays, 0);
173-
expect(computeTrendScore(downloads), 0);
174-
});
125+
test('Full history, negative growth -> almost no dampening', () {
126+
final downloads = // [1355, 1365, ..., 1645]
127+
List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
128+
final totalSum = downloads.reduce((a, b) => a + b); // 45000
129+
final expectedRelativeGrowth = -0.006673;
130+
final expectedDampening =
131+
calculateSigmoidScaleScore(total30Downloads: totalSum);
132+
final expectedScore = expectedRelativeGrowth * expectedDampening;
175133

176-
test('ThirtyDaySum just below threshold correctly, flat growth', () {
177-
final downloads = List<int>.filled(analysisWindowDays, 999);
178-
expect(computeTrendScore(downloads), 0);
179-
});
134+
expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
135+
});
136+
137+
test('Full history, sum below threshold, positive growth -> dampened', () {
138+
final downloads = // [645, ... , 355]
139+
List<int>.generate(analysisWindowDays, (i) => 645 - (i * 10));
140+
final totalSum = downloads.reduce((a, b) => a + b);
141+
final expectedRelativeGrowth = 0.020373587410745377;
142+
final expectedDampening =
143+
calculateSigmoidScaleScore(total30Downloads: totalSum);
144+
final expectedScore = expectedRelativeGrowth * expectedDampening;
145+
146+
expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
147+
});
148+
149+
test('Empty totalDownloads list -> score 0', () {
150+
final downloads = <int>[];
151+
expect(computeTrendScore(downloads), 0);
152+
});
180153

181-
test('Short history, high sum meets threshold -> no dampening', () {
182-
final downloads = List<int>.filled(15, 2000);
183-
final expectedDampening = min(1.0, 30000 / 30000);
184-
final expectedRelativeGrowth = (6750000 / 67425) / 2000;
185-
final expectedScore =
186-
expectedRelativeGrowth * expectedDampening * expectedDampening;
154+
test('Full history, all zero downloads -> score 0', () {
155+
final downloads = List<int>.filled(analysisWindowDays, 0);
156+
expect(computeTrendScore(downloads), 0);
157+
});
187158

188-
expect(computeTrendScore(downloads), expectedScore);
159+
test('Full history, sum just below threshold, flat growth', () {
160+
final downloads = List<int>.filled(analysisWindowDays, 999);
161+
expect(computeTrendScore(downloads), closeTo(0.0, 0.0001));
162+
});
163+
164+
test('Short history, high sum meets threshold, flat growth', () {
165+
final downloads = List<int>.filled(15, 2000);
166+
expect(computeTrendScore(downloads), closeTo(0.0, 0.0001));
167+
});
189168
});
190169
}

0 commit comments

Comments
 (0)