Update the trend scoring function (#8831)

szakarias · web-flow · commit 2dff8fe3ed63 · 2025-06-26T11:32:07.000+02:00
diff --git a/app/lib/service/download_counts/computations.dart b/app/lib/service/download_counts/computations.dart
@@ -31,6 +31,7 @@ Future<Map<String, double>> computeTrend() async {
         (await downloadCountsBackend.lookupDownloadCountData(name))
                 ?.totalCounts ??
             [0];
+
     res[name] = computeTrendScore(downloads);
   }
   return res;
diff --git a/app/lib/service/download_counts/package_trends.dart b/app/lib/service/download_counts/package_trends.dart
@@ -6,61 +6,36 @@ import 'dart:math';
 
 const analysisWindowDays = 30;
 const totalTrendWindowDays = 330;
-const minThirtyDaysDownloadThreshold = 30000;
 
-/// Calculates the relative daily growth rate of a package's downloads.
+/// The total download count over 30 days at which a package is considered
+/// moderately popular.
+const popularityMidpoint = 30000.0;
+
+/// Calculates the exponential growth rate of a package's downloads.
 ///
-/// Given a list with total daily downloads ([totalDownloads]), where the most
-/// recent day's data is at index 0, this function analyzes the downloads trend
-/// over the last ([analysisWindowDays]) days to determine how fast a package is
-/// growing relative to its own current download volume.
+/// Given a list with total daily downloads ([downloads]), where the most
+/// recent day's data is at index 0, this function performs a
+/// linear regression on the log-transformed download counts over the last
+/// [analysisWindowDays].
 ///
-/// A positive value indicates an upward trend in downloads, while a negative
-/// value indicates a downward trend. The magnitude represents the growth (or
-/// decline) rate normalized by the average daily downloads, allowing for
-/// comparison across packages of different popularity. For example, a slope of
-/// +10 downloads/day is more significant for a package with 100 average daily
-/// downloads (10% relative growth) than for a package with 10000 average daily
-/// downloads (0.1% relative growth).
-double computeRelativeGrowthRate(List<int> totalDownloads) {
-  if (totalDownloads.isEmpty) {
+/// The resulting slope represents the continuous daily growth rate. A positive
+/// slope indicates exponential growth, while a negative slope indicates
+/// exponential decline. For example, a slope of `0.1` corresponds to a growth
+/// of approximately 10.5% per day.
+double computeRelativeGrowthRate(List<int> downloads) {
+  if (downloads.length < 2) {
     return 0;
   }
-  final List<int> data;
-  if (totalDownloads.length < analysisWindowDays) {
-    data = [
-      ...totalDownloads,
-      ...List.filled(analysisWindowDays - totalDownloads.length, 0)
-    ];
-  } else {
-    data = totalDownloads;
-  }
-
-  final recentDownloads = data.sublist(0, analysisWindowDays);
-
-  final averageRecentDownloads =
-      recentDownloads.reduce((prev, element) => prev + element) /
-          recentDownloads.length;
-
-  final m = min(totalDownloads.length, totalTrendWindowDays);
-  final averageTotalDownloads =
-      totalDownloads.sublist(0, m).reduce((prev, element) => prev + element) /
-          m;
 
-  if (averageRecentDownloads == 0 || averageTotalDownloads == 0) {
-    return 0;
-  }
+  final analysisData = downloads.length > analysisWindowDays
+      ? downloads.sublist(0, analysisWindowDays)
+      : downloads;
 
   // We reverse the recentDownloads list for regression, since the first entry
   // is the newest point in time. By reversing, we pass the data in
   // chronological order.
-  final growthRate =
-      calculateLinearRegressionSlope(recentDownloads.reversed.toList());
-
-  // Normalize slope by average downloads to represent relative growth.
-  // This measures how much the download count is growing relative to its
-  // current volume.
-  return growthRate / averageTotalDownloads;
+  return calculateLinearRegressionSlope(
+      safeLogTransform(analysisData).reversed.toList());
 }
 
 /// Computes the slope of the best-fit line for a given list of data points
@@ -102,20 +77,67 @@ double calculateLinearRegressionSlope(List<num> yValues) {
   return (n * sumXY - sumX * sumY) / denominator;
 }
 
-/// Computes a trend score for a package, factoring in both its recent
-/// relative growth rate and its overall download volume.
+/// Computes a trend score for a package, factoring in both its recent relative
+/// growth rate and its overall download volume.
 ///
-/// This score is designed to balance how quickly a package is growing
-/// ([computeRelativeGrowthRate]) against its existing popularity. Popularity is
-/// assessed by comparing the sum of its downloads over the available history
-/// (up to [analysisWindowDays]) against a [minThirtyDaysDownloadThreshold].
+/// This function sanitizes the download history by trimming any trailing zeros
+/// in [totalDownloads], which represent the time before the package was
+/// published. This ensures the trend is only calculated over the package's
+/// active lifetime.
+///
+/// The final score is designed to balance how quickly a package is growing
+/// against its existing popularity. Popularity is assessed using a sigmoid
+/// function where the midpoint is defined by [popularityMidpoint].
 double computeTrendScore(List<int> totalDownloads) {
-  final n = min(analysisWindowDays, totalDownloads.length);
-  final thirtydaySum = totalDownloads.isEmpty
+  final lastNonZeroIndex = totalDownloads.lastIndexWhere((e) => e != 0);
+
+  // We trim trailing zeros to ensure an accurate calculation of the trend. The
+  // zeros represent the time before the package was published. Leaving them in
+  // would artificially flatten the calculated growth rate.
+  final downloads = lastNonZeroIndex >= 0
+      ? totalDownloads.sublist(0, lastNonZeroIndex + 1)
+      : <int>[];
+
+  final n = min(analysisWindowDays, downloads.length);
+  final thirtydaySum = downloads.isEmpty
       ? 0
-      : totalDownloads.sublist(0, n).reduce((prev, element) => prev + element);
-  final dampening = min(thirtydaySum / minThirtyDaysDownloadThreshold, 1.0);
-  final relativGrowth = computeRelativeGrowthRate(totalDownloads);
+      : downloads.sublist(0, n).reduce((prev, element) => prev + element);
+  final sigmoid = calculateSigmoidScaleScore(total30Downloads: thirtydaySum);
+
+  return computeRelativeGrowthRate(downloads) * sigmoid;
+}
+
+/// Returns the natural logarithm of every entry in [numbers], in input order.
+///
+/// Zero or negative entries yield 0.0, the same result as an entry of 1. This
+/// avoids the negative infinity or NaN that a plain `log` call would return.
+List<double> safeLogTransform(List<int> numbers) {
+  double myLog(int number) {
+    if (number <= 0) {
+      return 0.0;
+    }
+    return log(number);
+  }
+
+  return numbers.map(myLog).toList();
+}
 
-  return relativGrowth * dampening * dampening;
+/// Calculates a dampening score between 0.0 and 1.0 based on download volume.
+///
+/// The score is the logistic function `1 / (1 + exp(-steepness * (x - m)))`
+/// with `x` = [total30Downloads] and `m` = [midpoint], a smooth "S"-shaped
+/// curve that rises from near 0 for small totals to near 1 for large ones.
+///
+/// [total30Downloads] is the total number of downloads in the last 30 days.
+/// [midpoint] (defaults to [popularityMidpoint]) is the total at which the
+/// score is exactly 0.5. [steepness] controls how quickly the score moves
+/// from 0 to 1 around the midpoint; higher values give a sharper transition.
+/// With the defaults, a total of 0 still scores about 0.011 rather than 0.
+double calculateSigmoidScaleScore({
+  required int total30Downloads,
+  double midpoint = popularityMidpoint,
+  double steepness = 0.00015,
+}) {
+  final double exponent = -steepness * (total30Downloads - midpoint);
+  return 1 / (1 + exp(exponent));
 }
diff --git a/app/test/service/download_counts/package_trends_test.dart b/app/test/service/download_counts/package_trends_test.dart
@@ -2,8 +2,6 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
-import 'dart:math';
-
 import 'package:pub_dev/service/download_counts/package_trends.dart';
 import 'package:test/test.dart';
 
@@ -35,39 +33,33 @@ void main() {
 
     test('calculates positive relative growth rate for positive trend', () {
       // Input list (newest first):  [1645, 1635, ..., 1355] (30 values)
-      // Average = 1500 for the first 30 values. Slope: 10.
-      final downloads = <int>[
-        ...List<int>.generate(analysisWindowDays * 2, (i) => 1645 - (i * 10)),
-        ...List.filled(300, 0)
-      ];
-      final avg = downloads.reduce((prev, element) => prev + element) / 330;
-      final expectedRate = 10.0 / avg;
-      expect(computeRelativeGrowthRate(downloads), expectedRate);
+      final downloads =
+          List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
+      final expectedRate = 0.0066800225103267686;
+      expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
     });
 
     test('calculates negative relative growth rate for negative trend', () {
       // Input list (newest first):  [1355, 1365, ..., 1645]
-      // Average = 1500. Slope: -10.
       final downloads =
           List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
-      final expectedRate = -10.0 / 1500.0;
-      expect(computeRelativeGrowthRate(downloads), expectedRate);
+      final expectedRate = -0.0066800225103267686;
+      expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
     });
 
     test(
         'calculates positive relative growth for data barely meeting threshold',
         () {
       // Input list (newest first): [1016, 1015, ..., 987]
-      // Average: 1001.5. Slope: 1.
       final downloads =
           List<int>.generate(analysisWindowDays, (i) => 1016 - i * 1);
-      final expectedRate = 1.0 / 1001.5;
+      final expectedRate = 0.000998546932871653;
       expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
     });
 
     test('should handle fluctuating data with a slight positive overall trend',
         () {
-      // Newest first. Average 1135.
+      // Newest first.
       final downloads = <int>[
         1300,
         1250,
@@ -100,91 +92,78 @@ void main() {
         1020,
         970
       ];
-      final expectedRate = 683250.0 / 67425.0 / 1135.0;
-      expect(computeRelativeGrowthRate(downloads), expectedRate);
+      final expectedRate = 0.008963997580330865;
+      expect(computeRelativeGrowthRate(downloads), closeTo(expectedRate, 1e-9));
     });
   });
   group('computeTrendScore', () {
-    test('Short history, very low sum, positive growth -> heavily dampened',
-        () {
+    test('Short history, very low sum, -> heavily dampened', () {
       final downloads = [100, 50];
-      // For relativeGrowth:
-      //   Padded data: [100, 50, 0...0] (28 zeros)
-      //   avg = (100 + 50) / 2 = 75.
-      //   growthRate = 63750 / 67425
-      final expectedDampening = min(1.0, 150 / 30000);
-      final expectedRelativeGrowth = (63750 / 67425) / 75;
-      final expectedScore =
-          expectedRelativeGrowth * expectedDampening * expectedDampening;
-      expect(computeTrendScore(downloads), expectedScore);
+      final totalSum = 150;
+
+      final expectedRelativeGrowth = 0.69315;
+      final expectedDampening =
+          calculateSigmoidScaleScore(total30Downloads: totalSum);
+      final expectedScore = expectedRelativeGrowth * expectedDampening;
+
+      expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
     });
-  });
 
-  test('Full history, sum meets threshold, positive growth -> no dampening',
-      () {
-    final downloads =
-        List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
-    // For relativeGrowth:
-    //   data: [1645, 1635, ..., 1355]
-    //   avg = 1500,
-    //   growthrate = 10
-    final expectedDampening = min(1.0, 45000 / 30000);
-    final expectedRelativeGrowth = 10 / 1500;
-    final expectedScore =
-        expectedRelativeGrowth * expectedDampening * expectedDampening;
-    expect(computeTrendScore(downloads), expectedScore);
-  });
+    test('Full history, positive growth -> almost no dampening', () {
+      final downloads = // [1645, 1635, ..., 1355]
+          List<int>.generate(analysisWindowDays, (i) => 1645 - (i * 10));
+      final totalSum = downloads.reduce((a, b) => a + b); // 45000
 
-  test('Negative growth, sum meets threshold -> no dampening', () {
-    final downloads =
-        List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
-    // For relativeGrowth:
-    //   data: [1645, 1635, ..., 1355]
-    //   avg = 1500,
-    //   growthrate = -10
-    final expectedDampening = min(1.0, 45000 / 30000);
-    final expectedRelativeGrowth = -10.0 / 1500;
-    final expectedScore =
-        expectedRelativeGrowth * expectedDampening * expectedDampening;
-    expect(computeTrendScore(downloads), expectedScore);
-  });
-  test('Full history, sum below threshold, positive growth -> dampened', () {
-    final downloads =
-        List<int>.generate(analysisWindowDays, (i) => 645 - (i * 10));
-    // For relativeGrowth:
-    //   data: [645,..., 345, 355]
-    //   avg = 500
-    //   growthrate = 10
-    final expectedDampening = min(1.0, 15000 / 30000);
-    final expectedRelativeGrowth = 10.0 / 500.0;
-    final expectedScore =
-        expectedRelativeGrowth * expectedDampening * expectedDampening;
-
-    expect(computeTrendScore(downloads), expectedScore);
-  });
+      final expectedRelativeGrowth = 0.006673;
+      final expectedDampening =
+          calculateSigmoidScaleScore(total30Downloads: totalSum);
+      final expectedScore = expectedRelativeGrowth * expectedDampening;
 
-  test('Empty totalDownloads list -> score 0', () {
-    final downloads = <int>[];
-    expect(computeTrendScore(downloads), 0);
-  });
+      expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
+    });
 
-  test('Full history, all zero downloads -> score 0', () {
-    final downloads = List<int>.filled(analysisWindowDays, 0);
-    expect(computeTrendScore(downloads), 0);
-  });
+    test('Full history, negative growth -> almost no dampening', () {
+      final downloads = // [1355, 1365, ..., 1645]
+          List<int>.generate(analysisWindowDays, (i) => 1355 + (i * 10));
+      final totalSum = downloads.reduce((a, b) => a + b); // 45000
+      final expectedRelativeGrowth = -0.006673;
+      final expectedDampening =
+          calculateSigmoidScaleScore(total30Downloads: totalSum);
+      final expectedScore = expectedRelativeGrowth * expectedDampening;
 
-  test('ThirtyDaySum just below threshold correctly, flat growth', () {
-    final downloads = List<int>.filled(analysisWindowDays, 999);
-    expect(computeTrendScore(downloads), 0);
-  });
+      expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
+    });
+
+    test('Full history, sum below threshold, positive growth -> dampened', () {
+      final downloads = // [645, ... , 355]
+          List<int>.generate(analysisWindowDays, (i) => 645 - (i * 10));
+      final totalSum = downloads.reduce((a, b) => a + b);
+      final expectedRelativeGrowth = 0.020373587410745377;
+      final expectedDampening =
+          calculateSigmoidScaleScore(total30Downloads: totalSum);
+      final expectedScore = expectedRelativeGrowth * expectedDampening;
+
+      expect(computeTrendScore(downloads), closeTo(expectedScore, 0.0001));
+    });
+
+    test('Empty totalDownloads list -> score 0', () {
+      final downloads = <int>[];
+      expect(computeTrendScore(downloads), 0);
+    });
 
-  test('Short history, high sum meets threshold -> no dampening', () {
-    final downloads = List<int>.filled(15, 2000);
-    final expectedDampening = min(1.0, 30000 / 30000);
-    final expectedRelativeGrowth = (6750000 / 67425) / 2000;
-    final expectedScore =
-        expectedRelativeGrowth * expectedDampening * expectedDampening;
+    test('Full history, all zero downloads -> score 0', () {
+      final downloads = List<int>.filled(analysisWindowDays, 0);
+      expect(computeTrendScore(downloads), 0);
+    });
 
-    expect(computeTrendScore(downloads), expectedScore);
+    test('Full history, sum just below threshold, flat growth', () {
+      final downloads = List<int>.filled(analysisWindowDays, 999);
+      expect(computeTrendScore(downloads), closeTo(0.0, 0.0001));
+    });
+
+    test('Short history, high sum meets threshold, flat growth', () {
+      final downloads = List<int>.filled(15, 2000);
+      expect(computeTrendScore(downloads), closeTo(0.0, 0.0001));
+    });
   });
 }

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@ Future<Map<String, double>> computeTrend() async {`
`31`	`31`	`(await downloadCountsBackend.lookupDownloadCountData(name))`
`32`	`32`	`?.totalCounts ??`
`33`	`33`	`[0];`
	`34`	`+`
`34`	`35`	`res[name] = computeTrendScore(downloads);`
`35`	`36`	`}`
`36`	`37`	`return res;`