From 62b8606a93d72f3df44977257f31cac9d612a0be Mon Sep 17 00:00:00 2001 From: Sarah Zakarias Date: Wed, 7 May 2025 10:24:10 +0000 Subject: [PATCH] Add function and task for computing and uploading package trends --- .../service/download_counts/computations.dart | 30 +++++++ .../download_counts/package_trends.dart | 21 +++++ app/lib/tool/neat_task/pub_dev_tasks.dart | 6 ++ .../download_counts/computations_test.dart | 63 +++++++++++++- ...fake_download_counts_data_for_trend1.jsonl | 2 + ...fake_download_counts_data_for_trend2.jsonl | 2 + .../download_counts/package_trends_test.dart | 85 +++++++++++++++++++ 7 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 app/test/service/download_counts/fake_download_counts_data_for_trend1.jsonl create mode 100644 app/test/service/download_counts/fake_download_counts_data_for_trend2.jsonl diff --git a/app/lib/service/download_counts/computations.dart b/app/lib/service/download_counts/computations.dart index c8504b1279..1173738423 100644 --- a/app/lib/service/download_counts/computations.dart +++ b/app/lib/service/download_counts/computations.dart @@ -6,15 +6,45 @@ import 'dart:math'; import 'package:_pub_shared/data/download_counts_data.dart'; import 'package:gcloud/storage.dart'; +import 'package:pub_dev/package/backend.dart'; import 'package:pub_dev/service/download_counts/backend.dart'; import 'package:pub_dev/service/download_counts/download_counts.dart'; import 'package:pub_dev/service/download_counts/models.dart'; +import 'package:pub_dev/service/download_counts/package_trends.dart'; import 'package:pub_dev/shared/configuration.dart'; import 'package:pub_dev/shared/storage.dart'; import 'package:pub_dev/shared/utils.dart'; import '../../shared/redis_cache.dart' show cache; +Future computeTrendScoreTask() async { + final trendScores = await computeTrend(); + await uploadTrendScores(trendScores); +} + +Future> computeTrend() async { + final res = {}; + + await for (final pkg in packageBackend.allPackages()) { + 
final name = pkg.name!; + final downloads = + (await downloadCountsBackend.lookupDownloadCountData(name)) + ?.totalCounts ?? + [0]; + res[name] = computeTrendScore(downloads); + } + return res; +} + +final trendScoreFileName = 'trend-scores.json'; + +Future uploadTrendScores(Map trends) async { + final reportsBucket = + storageService.bucket(activeConfiguration.reportsBucketName!); + await uploadBytesWithRetry( + reportsBucket, trendScoreFileName, jsonUtf8Encoder.convert(trends)); +} + Future compute30DaysTotalTask() async { final allDownloadCounts = await downloadCountsBackend.listAllDownloadCounts(); final totals = await compute30DayTotals(allDownloadCounts); diff --git a/app/lib/service/download_counts/package_trends.dart b/app/lib/service/download_counts/package_trends.dart index 2a34af3691..690306536c 100644 --- a/app/lib/service/download_counts/package_trends.dart +++ b/app/lib/service/download_counts/package_trends.dart @@ -2,7 +2,10 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'dart:math'; + const analysisWindowDays = 30; +const minThirtyDaysDownloadThreshold = 30000; /// Calculates the relative daily growth rate of a package's downloads. /// @@ -89,3 +92,21 @@ double calculateLinearRegressionSlope(List yValues) { } return (n * sumXY - sumX * sumY) / denominator; } + +/// Computes a trend score for a package, factoring in both its recent +/// relative growth rate and its overall download volume. +/// +/// This score is designed to balance how quickly a package is growing +/// ([computeRelativeGrowthRate]) against its existing popularity. Popularity is +/// assessed by comparing the sum of its downloads over the available history +/// (up to [analysisWindowDays]) against a [minThirtyDaysDownloadThreshold]. 
+double computeTrendScore(List totalDownloads) { + final n = min(analysisWindowDays, totalDownloads.length); + final thirtydaySum = totalDownloads.isEmpty + ? 0 + : totalDownloads.sublist(0, n).reduce((prev, element) => prev + element); + final dampening = min(thirtydaySum / minThirtyDaysDownloadThreshold, 1.0); + final relativGrowth = computeRelativeGrowthRate(totalDownloads); + + return relativGrowth * dampening * dampening; +} diff --git a/app/lib/tool/neat_task/pub_dev_tasks.dart b/app/lib/tool/neat_task/pub_dev_tasks.dart index f566118100..48c1f50d32 100644 --- a/app/lib/tool/neat_task/pub_dev_tasks.dart +++ b/app/lib/tool/neat_task/pub_dev_tasks.dart @@ -211,6 +211,12 @@ List createPeriodicTaskSchedulers({ task: compute30DaysTotalTask, ), + _daily( + name: 'compute-trend-scores', + isRuntimeVersioned: false, + task: computeTrendScoreTask, + ), + _daily( name: 'count-topics', isRuntimeVersioned: false, diff --git a/app/test/service/download_counts/computations_test.dart b/app/test/service/download_counts/computations_test.dart index a45cbf62e5..a4c2ab2d36 100644 --- a/app/test/service/download_counts/computations_test.dart +++ b/app/test/service/download_counts/computations_test.dart @@ -2,19 +2,23 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. 
import 'dart:convert'; +import 'dart:io'; import 'package:basics/basics.dart'; import 'package:gcloud/storage.dart'; +import 'package:path/path.dart' as path; import 'package:pub_dev/fake/backend/fake_download_counts.dart'; import 'package:pub_dev/service/download_counts/backend.dart'; import 'package:pub_dev/service/download_counts/computations.dart'; +import 'package:pub_dev/service/download_counts/package_trends.dart'; +import 'package:pub_dev/service/download_counts/sync_download_counts.dart'; import 'package:pub_dev/shared/configuration.dart'; import 'package:test/test.dart'; import '../../shared/test_services.dart'; void main() { - group('', () { + group('30 days download counts', () { testWithProfile('compute download counts 30-days totals', fn: () async { final pkg = 'foo'; final versionsCounts = { @@ -119,7 +123,9 @@ void main() { expect(downloadCountsBackend.lookup30DaysTotalCounts('baz'), 150); expect(downloadCountsBackend.lookup30DaysTotalCounts('bax'), isNull); }); + }); + group('weekly download counts', () { testWithProfile('compute weekly', fn: () async { final pkg = 'foo'; final date = DateTime.parse('1986-02-16'); @@ -276,4 +282,59 @@ void main() { } }); }); + group('trends', () { + testWithProfile('compute trend', fn: () async { + String date(int i) => i < 10 ? 
'2024-01-0$i' : '2024-01-$i'; + + for (int i = 1; i < 16; i++) { + final d = DateTime.parse(date(i)); + final downloadCountsJsonFileName = + 'daily_download_counts/${date(i)}T00:00:00Z/data-000000000000.jsonl'; + await uploadFakeDownloadCountsToBucket( + downloadCountsJsonFileName, + path.join( + Directory.current.path, + 'test', + 'service', + 'download_counts', + 'fake_download_counts_data_for_trend1.jsonl')); + await processDownloadCounts(d); + } + for (int i = 16; i < 31; i++) { + final d = DateTime.parse(date(i)); + final downloadCountsJsonFileName = + 'daily_download_counts/${date(i)}T00:00:00Z/data-000000000000.jsonl'; + await uploadFakeDownloadCountsToBucket( + downloadCountsJsonFileName, + path.join( + Directory.current.path, + 'test', + 'service', + 'download_counts', + 'fake_download_counts_data_for_trend2.jsonl')); + await processDownloadCounts(d); + } + final neonTrend = computeTrendScore( + [...List.filled(15, 2000), ...List.filled(15, 1000)]); + final oxygenTrend = computeTrendScore( + [...List.filled(15, 5000), ...List.filled(15, 3000)]); + + expect(await computeTrend(), + {'flutter_titanium': 0.0, 'neon': neonTrend, 'oxygen': oxygenTrend}); + }); + + testWithProfile('succesful trends upload', fn: () async { + final trends = {'foo': 1.0, 'bar': 3.0, 'baz': 2.0}; + await uploadTrendScores(trends); + + final data = await storageService + .bucket(activeConfiguration.reportsBucketName!) 
+ .read(trendScoreFileName) + .transform(utf8.decoder) + .transform(json.decoder) + .single; + + expect(data, trends); + }); + }); } diff --git a/app/test/service/download_counts/fake_download_counts_data_for_trend1.jsonl b/app/test/service/download_counts/fake_download_counts_data_for_trend1.jsonl new file mode 100644 index 0000000000..197ecdd7c5 --- /dev/null +++ b/app/test/service/download_counts/fake_download_counts_data_for_trend1.jsonl @@ -0,0 +1,2 @@ +{"package":"oxygen","total":"3000","per_version":[{"version":"1.0.0","count":"1000"},{"version":"1.2.0","count":"1000"},{"version":"2.0.0-dev","count":"1000"}]} +{"package":"neon","total":"1000","per_version":[{"version":"1.0.0","count":"1000"}]} diff --git a/app/test/service/download_counts/fake_download_counts_data_for_trend2.jsonl b/app/test/service/download_counts/fake_download_counts_data_for_trend2.jsonl new file mode 100644 index 0000000000..d4c17f7dec --- /dev/null +++ b/app/test/service/download_counts/fake_download_counts_data_for_trend2.jsonl @@ -0,0 +1,2 @@ +{"package":"oxygen","total":"5000","per_version":[{"version":"1.0.0","count":"3000"},{"version":"1.2.0","count":"1000"},{"version":"2.0.0-dev","count":"1000"}]} +{"package":"neon","total":"2000","per_version":[{"version":"1.0.0","count":"2000"}]} diff --git a/app/test/service/download_counts/package_trends_test.dart b/app/test/service/download_counts/package_trends_test.dart index 461a520a27..48f4d1a82c 100644 --- a/app/test/service/download_counts/package_trends_test.dart +++ b/app/test/service/download_counts/package_trends_test.dart @@ -2,6 +2,8 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. 
+import 'dart:math'; + import 'package:pub_dev/service/download_counts/package_trends.dart'; import 'package:test/test.dart'; @@ -99,4 +101,87 @@ void main() { expect(computeRelativeGrowthRate(downloads), expectedRate); }); }); + group('computeTrendScore', () { + test('Short history, very low sum, positive growth -> heavily dampened', + () { + final downloads = [100, 50]; + // For relativeGrowth: + // Padded data: [100, 50, 0...0] (28 zeros) + // avg = 150/30 = 5 + // growthRate = 63750 / 67425 + final expectedDampening = min(1.0, 150 / 30000); + final expectedRelativeGrowth = 63750 / 67425 / 5; + final expectedScore = + expectedRelativeGrowth * expectedDampening * expectedDampening; + expect(computeTrendScore(downloads), expectedScore); + }); + }); + + test('Full history, sum meets threshold, positive growth -> no dampening', + () { + final downloads = + List.generate(analysisWindowDays, (i) => 1645 - (i * 10)); + // For relativeGrowth: + // data: [1645, 1635, ..., 1355] + // avg = 1500, + // growthrate = 10 + final expectedDampening = min(1.0, 45000 / 30000); + final expectedRelativeGrowth = 10 / 1500; + final expectedScore = + expectedRelativeGrowth * expectedDampening * expectedDampening; + expect(computeTrendScore(downloads), expectedScore); + }); + + test('Negative growth, sum meets threshold -> no dampening', () { + final downloads = + List.generate(analysisWindowDays, (i) => 1355 + (i * 10)); + // For relativeGrowth: + // data: [1355, 1365, ..., 1645] + // avg = 1500, + // growthrate = -10 + final expectedDampening = min(1.0, 45000 / 30000); + final expectedRelativeGrowth = -10.0 / 1500; + final expectedScore = + expectedRelativeGrowth * expectedDampening * expectedDampening; + expect(computeTrendScore(downloads), expectedScore); + }); + test('Full history, sum below threshold, positive growth -> dampened', () { + final downloads = + List.generate(analysisWindowDays, (i) => 645 - (i * 10)); + // For relativeGrowth: + // data: [645, 635, ..., 355] + // avg = 
500 + // growthrate = 10 + final expectedDampening = min(1.0, 15000 / 30000); + final expectedRelativeGrowth = 10.0 / 500.0; + final expectedScore = + expectedRelativeGrowth * expectedDampening * expectedDampening; + + expect(computeTrendScore(downloads), expectedScore); + }); + + test('Empty totalDownloads list -> score 0', () { + final downloads = []; + expect(computeTrendScore(downloads), 0); + }); + + test('Full history, all zero downloads -> score 0', () { + final downloads = List.filled(analysisWindowDays, 0); + expect(computeTrendScore(downloads), 0); + }); + + test('ThirtyDaySum just below threshold correctly, flat growth', () { + final downloads = List.filled(analysisWindowDays, 999); + expect(computeTrendScore(downloads), 0); + }); + + test('Short history, high sum meets threshold -> no dampening', () { + final downloads = List.filled(15, 2000); + final expectedDampening = min(1.0, 30000 / 30000); + final expectedRelativeGrowth = 6750000 / 67425 / 1000; + final expectedScore = + expectedRelativeGrowth * expectedDampening * expectedDampening; + + expect(computeTrendScore(downloads), expectedScore); + }); }