Skip to content

Commit e3c40af

Browse files
authored
feat(metrics): Store all UMA metrics regardless of feature mapping (#1742)
* feat(metrics): Store all UMA metrics regardless of feature mapping Previously, the UMA metric consumer would only store histogram metrics that could be successfully mapped to an existing web feature ID. This meant that metrics for new or unmapped features were discarded. This change modifies the data ingestion logic to ensure all metrics are saved: 1. All histogram metrics for a given day are now unconditionally saved to the `DailyChromiumHistogramMetrics` table. 2. A separate step then attempts to associate these metrics with known web features in the `FeatureChromiumHistogramMetrics` table. 3. The `LatestFeatureChromiumHistogramMetrics` table is now updated using a synchronization operation, ensuring it accurately reflects the latest data and handles removals correctly. This separation makes the data ingestion more robust, preventing data loss and allowing feature mappings to be corrected in the future without losing historical metric data. * manual changes
1 parent 5f24f87 commit e3c40af

File tree

8 files changed

+334
-271
lines changed

8 files changed

+334
-271
lines changed

lib/gcpspanner/client.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,6 @@ var (
5959
// ErrUsageMetricUpsertNoHistogramFound indicates that the chromium histogram metric
6060
// was not found when attempting to upsert a usage metric.
6161
ErrUsageMetricUpsertNoHistogramFound = errors.New("histogram not found when upserting usage metric")
62-
63-
// ErrUsageMetricUpsertNoHistogramEnumFound indicates that the chromium histogram enum
64-
// was not found when attempting to upsert a usage metric. This typically occurs when
65-
// the histogram name associated with the metric is not found, possibly due to
66-
// a draft or obsolete feature for which the corresponding enum ID has not been created.
67-
ErrUsageMetricUpsertNoHistogramEnumFound = errors.New("histogram enum not found when upserting usage metric")
6862
)
6963

7064
// entitySynchronizer specific errors.

lib/gcpspanner/daily_chromium_histogram_metrics.go

Lines changed: 122 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,10 @@ import (
1919
"errors"
2020
"log/slog"
2121
"math/big"
22-
"time"
2322

2423
"cloud.google.com/go/civil"
2524
"cloud.google.com/go/spanner"
2625
"github.com/GoogleChrome/webstatus.dev/lib/metricdatatypes"
27-
"google.golang.org/api/iterator"
2826
)
2927

3028
const dailyChromiumHistogramMetricsTable = "DailyChromiumHistogramMetrics"
@@ -46,179 +44,162 @@ type spannerDailyChromiumHistogramMetric struct {
4644
ChromiumHistogramEnumValueID string `spanner:"ChromiumHistogramEnumValueID"`
4745
}
4846

49-
// getLatestDailyChromiumMetricDate retrieves the Date of the latest usage stats for the given feature.
50-
func getLatestDailyChromiumMetricDate(
47+
// StoreDailyChromiumHistogramMetrics stores a slice of daily chromium histogram metrics in a bulk insert.
48+
func (c *Client) StoreDailyChromiumHistogramMetrics(
5149
ctx context.Context,
52-
txn *spanner.ReadWriteTransaction,
53-
chromiumHistogramEnumValueID string) (*civil.Date, error) {
54-
stmt := spanner.NewStatement(`
55-
SELECT
56-
dchm.Day as Date
57-
FROM LatestDailyChromiumHistogramMetrics l
58-
JOIN DailyChromiumHistogramMetrics dchm
59-
ON l.ChromiumHistogramEnumValueID = dchm.ChromiumHistogramEnumValueID
60-
WHERE l.ChromiumHistogramEnumValueID = @chromiumHistogramEnumValueID`)
61-
62-
stmt.Params = map[string]interface{}{
63-
"chromiumHistogramEnumValueID": chromiumHistogramEnumValueID,
64-
}
50+
histogramName metricdatatypes.HistogramName,
51+
metrics map[int64]DailyChromiumHistogramMetric) error {
52+
chromiumHistogramEnumID, err := c.GetIDFromChromiumHistogramKey(ctx, string(histogramName))
53+
if err != nil {
54+
slog.ErrorContext(ctx, "unable to find histogram key id from histogram name", "name", string(histogramName))
6555

66-
iter := txn.Query(ctx, stmt)
67-
defer iter.Stop()
56+
return errors.Join(err, ErrUsageMetricUpsertNoHistogramFound)
57+
}
6858

69-
row, err := iter.Next()
70-
if err != nil {
71-
if errors.Is(err, iterator.Done) {
72-
// No row found, return zero time
73-
zeroTime := time.Time{}
74-
zeroDate := civil.DateOf(zeroTime)
59+
producerFn := func(metricChan chan<- spannerDailyChromiumHistogramMetric) {
60+
for bucketID, metric := range metrics {
61+
chromiumHistogramEnumValueID, err := c.GetIDFromChromiumHistogramEnumValueKey(
62+
ctx, *chromiumHistogramEnumID, bucketID)
63+
if err != nil {
64+
slog.WarnContext(ctx, "unable to find histogram value id. likely a draft or obsolete feature. will skip",
65+
"id", *chromiumHistogramEnumID,
66+
"bucketID", bucketID)
7567

76-
return &zeroDate, errors.Join(ErrQueryReturnedNoResults, err)
68+
continue
69+
}
70+
metricChan <- spannerDailyChromiumHistogramMetric{
71+
DailyChromiumHistogramMetric: metric,
72+
ChromiumHistogramEnumValueID: *chromiumHistogramEnumValueID,
73+
}
7774
}
78-
slog.ErrorContext(ctx, "error querying for latest run time", "error", err)
75+
}
7976

80-
return nil, err
77+
toMutationFn := func(m spannerDailyChromiumHistogramMetric) (*spanner.Mutation, error) {
78+
return spanner.InsertOrUpdateStruct(dailyChromiumHistogramMetricsTable, m)
8179
}
8280

83-
var date civil.Date
84-
if err := row.Columns(&date); err != nil {
85-
slog.ErrorContext(ctx, "error extracting date", "error", err)
81+
return runConcurrentBatch(ctx, c, producerFn, dailyChromiumHistogramMetricsTable, toMutationFn)
82+
}
8683

87-
return nil, err
88-
}
84+
// Implements the syncableEntityMapper interface for WebFeature and SpannerLatestDailyChromiumHistogramMetric.
85+
type latestDailyChromiumHistogramMetricMapper struct{}
8986

90-
return &date, nil
87+
// Key for the latestDailyChromiumHistogramMetricMapper.
88+
type latestDailyChromiumHistogramMetricKey struct {
89+
WebFeatureID string
90+
ChromiumHistogramEnumValueID string
9191
}
9292

93-
func getWebFeatureIDByChromiumHistogramEnumValueID(
94-
ctx context.Context,
95-
txn *spanner.ReadWriteTransaction,
96-
chromiumHistogramEnumValueID string) (*string, error) {
97-
stmt := spanner.NewStatement(`
98-
SELECT
99-
WebFeatureID
100-
FROM WebFeatureChromiumHistogramEnumValues
101-
WHERE chromiumHistogramEnumValueID = @chromiumHistogramEnumValueID`)
93+
// Table returns the name of the Spanner table.
94+
func (m latestDailyChromiumHistogramMetricMapper) Table() string {
95+
return LatestDailyChromiumHistogramMetricsTable
96+
}
10297

103-
stmt.Params = map[string]interface{}{
104-
"chromiumHistogramEnumValueID": chromiumHistogramEnumValueID,
105-
}
98+
// SelectAll returns a statement to select all LatestDailyChromiumHistogramMetrics.
99+
func (m latestDailyChromiumHistogramMetricMapper) SelectAll() spanner.Statement {
100+
return spanner.NewStatement(`
101+
SELECT
102+
WebFeatureID, ChromiumHistogramEnumValueID, Day
103+
FROM LatestDailyChromiumHistogramMetrics`)
104+
}
106105

107-
iter := txn.Query(ctx, stmt)
108-
defer iter.Stop()
106+
// GetKeyFromExternal returns the business key from an external struct.
107+
func (m latestDailyChromiumHistogramMetricMapper) GetKeyFromExternal(
108+
in SpannerLatestDailyChromiumHistogramMetric) latestDailyChromiumHistogramMetricKey {
109+
return latestDailyChromiumHistogramMetricKey{in.WebFeatureID, in.ChromiumHistogramEnumValueID}
110+
}
109111

110-
row, err := iter.Next()
111-
if err != nil {
112-
if errors.Is(err, iterator.Done) {
113-
return nil, errors.Join(ErrQueryReturnedNoResults, err)
114-
}
115-
slog.ErrorContext(ctx, "error querying for web feature ID", "error", err)
112+
// GetKeyFromInternal returns the business key from an internal Spanner struct.
113+
func (m latestDailyChromiumHistogramMetricMapper) GetKeyFromInternal(
114+
in SpannerLatestDailyChromiumHistogramMetric) latestDailyChromiumHistogramMetricKey {
115+
return latestDailyChromiumHistogramMetricKey{in.WebFeatureID, in.ChromiumHistogramEnumValueID}
116+
}
116117

117-
return nil, err
118+
// MergeAndCheckChanged will merge the entity and return if the entity has changed.
119+
func (m latestDailyChromiumHistogramMetricMapper) MergeAndCheckChanged(
120+
in SpannerLatestDailyChromiumHistogramMetric,
121+
existing SpannerLatestDailyChromiumHistogramMetric,
122+
) (SpannerLatestDailyChromiumHistogramMetric, bool) {
123+
merged := SpannerLatestDailyChromiumHistogramMetric{
124+
WebFeatureID: existing.WebFeatureID,
125+
ChromiumHistogramEnumValueID: existing.ChromiumHistogramEnumValueID,
126+
Day: in.Day,
118127
}
119-
var featureID string
120-
if err := row.Columns(&featureID); err != nil {
121-
slog.ErrorContext(ctx, "error extracting date", "error", err)
122128

123-
return nil, err
124-
}
129+
hasChanged := merged.Day != existing.Day
125130

126-
return &featureID, nil
131+
return merged, hasChanged
127132
}
128133

129-
// shouldUpsertLatestDailyChromiumUsageMetric determines whether the latest metric should be upserted based on
130-
// date comparison.
131-
func shouldUpsertLatestDailyChromiumUsageMetric(existingDate *civil.Date, newDate civil.Date) bool {
132-
return existingDate == nil || existingDate.IsZero() || newDate.After(*existingDate)
134+
// GetChildDeleteKeyMutations is a no-op for this table.
135+
func (m latestDailyChromiumHistogramMetricMapper) GetChildDeleteKeyMutations(
136+
_ context.Context,
137+
_ *Client,
138+
_ []SpannerLatestDailyChromiumHistogramMetric,
139+
) ([]ChildDeleteKeyMutations, error) {
140+
return nil, nil
133141
}
134142

135-
// UpsertDailyChromiumHistogramMetric upserts a daily chromium histogram metric.
136-
//
137-
// Errors:
138-
// - ErrQueryReturnedNoResults: If the histogram key or value ID is not found.
139-
// - ErrInternalQueryFailure: If any internal query fails during the process.
140-
// - ErrUsageMetricUpsertNoFeatureIDFound: If no feature ID is found while
141-
// attempting to upsert the latest daily chromium usage metric.
142-
// - ErrUsageMetricUpsertNoHistogramFound: If the histogram is not found
143-
// - ErrUsageMetricUpsertNoHistogramEnumFound: If a particular enum in the histogram is not found.
144-
func (c *Client) UpsertDailyChromiumHistogramMetric(
145-
ctx context.Context,
146-
histogramName metricdatatypes.HistogramName,
147-
bucketID int64,
148-
metric DailyChromiumHistogramMetric) error {
149-
// TODO: When we have a generic way to do batch upserts, change this to accept an array of metrics.
150-
chromiumHistogramEnumID, err := c.GetIDFromChromiumHistogramKey(ctx, string(histogramName))
151-
if err != nil {
152-
slog.ErrorContext(ctx, "unable to find histogram key id from histogram name", "name", string(histogramName))
153-
154-
return errors.Join(err, ErrUsageMetricUpsertNoHistogramFound)
155-
}
156-
chromiumHistogramEnumValueID, err := c.GetIDFromChromiumHistogramEnumValueKey(
157-
ctx, *chromiumHistogramEnumID, bucketID)
158-
if err != nil {
159-
if errors.Is(err, ErrQueryReturnedNoResults) {
160-
slog.WarnContext(ctx, "unable to find histogram value id. likely a draft or obsolete feature. will skip",
161-
"id", *chromiumHistogramEnumID,
162-
"bucketID", bucketID)
163-
164-
return errors.Join(err, ErrUsageMetricUpsertNoHistogramEnumFound)
165-
}
143+
// DeleteMutation creates a Spanner delete mutation.
144+
func (m latestDailyChromiumHistogramMetricMapper) DeleteMutation(
145+
in SpannerLatestDailyChromiumHistogramMetric) *spanner.Mutation {
146+
return spanner.Delete(LatestDailyChromiumHistogramMetricsTable,
147+
spanner.Key{in.WebFeatureID, in.ChromiumHistogramEnumValueID})
148+
}
166149

167-
slog.ErrorContext(ctx, "unable to find histogram value id",
168-
"id", *chromiumHistogramEnumID,
169-
"bucketID", bucketID)
150+
// SyncLatestDailyChromiumHistogramMetrics reconciles the LatestDailyChromiumHistogramMetrics table.
151+
func (c *Client) SyncLatestDailyChromiumHistogramMetrics(ctx context.Context) error {
152+
slog.InfoContext(ctx, "Starting latest daily chromium histogram metrics synchronization")
153+
synchronizer := newEntitySynchronizer[latestDailyChromiumHistogramMetricMapper,
154+
SpannerLatestDailyChromiumHistogramMetric,
155+
SpannerLatestDailyChromiumHistogramMetric,
156+
latestDailyChromiumHistogramMetricKey](c)
170157

158+
desiredState, err := c.getDesiredLatestDailyChromiumHistogramMetrics(ctx)
159+
if err != nil {
171160
return err
172161
}
173162

174-
_, err = c.ReadWriteTransaction(ctx, func(ctx context.Context, txn *spanner.ReadWriteTransaction) error {
175-
var mutations []*spanner.Mutation
176-
m0, err := spanner.InsertOrUpdateStruct(
177-
dailyChromiumHistogramMetricsTable,
178-
spannerDailyChromiumHistogramMetric{
179-
DailyChromiumHistogramMetric: metric,
180-
ChromiumHistogramEnumValueID: *chromiumHistogramEnumValueID,
181-
})
182-
if err != nil {
183-
return err
184-
}
185-
mutations = append(mutations, m0)
163+
return synchronizer.Sync(ctx, desiredState)
164+
}
186165

187-
existingDate, err := getLatestDailyChromiumMetricDate(ctx, txn, *chromiumHistogramEnumValueID)
188-
if err != nil {
189-
if !errors.Is(err, ErrQueryReturnedNoResults) { // Handle errors other than "not found"
190-
return errors.Join(ErrInternalQueryFailure, err)
191-
}
192-
}
166+
func (c *Client) getDesiredLatestDailyChromiumHistogramMetrics(
167+
ctx context.Context) ([]SpannerLatestDailyChromiumHistogramMetric, error) {
168+
txn := c.ReadOnlyTransaction()
169+
defer txn.Close()
193170

194-
if shouldUpsertLatestDailyChromiumUsageMetric(existingDate, metric.Day) {
195-
featureID, err := getWebFeatureIDByChromiumHistogramEnumValueID(ctx, txn, *chromiumHistogramEnumValueID)
196-
if err != nil {
197-
if errors.Is(err, ErrQueryReturnedNoResults) {
198-
return errors.Join(err, ErrUsageMetricUpsertNoFeatureIDFound)
199-
}
171+
stmt := spanner.NewStatement(`
172+
WITH LatestMetrics AS (
173+
SELECT
174+
ChromiumHistogramEnumValueID,
175+
MAX(Day) AS MaxDay
176+
FROM DailyChromiumHistogramMetrics
177+
GROUP BY ChromiumHistogramEnumValueID
178+
)
179+
SELECT
180+
w.WebFeatureID,
181+
w.ChromiumHistogramEnumValueID,
182+
l.MaxDay AS Day
183+
FROM WebFeatureChromiumHistogramEnumValues w
184+
JOIN LatestMetrics l ON w.ChromiumHistogramEnumValueID = l.ChromiumHistogramEnumValueID
185+
`)
200186

201-
return errors.Join(ErrInternalQueryFailure, err)
202-
}
203-
m1, err := spanner.InsertOrUpdateStruct(
204-
LatestDailyChromiumHistogramMetricsTable,
205-
SpannerLatestDailyChromiumHistogramMetric{
206-
WebFeatureID: *featureID,
207-
ChromiumHistogramEnumValueID: *chromiumHistogramEnumValueID,
208-
Day: metric.Day,
209-
})
210-
if err != nil {
211-
return errors.Join(ErrInternalQueryFailure, err)
212-
}
213-
mutations = append(mutations, m1)
214-
}
215-
err = txn.BufferWrite(mutations)
216-
if err != nil {
217-
return errors.Join(ErrInternalQueryFailure, err)
187+
iter := txn.Query(ctx, stmt)
188+
defer iter.Stop()
189+
190+
var desiredState []SpannerLatestDailyChromiumHistogramMetric
191+
err := iter.Do(func(r *spanner.Row) error {
192+
var metric SpannerLatestDailyChromiumHistogramMetric
193+
if err := r.ToStruct(&metric); err != nil {
194+
return err
218195
}
196+
desiredState = append(desiredState, metric)
219197

220198
return nil
221199
})
200+
if err != nil {
201+
return nil, err
202+
}
222203

223-
return err
204+
return desiredState, nil
224205
}

0 commit comments

Comments
 (0)