Skip to content

Commit be6cdb8

Browse files
committed
backend/instances.go: fix instance stats query
The instance stats query contained a bug in which it only returned instances that _last_ checked in within the given time window instead of all instances that were _active_ during the window. As a result, the instance count was skewed and much lower than the actual number of active instances. This change updates the query from "last_check_for_updates > (ts - window_size) AND last_check_for_updates <= ts" to "last_check_for_updates > (ts - window_size) AND instance.created_ts <= ts". The change also updates the default window size from 2 hours to one day to better match our legacy instance count metrics. Signed-off-by: Thilo Fromm <thilofromm@microsoft.com>
1 parent 4e3eadd commit be6cdb8

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

backend/pkg/api/instances.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ const (
4848
)
4949

5050
const (
51-
validityInterval postgresDuration = "1 days"
52-
defaultInterval time.Duration = 2 * time.Hour
51+
validityInterval postgresDuration = "1 days"
52+
defaultStatsInterval time.Duration = 24 * time.Hour
5353
)
5454

5555
// Instance represents an instance running one or more applications for which
@@ -646,19 +646,24 @@ func (api *API) instanceStatusHistoryQuery(instanceID, appID, groupID string, li
646646

647647
// GetDefaultInterval returns the default interval used for instance stats queries.
648648
func (api *API) GetDefaultInterval() time.Duration {
649-
return defaultInterval
649+
return defaultStatsInterval
650650
}
651651

652-
// instanceStatsQuery returns a SelectDataset prepared to return all instances
653-
// that have been checked in during a given duration from a given time.
652+
// instanceStatsQuery returns a SelectDataset prepared to return all active instances within a time period.
653+
//
654+
// This is somewhat complicated by the fact that an instance created _before_ or _during_ our window
655+
// can have checked in _after_ the window we're supposed to check. last_check_for_updates gets updated every
656+
// time an instance checks for an update. Hence we filter for instances that:
657+
// 1. have been created before timestamp 't'
658+
// 2. have checked in _during_ or _after_ the time period
654659
func (api *API) instanceStatsQuery(t *time.Time, duration *time.Duration) *goqu.SelectDataset {
655660
if t == nil {
656661
now := time.Now().UTC()
657662
t = &now
658663
}
659664

660665
if duration == nil {
661-
d := defaultInterval
666+
d := defaultStatsInterval
662667
duration = &d
663668
}
664669

@@ -710,12 +715,13 @@ func (api *API) instanceStatsQuery(t *time.Time, duration *time.Duration) *goqu.
710715
Else("").
711716
As("arch"),
712717
goqu.C("version").As("version"),
713-
goqu.COUNT("*").As("instances")).
718+
goqu.COUNT("*").As("instances")).Distinct().
714719
Join(goqu.T("groups"), goqu.On(goqu.C("group_id").Eq(goqu.T("groups").Col("id")))).
715720
Join(goqu.T("channel"), goqu.On(goqu.T("groups").Col("channel_id").Eq(goqu.T("channel").Col("id")))).
721+
Join(goqu.T("instance"), goqu.On(goqu.T("instance_application").Col("instance_id").Eq(goqu.T("instance").Col("id")))).
716722
Where(
717723
goqu.C("last_check_for_updates").Gt(timestampMinusDuration),
718-
goqu.C("last_check_for_updates").Lte(timestamp)).
724+
goqu.T("instance").Col("created_ts").Lte(timestamp)).
719725
GroupBy(timestamp,
720726
goqu.T("channel").Col("name"),
721727
goqu.T("channel").Col("arch"),

0 commit comments

Comments
 (0)