Skip to content

Commit 5761676

Browse files
committed
sql: estimate table statistic staleness in stats.Refresher
This commit adds an `EstimateStaleness()` method to the table statistic `Refresher`, which estimates the current fraction of stale rows in a given table with the formula: `cur_fraction_stale = (time_since_last_refresh / avg_time_between_refreshes) * target_fraction_stale_rows`. Although this isn’t used anywhere yet, it will be useful for logging when scans are misestimated (see #153748). Part of: #153748, #153873. Release note: None
1 parent f520554 commit 5761676

File tree

2 files changed

+240
-0
lines changed

2 files changed

+240
-0
lines changed

pkg/sql/stats/automatic_stats.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ type TableStatsTestingKnobs struct {
340340
// perform full statistics refreshes. Useful for testing the partial stats
341341
// refresh logic.
342342
DisableFullStatsRefresh bool
343+
// StubTimeNow allows tests to override the current time, used by
344+
// EstimateStaleness to get the latest stats' age.
345+
StubTimeNow func() time.Time
343346
}
344347

345348
// Compile-time assertion that *TableStatsTestingKnobs satisfies the
// base.ModuleTestingKnobs interface.
var _ base.ModuleTestingKnobs = &TableStatsTestingKnobs{}
@@ -893,6 +896,56 @@ func (r *Refresher) NotifyMutation(table catalog.TableDescriptor, rowsAffected i
893896
}
894897
}
895898

899+
// EstimateStaleness returns an estimate fraction of stale rows in the given
900+
// table based on how long it has been since the last full statistics refresh,
901+
// and the average time between refreshes.
902+
func (r *Refresher) EstimateStaleness(ctx context.Context, tableID descpb.ID) (float64, error) {
903+
desc := r.getTableDescriptor(ctx, tableID)
904+
if desc == nil {
905+
return 0, errors.New("could not access the table descriptor")
906+
}
907+
if !autostatsCollectionAllowed(desc, r.st) {
908+
return 0, errors.New("automatic stats collection is not allowed for this table")
909+
}
910+
911+
var forecast *bool
912+
// NB: we pass nil boolean as 'forecast' argument in order to not invalidate
913+
// the stats cache entry since we don't care whether there is a forecast or
914+
// not in the stats.
915+
tableStats, err := r.cache.getTableStatsFromCache(ctx, tableID, forecast, nil /* udtCols */, nil /* typeResolver */)
916+
if err != nil {
917+
return 0, err
918+
}
919+
920+
// Find the most recent full statistic
921+
var stat *TableStatistic
922+
for _, s := range tableStats {
923+
if !s.IsPartial() && !s.IsForecast() && !s.IsMerged() {
924+
stat = s
925+
break
926+
}
927+
}
928+
if stat == nil {
929+
return 0, errors.New("no full statistics available")
930+
}
931+
932+
var explicitSettings *catpb.AutoStatsSettings
933+
if s, ok := r.settingOverrides[tableID]; ok {
934+
explicitSettings = &s
935+
}
936+
staleTargetFraction := r.autoStatsFractionStaleRows(explicitSettings)
937+
938+
avgRefreshTime := avgFullRefreshTime(tableStats)
939+
statsAge := timeutil.Since(stat.CreatedAt)
940+
if r.knobs != nil && r.knobs.StubTimeNow != nil {
941+
statsAge = r.knobs.StubTimeNow().Sub(stat.CreatedAt)
942+
}
943+
staleFraction :=
944+
float64(statsAge) / float64(avgRefreshTime) * staleTargetFraction
945+
946+
return staleFraction, nil
947+
}
948+
896949
// maybeRefreshStats implements the core logic described in the comment for
897950
// Refresher. It is called by the background Refresher thread.
898951
// explicitSettings, if non-nil, holds any autostats cluster setting overrides

pkg/sql/stats/automatic_stats_test.go

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,3 +1063,190 @@ func TestRefresherReadOnlyShutdown(t *testing.T) {
10631063
// Wait for shutdown - this should complete without hanging.
10641064
readOnlyRefresher.WaitForAutoStatsShutdown(ctx)
10651065
}
1066+
1067+
func TestEstimateStaleness(t *testing.T) {
1068+
defer leaktest.AfterTest(t)()
1069+
defer log.Scope(t).Close(t)
1070+
ctx := context.Background()
1071+
1072+
srv, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{})
1073+
defer srv.Stopper().Stop(ctx)
1074+
s := srv.ApplicationLayer()
1075+
codec, st := s.Codec(), s.ClusterSettings()
1076+
1077+
evalCtx := eval.NewTestingEvalContext(st)
1078+
defer evalCtx.Stop(ctx)
1079+
1080+
AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false)
1081+
1082+
sqlRun := sqlutils.MakeSQLRunner(sqlDB)
1083+
sqlRun.Exec(t,
1084+
`CREATE DATABASE t;
1085+
CREATE TABLE t.a (k INT PRIMARY KEY);
1086+
INSERT INTO t.a VALUES (1);`)
1087+
1088+
internalDB := s.InternalDB().(descs.DB)
1089+
table := desctestutils.TestingGetPublicTableDescriptor(s.DB(), codec, "t", "a")
1090+
cache := NewTableStatisticsCache(
1091+
10, /* cacheSize */
1092+
s.ClusterSettings(),
1093+
s.InternalDB().(descs.DB),
1094+
s.AppStopper(),
1095+
)
1096+
require.NoError(t, cache.Start(ctx, codec, s.RangeFeedFactory().(*rangefeed.Factory)))
1097+
1098+
// curTime is used as the current time throughout the test to ensure that the
1099+
// calculated staleness is consistent even if there are delays due to
1100+
// running the test under race.
1101+
curTime := timeutil.Now().Round(time.Hour)
1102+
knobs := &TableStatsTestingKnobs{
1103+
StubTimeNow: func() time.Time { return curTime },
1104+
}
1105+
refresher := MakeRefresher(s.AmbientCtx(), st, internalDB, cache, time.Microsecond /* asOfTime */, knobs, false /* readOnlyTenant */)
1106+
1107+
checkEstimatedStaleness := func(expected float64) error {
1108+
return testutils.SucceedsSoonError(func() error {
1109+
actual, err := refresher.EstimateStaleness(ctx,
1110+
table.GetID())
1111+
if err != nil {
1112+
return err
1113+
}
1114+
if actual != expected {
1115+
return fmt.Errorf("expected EstimateStaleness %f but found %f",
1116+
expected, actual)
1117+
}
1118+
return nil
1119+
})
1120+
}
1121+
1122+
insertStat := func(
1123+
txn *kv.Txn, name string, columnIDs *tree.DArray, createdAt *tree.DTimestamp,
1124+
) error {
1125+
_, err := internalDB.Executor().Exec(
1126+
ctx, "insert-statistic", txn,
1127+
`INSERT INTO system.table_statistics (
1128+
"tableID",
1129+
"name",
1130+
"columnIDs",
1131+
"createdAt",
1132+
"rowCount",
1133+
"distinctCount",
1134+
"nullCount",
1135+
"avgSize"
1136+
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
1137+
table.GetID(),
1138+
name,
1139+
columnIDs,
1140+
createdAt,
1141+
100000, /* rowCount */
1142+
1, /* distinctCount */
1143+
0, /* nullCount */
1144+
4, /* avgSize */
1145+
)
1146+
return err
1147+
}
1148+
1149+
overwriteFullStats := func(startOffsetHours, intervalHours int) error {
1150+
return s.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
1151+
_, err := internalDB.Executor().Exec(
1152+
ctx, "delete-stats", txn,
1153+
`DELETE FROM system.table_statistics WHERE "tableID" = $1`,
1154+
table.GetID(),
1155+
)
1156+
if err != nil {
1157+
return err
1158+
}
1159+
1160+
for i := 0; i < 5; i++ {
1161+
columnIDsVal := tree.NewDArray(types.Int)
1162+
if err := columnIDsVal.Append(tree.NewDInt(tree.DInt(1))); err != nil {
1163+
return err
1164+
}
1165+
offset := startOffsetHours + i*intervalHours
1166+
createdAt, err := tree.MakeDTimestamp(
1167+
curTime.Add(time.Duration(-offset)*time.Hour), time.Hour,
1168+
)
1169+
if err != nil {
1170+
return err
1171+
}
1172+
if err := insertStat(txn, jobspb.AutoStatsName, columnIDsVal, createdAt); err != nil {
1173+
return err
1174+
}
1175+
}
1176+
return nil
1177+
})
1178+
}
1179+
1180+
// Ensure that we return an error if estimating staleness without any stats.
1181+
_, err := refresher.EstimateStaleness(ctx, table.GetID())
1182+
require.Error(t, err)
1183+
require.Contains(t, err.Error(), "no full statistics available")
1184+
1185+
// Ensure that we return an error if estimating staleness on a table that
1186+
// doesn't allow auto stats.
1187+
descTableStats := desctestutils.TestingGetPublicTableDescriptor(s.DB(),
1188+
codec, "system", "table_statistics")
1189+
_, err = refresher.EstimateStaleness(ctx, descTableStats.GetID())
1190+
require.Error(t, err)
1191+
require.Contains(t, err.Error(), "automatic stats collection is not allowed for this table")
1192+
1193+
// Create stats with 10-hour intervals, the most recent being 5 hours old.
1194+
if err = overwriteFullStats(
1195+
5, /* startOffsetHours */
1196+
10, /* intervalHours */
1197+
); err != nil {
1198+
t.Fatal(err)
1199+
}
1200+
1201+
// With default settings (fraction_stale_rows = 0.2) and the latest full stat
1202+
// being 5 hours old (half of avgRefreshTime of 10 hours), we expect 10%
1203+
// staleness.
1204+
if err = checkEstimatedStaleness(0.1); err != nil {
1205+
t.Fatal(err)
1206+
}
1207+
1208+
fractionStaleRows := 0.4
1209+
explicitSettings := catpb.AutoStatsSettings{FractionStaleRows: &fractionStaleRows}
1210+
refresher.settingOverrides[table.GetID()] = explicitSettings
1211+
1212+
// With fraction_stale_rows = 0.4 and the latest full stat being 5 hours old
1213+
// (half of avgRefreshTime of 10 hours), we expect 20% staleness.
1214+
if err = checkEstimatedStaleness(0.2); err != nil {
1215+
t.Fatal(err)
1216+
}
1217+
1218+
// Reset fraction_stale_rows to default (0.2)
1219+
delete(refresher.settingOverrides, table.GetID())
1220+
1221+
// Delete old stats and create stats with 3-hour intervals, the most recent
1222+
// being 15 hours old.
1223+
if err = overwriteFullStats(
1224+
15, /* startOffsetHours */
1225+
3, /* intervalHours */
1226+
); err != nil {
1227+
t.Fatal(err)
1228+
}
1229+
1230+
// With default settings (fraction_stale_rows = 0.2) and the latest full stat
1231+
// being 15 hours old (5 times the avgRefreshTime of 3 hours), we expect 100%
1232+
// staleness.
1233+
if err = checkEstimatedStaleness(1.0); err != nil {
1234+
t.Fatal(err)
1235+
}
1236+
1237+
// Delete old stats and create stats with 2-hour intervals, the most recent
1238+
// being 15 hours old.
1239+
if err = overwriteFullStats(
1240+
15, /* startOffsetHours */
1241+
2, /* intervalHours */
1242+
); err != nil {
1243+
t.Fatal(err)
1244+
}
1245+
1246+
// With default settings (fraction_stale_rows = 0.2) and the latest full stat
1247+
// being 15 hours old (7.5 times the avgRefreshTime of 2 hours), we expect
1248+
// 150% staleness.
1249+
if err = checkEstimatedStaleness(1.5); err != nil {
1250+
t.Fatal(err)
1251+
}
1252+
}

0 commit comments

Comments
 (0)