|
15 | 15 |
|
16 | 16 | package sql |
17 | 17 |
|
18 | | -import "github.com/cockroachdb/cockroach/pkg/util/syncutil" |
| 18 | +import ( |
| 19 | + "bytes" |
| 20 | + "encoding/json" |
| 21 | + "fmt" |
| 22 | + "time" |
| 23 | + |
| 24 | + "golang.org/x/net/context" |
| 25 | + |
| 26 | + "github.com/cockroachdb/cockroach/pkg/sql/parser" |
| 27 | + "github.com/cockroachdb/cockroach/pkg/util/envutil" |
| 28 | + "github.com/cockroachdb/cockroach/pkg/util/log" |
| 29 | + "github.com/cockroachdb/cockroach/pkg/util/stop" |
| 30 | + "github.com/cockroachdb/cockroach/pkg/util/syncutil" |
| 31 | +) |
19 | 32 |
|
20 | 33 | // appStats holds per-application statistics. |
21 | 34 | type appStats struct { |
22 | 35 | syncutil.Mutex |
23 | 36 |
|
24 | | - stmtCount int |
| 37 | + stmts map[string]*stmtStats |
| 38 | +} |
| 39 | + |
| 40 | +// stmtStats holds per-statement statistics. |
| 41 | +type stmtStats struct { |
| 42 | + syncutil.Mutex |
| 43 | + |
| 44 | + data StatementStatistics |
25 | 45 | } |
26 | 46 |
|
27 | | -func (a *appStats) recordStatement() { |
28 | | - if a == nil { |
| 47 | +// StmtStatsEnable determines whether to collect per-statement |
| 48 | +// statistics. |
| 49 | +// Note: in the future we will want to have this collection enabled at |
| 50 | +// all times. We hide it behind an environment variable until further |
| 51 | +// testing confirms it works and is stable. |
| 52 | +var StmtStatsEnable = envutil.EnvOrDefaultBool( |
| 53 | + "COCKROACH_SQL_STMT_STATS_ENABLE", false, |
| 54 | +) |
| 55 | + |
| 56 | +func (a *appStats) recordStatement( |
| 57 | + stmt parser.Statement, |
| 58 | + distSQLUsed bool, |
| 59 | + automaticRetryCount int, |
| 60 | + numRows int, |
| 61 | + err error, |
| 62 | + parseLat, planLat, runLat, svcLat, ovhLat float64, |
| 63 | +) { |
| 64 | + if a == nil || !StmtStatsEnable { |
| 65 | + return |
| 66 | + } |
| 67 | + |
| 68 | + // Some statements like SET, SHOW etc are not useful to collect |
| 69 | + // stats about. Ignore them. |
| 70 | + if _, ok := stmt.(parser.HiddenFromStats); ok { |
29 | 71 | return |
30 | 72 | } |
| 73 | + |
| 74 | + // Extend the statement key with a character that indicated whether |
| 75 | + // there was an error and/or whether the query was distributed, so |
| 76 | + // that we use separate buckets for the different situations. |
| 77 | + var buf bytes.Buffer |
| 78 | + if err != nil { |
| 79 | + buf.WriteByte('!') |
| 80 | + } |
| 81 | + if distSQLUsed { |
| 82 | + buf.WriteByte('+') |
| 83 | + } |
| 84 | + parser.FormatNode(&buf, parser.FmtSimple, stmt) |
| 85 | + stmtKey := buf.String() |
| 86 | + |
| 87 | + // Get the statistics object. |
| 88 | + s := a.getStatsForStmt(stmtKey) |
| 89 | + |
| 90 | + // Collect the per-statement statistics. |
| 91 | + s.Lock() |
| 92 | + s.data.Count++ |
| 93 | + if err != nil { |
| 94 | + s.data.LastErr = err.Error() |
| 95 | + } |
| 96 | + if automaticRetryCount == 0 { |
| 97 | + s.data.FirstAttemptCount++ |
| 98 | + } else if int64(automaticRetryCount) > s.data.MaxRetries { |
| 99 | + s.data.MaxRetries = int64(automaticRetryCount) |
| 100 | + } |
| 101 | + s.data.NumRows.record(s.data.Count, float64(numRows)) |
| 102 | + s.data.ParseLat.record(s.data.Count, parseLat) |
| 103 | + s.data.PlanLat.record(s.data.Count, planLat) |
| 104 | + s.data.RunLat.record(s.data.Count, runLat) |
| 105 | + s.data.ServiceLat.record(s.data.Count, svcLat) |
| 106 | + s.data.OverheadLat.record(s.data.Count, ovhLat) |
| 107 | + s.Unlock() |
| 108 | +} |
| 109 | + |
| 110 | +// Retrieve the variance of the values. |
| 111 | +func (l *NumericStat) getVariance(count int64) float64 { |
| 112 | + return l.SquaredDiffs / (float64(count) - 1) |
| 113 | +} |
| 114 | + |
| 115 | +func (l *NumericStat) record(count int64, val float64) { |
| 116 | + delta := val - l.Mean |
| 117 | + l.Mean += delta / float64(count) |
| 118 | + l.SquaredDiffs += delta * (val - l.Mean) |
| 119 | +} |
| 120 | + |
| 121 | +// getStatsForStmt retrieves the per-stmt stat object. |
| 122 | +func (a *appStats) getStatsForStmt(stmtKey string) *stmtStats { |
31 | 123 | a.Lock() |
32 | | - a.stmtCount++ |
| 124 | + // Retrieve the per-statement statistic object, and create it if it |
| 125 | + // doesn't exist yet. |
| 126 | + s, ok := a.stmts[stmtKey] |
| 127 | + if !ok { |
| 128 | + s = &stmtStats{} |
| 129 | + a.stmts[stmtKey] = s |
| 130 | + } |
33 | 131 | a.Unlock() |
| 132 | + return s |
34 | 133 | } |
35 | 134 |
|
36 | 135 | // sqlStats carries per-application statistics for all applications on |
@@ -59,7 +158,89 @@ func (s *sqlStats) getStatsForApplication(appName string) *appStats { |
59 | 158 | if a, ok := s.apps[appName]; ok { |
60 | 159 | return a |
61 | 160 | } |
62 | | - a := &appStats{} |
| 161 | + a := &appStats{stmts: make(map[string]*stmtStats)} |
63 | 162 | s.apps[appName] = a |
64 | 163 | return a |
65 | 164 | } |
| 165 | + |
| 166 | +// resetStats clears all the stored per-app and per-statement |
| 167 | +// statistics. |
| 168 | +func (s *sqlStats) resetStats(ctx context.Context) { |
| 169 | + // Note: we do not clear the entire s.apps map here. We would need |
| 170 | + // to do so to prevent problems with a runaway client running `SET |
| 171 | + // APPLICATION_NAME=...` with a different name every time. However, |
| 172 | + // any ongoing open client session at the time of the reset has |
| 173 | + // cached a pointer to its appStats struct and would thus continue |
| 174 | + // to report its stats in an object now invisible to the other tools |
| 175 | + // (virtual table, marshalling, etc.). It's a judgement call, but |
| 176 | + // for now we prefer to see more data and thus not clear the map, at |
| 177 | + // the risk of seeing the map grow unboundedly with the number of |
| 178 | + // different application_names seen so far. |
| 179 | + |
| 180 | + s.Lock() |
| 181 | + // Clear the per-apps maps manually, |
| 182 | + // because any SQL session currently open has cached the |
| 183 | + // pointer to its appStats object and will continue to |
| 184 | + // accumulate data using that until it closes (or changes its |
| 185 | + // application_name). |
| 186 | + for appName, a := range s.apps { |
| 187 | + a.Lock() |
| 188 | + |
| 189 | + // Save the existing data to logs. |
| 190 | + // TODO(knz/dt): instead of dumping the stats to the log, save |
| 191 | + // them in a SQL table so they can be inspected by the DBA and/or |
| 192 | + // the UI. |
| 193 | + dumpStmtStats(ctx, appName, a.stmts) |
| 194 | + |
| 195 | + // Clear the map, to release the memory; make the new map somewhat |
| 196 | + // already large for the likely future workload. |
| 197 | + a.stmts = make(map[string]*stmtStats, len(a.stmts)/2) |
| 198 | + a.Unlock() |
| 199 | + } |
| 200 | + s.Unlock() |
| 201 | +} |
| 202 | + |
| 203 | +// Save the existing data for an application to the info log. |
| 204 | +func dumpStmtStats(ctx context.Context, appName string, stats map[string]*stmtStats) { |
| 205 | + if len(stats) == 0 { |
| 206 | + return |
| 207 | + } |
| 208 | + var buf bytes.Buffer |
| 209 | + fmt.Fprintf(&buf, "Statistics for %q:\n", appName) |
| 210 | + for key, s := range stats { |
| 211 | + s.Lock() |
| 212 | + json, err := json.Marshal(s.data) |
| 213 | + s.Unlock() |
| 214 | + if err != nil { |
| 215 | + log.Errorf(ctx, "error while marshaling stats for %q // %q: %v", appName, key, err) |
| 216 | + continue |
| 217 | + } |
| 218 | + fmt.Fprintf(&buf, "%q: %s\n", key, json) |
| 219 | + } |
| 220 | + log.Info(ctx, buf.String()) |
| 221 | +} |
| 222 | + |
| 223 | +// StmtStatsResetFrequency is the frequency at which per-app and |
| 224 | +// per-statement statistics are cleared from memory, to avoid |
| 225 | +// unlimited memory growth. |
| 226 | +var StmtStatsResetFrequency = envutil.EnvOrDefaultDuration( |
| 227 | + "COCKROACH_SQL_STMT_STATS_RESET_INTERVAL", 1*time.Hour, |
| 228 | +) |
| 229 | + |
| 230 | +// startResetWorker ensures that the data is removed from memory |
| 231 | +// periodically, so as to avoid memory blow-ups. |
| 232 | +func (s *sqlStats) startResetWorker(stopper *stop.Stopper) { |
| 233 | + ctx := log.WithLogTag(context.Background(), "sql-stats", nil) |
| 234 | + stopper.RunWorker(func() { |
| 235 | + ticker := time.NewTicker(StmtStatsResetFrequency) |
| 236 | + defer ticker.Stop() |
| 237 | + for { |
| 238 | + select { |
| 239 | + case <-ticker.C: |
| 240 | + s.resetStats(ctx) |
| 241 | + case <-stopper.ShouldStop(): |
| 242 | + return |
| 243 | + } |
| 244 | + } |
| 245 | + }) |
| 246 | +} |
0 commit comments