Skip to content

Commit 69521f9

Browse files
authored
Merge pull request cockroachdb#14181 from knz/collect-queries
sql: per-statement statistics, step two
2 parents fbde0fb + 8f4434f commit 69521f9

File tree

10 files changed

+1254
-59
lines changed

10 files changed

+1254
-59
lines changed

pkg/sql/app_stats.go

Lines changed: 187 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,121 @@
1515

1616
package sql
1717

18-
import "github.com/cockroachdb/cockroach/pkg/util/syncutil"
18+
import (
19+
"bytes"
20+
"encoding/json"
21+
"fmt"
22+
"time"
23+
24+
"golang.org/x/net/context"
25+
26+
"github.com/cockroachdb/cockroach/pkg/sql/parser"
27+
"github.com/cockroachdb/cockroach/pkg/util/envutil"
28+
"github.com/cockroachdb/cockroach/pkg/util/log"
29+
"github.com/cockroachdb/cockroach/pkg/util/stop"
30+
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
31+
)
1932

2033
// appStats holds per-application statistics.
2134
type appStats struct {
2235
syncutil.Mutex
2336

24-
stmtCount int
37+
stmts map[string]*stmtStats
38+
}
39+
40+
// stmtStats holds per-statement statistics.
41+
type stmtStats struct {
42+
syncutil.Mutex
43+
44+
data StatementStatistics
2545
}
2646

27-
func (a *appStats) recordStatement() {
28-
if a == nil {
47+
// StmtStatsEnable determines whether to collect per-statement
48+
// statistics.
49+
// Note: in the future we will want to have this collection enabled at
50+
// all times. We hide it behind an environment variable until further
51+
// testing confirms it works and is stable.
52+
var StmtStatsEnable = envutil.EnvOrDefaultBool(
53+
"COCKROACH_SQL_STMT_STATS_ENABLE", false,
54+
)
55+
56+
func (a *appStats) recordStatement(
57+
stmt parser.Statement,
58+
distSQLUsed bool,
59+
automaticRetryCount int,
60+
numRows int,
61+
err error,
62+
parseLat, planLat, runLat, svcLat, ovhLat float64,
63+
) {
64+
if a == nil || !StmtStatsEnable {
65+
return
66+
}
67+
68+
// Some statements like SET, SHOW etc are not useful to collect
69+
// stats about. Ignore them.
70+
if _, ok := stmt.(parser.HiddenFromStats); ok {
2971
return
3072
}
73+
74+
// Extend the statement key with a character that indicated whether
75+
// there was an error and/or whether the query was distributed, so
76+
// that we use separate buckets for the different situations.
77+
var buf bytes.Buffer
78+
if err != nil {
79+
buf.WriteByte('!')
80+
}
81+
if distSQLUsed {
82+
buf.WriteByte('+')
83+
}
84+
parser.FormatNode(&buf, parser.FmtSimple, stmt)
85+
stmtKey := buf.String()
86+
87+
// Get the statistics object.
88+
s := a.getStatsForStmt(stmtKey)
89+
90+
// Collect the per-statement statistics.
91+
s.Lock()
92+
s.data.Count++
93+
if err != nil {
94+
s.data.LastErr = err.Error()
95+
}
96+
if automaticRetryCount == 0 {
97+
s.data.FirstAttemptCount++
98+
} else if int64(automaticRetryCount) > s.data.MaxRetries {
99+
s.data.MaxRetries = int64(automaticRetryCount)
100+
}
101+
s.data.NumRows.record(s.data.Count, float64(numRows))
102+
s.data.ParseLat.record(s.data.Count, parseLat)
103+
s.data.PlanLat.record(s.data.Count, planLat)
104+
s.data.RunLat.record(s.data.Count, runLat)
105+
s.data.ServiceLat.record(s.data.Count, svcLat)
106+
s.data.OverheadLat.record(s.data.Count, ovhLat)
107+
s.Unlock()
108+
}
109+
110+
// Retrieve the variance of the values.
111+
func (l *NumericStat) getVariance(count int64) float64 {
112+
return l.SquaredDiffs / (float64(count) - 1)
113+
}
114+
115+
func (l *NumericStat) record(count int64, val float64) {
116+
delta := val - l.Mean
117+
l.Mean += delta / float64(count)
118+
l.SquaredDiffs += delta * (val - l.Mean)
119+
}
120+
121+
// getStatsForStmt retrieves the per-stmt stat object.
122+
func (a *appStats) getStatsForStmt(stmtKey string) *stmtStats {
31123
a.Lock()
32-
a.stmtCount++
124+
// Retrieve the per-statement statistic object, and create it if it
125+
// doesn't exist yet.
126+
s, ok := a.stmts[stmtKey]
127+
if !ok {
128+
s = &stmtStats{}
129+
a.stmts[stmtKey] = s
130+
}
33131
a.Unlock()
132+
return s
34133
}
35134

36135
// sqlStats carries per-application statistics for all applications on
@@ -59,7 +158,89 @@ func (s *sqlStats) getStatsForApplication(appName string) *appStats {
59158
if a, ok := s.apps[appName]; ok {
60159
return a
61160
}
62-
a := &appStats{}
161+
a := &appStats{stmts: make(map[string]*stmtStats)}
63162
s.apps[appName] = a
64163
return a
65164
}
165+
166+
// resetStats clears all the stored per-app and per-statement
167+
// statistics.
168+
func (s *sqlStats) resetStats(ctx context.Context) {
169+
// Note: we do not clear the entire s.apps map here. We would need
170+
// to do so to prevent problems with a runaway client running `SET
171+
// APPLICATION_NAME=...` with a different name every time. However,
172+
// any ongoing open client session at the time of the reset has
173+
// cached a pointer to its appStats struct and would thus continue
174+
// to report its stats in an object now invisible to the other tools
175+
// (virtual table, marshalling, etc.). It's a judgement call, but
176+
// for now we prefer to see more data and thus not clear the map, at
177+
// the risk of seeing the map grow unboundedly with the number of
178+
// different application_names seen so far.
179+
180+
s.Lock()
181+
// Clear the per-apps maps manually,
182+
// because any SQL session currently open has cached the
183+
// pointer to its appStats object and will continue to
184+
// accumulate data using that until it closes (or changes its
185+
// application_name).
186+
for appName, a := range s.apps {
187+
a.Lock()
188+
189+
// Save the existing data to logs.
190+
// TODO(knz/dt): instead of dumping the stats to the log, save
191+
// them in a SQL table so they can be inspected by the DBA and/or
192+
// the UI.
193+
dumpStmtStats(ctx, appName, a.stmts)
194+
195+
// Clear the map, to release the memory; make the new map somewhat
196+
// already large for the likely future workload.
197+
a.stmts = make(map[string]*stmtStats, len(a.stmts)/2)
198+
a.Unlock()
199+
}
200+
s.Unlock()
201+
}
202+
203+
// Save the existing data for an application to the info log.
204+
func dumpStmtStats(ctx context.Context, appName string, stats map[string]*stmtStats) {
205+
if len(stats) == 0 {
206+
return
207+
}
208+
var buf bytes.Buffer
209+
fmt.Fprintf(&buf, "Statistics for %q:\n", appName)
210+
for key, s := range stats {
211+
s.Lock()
212+
json, err := json.Marshal(s.data)
213+
s.Unlock()
214+
if err != nil {
215+
log.Errorf(ctx, "error while marshaling stats for %q // %q: %v", appName, key, err)
216+
continue
217+
}
218+
fmt.Fprintf(&buf, "%q: %s\n", key, json)
219+
}
220+
log.Info(ctx, buf.String())
221+
}
222+
223+
// StmtStatsResetFrequency is the frequency at which per-app and
224+
// per-statement statistics are cleared from memory, to avoid
225+
// unlimited memory growth.
226+
var StmtStatsResetFrequency = envutil.EnvOrDefaultDuration(
227+
"COCKROACH_SQL_STMT_STATS_RESET_INTERVAL", 1*time.Hour,
228+
)
229+
230+
// startResetWorker ensures that the data is removed from memory
231+
// periodically, so as to avoid memory blow-ups.
232+
func (s *sqlStats) startResetWorker(stopper *stop.Stopper) {
233+
ctx := log.WithLogTag(context.Background(), "sql-stats", nil)
234+
stopper.RunWorker(func() {
235+
ticker := time.NewTicker(StmtStatsResetFrequency)
236+
defer ticker.Stop()
237+
for {
238+
select {
239+
case <-ticker.C:
240+
s.resetStats(ctx)
241+
case <-stopper.ShouldStop():
242+
return
243+
}
244+
}
245+
})
246+
}

0 commit comments

Comments
 (0)