Skip to content

Commit ed8c021

Browse files
committed
sql/inspect: convert internal errors to inspect issues
Previously, internal errors during index consistency checks would fail the entire job. Now these errors are converted to structured inspect issues with detailed context. Closes #148299 Release note: none Epic: None
1 parent ac24cb9 commit ed8c021

File tree

3 files changed

+79
-8
lines changed

3 files changed

+79
-8
lines changed

pkg/sql/inspect/index_consistency_check.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ type indexConsistencyCheck struct {
4949
// queries join. The actual resulting rows from the RowContainer is
5050
// twice this plus the error_type column.
5151
columns []catalog.Column
52+
53+
// lastQuery stores the SQL query executed for this check to help
54+
// debug internal errors by providing context about the span bounds.
55+
lastQuery string
5256
}
5357

5458
var _ inspectCheck = (*indexConsistencyCheck)(nil)
@@ -190,6 +194,9 @@ func (c *indexConsistencyCheck) Start(
190194
colNames(pkColumns), colNames(otherColumns), c.tableDesc.GetID(), c.secIndex, c.priIndex.GetID(), predicate,
191195
)
192196

197+
// Store the query for error reporting
198+
c.lastQuery = checkQuery
199+
193200
it, err := c.flowCtx.Cfg.DB.Executor().QueryIteratorEx(
194201
ctx, "inspect-index-consistency-check", nil, /* txn */
195202
sessiondata.InternalExecutorOverride{
@@ -221,7 +228,29 @@ func (c *indexConsistencyCheck) Next(
221228

222229
ok, err := c.rowIter.Next(ctx)
223230
if err != nil {
224-
return nil, errors.Wrap(err, "fetching next row in index consistency check")
231+
// Close the iterator to prevent further usage. The close may emit the
232+
// internal error too, but we only need to capture it once.
233+
_ = c.Close(ctx)
234+
c.exhaustedIter = true
235+
236+
// Convert internal errors to inspect issues rather than failing the entire job.
237+
// This allows us to capture and log data corruption or encoding errors as
238+
// structured issues for investigation.
239+
details := make(map[redact.RedactableString]interface{})
240+
details["error_message"] = err.Error()
241+
details["error_type"] = "internal_query_error"
242+
details["index_name"] = c.secIndex.GetName()
243+
details["query"] = c.lastQuery // Store the query that caused the error
244+
245+
return &inspectIssue{
246+
ErrorType: InternalError,
247+
// TODO(148573): Use the timestamp that we create a protected timestamp for.
248+
AOST: timeutil.Now(),
249+
DatabaseID: c.tableDesc.GetParentID(),
250+
SchemaID: c.tableDesc.GetParentSchemaID(),
251+
ObjectID: c.tableDesc.GetID(),
252+
Details: details,
253+
}, nil
225254
}
226255
if !ok {
227256
c.exhaustedIter = true
@@ -303,10 +332,12 @@ func (c *indexConsistencyCheck) Done(context.Context) bool {
303332
// Close implements the inspectCheck interface.
304333
func (c *indexConsistencyCheck) Close(context.Context) error {
305334
if c.rowIter != nil {
306-
if err := c.rowIter.Close(); err != nil {
335+
// Clear the iter ahead of close to ensure we only attempt the close once.
336+
it := c.rowIter
337+
c.rowIter = nil
338+
if err := it.Close(); err != nil {
307339
return errors.Wrap(err, "closing index consistency check iterator")
308340
}
309-
c.rowIter = nil
310341
}
311342
return nil
312343
}

pkg/sql/inspect/index_consistency_check_test.go

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
2626
"github.com/cockroachdb/cockroach/pkg/util/log"
2727
"github.com/cockroachdb/errors"
28+
"github.com/cockroachdb/redact"
2829
"github.com/stretchr/testify/require"
2930
)
3031

@@ -144,6 +145,10 @@ func TestDetectIndexConsistencyErrors(t *testing.T) {
144145
// expectedErrRegex is the regex pattern that the error message should match.
145146
// If empty then no error is expected.
146147
expectedErrRegex string
148+
// expectedInternalErrorPatterns contains patterns for validating internal error details.
149+
// Each element corresponds to the issue at the same index in expectedIssues.
150+
// For non-internal-error issues, the corresponding element should be nil.
151+
expectedInternalErrorPatterns []map[string]string
147152
}{
148153
{
149154
desc: "happy path sanity",
@@ -192,16 +197,24 @@ func TestDetectIndexConsistencyErrors(t *testing.T) {
192197
expectedErrRegex: expectedInspectFoundInconsistencies,
193198
},
194199
{
195-
desc: "2 ranges, secondary index on 'a', 1 dangling entry",
200+
desc: "2 ranges, secondary index on 'a', 1 dangling entry with internal error",
196201
splitRangeDDL: "ALTER TABLE test.t SPLIT AT VALUES (500)",
197202
indexDDL: []string{
198203
"CREATE INDEX idx_t_a ON test.t (a) STORING (f)",
199204
},
200205
danglingIndexEntryInsertQuery: "SELECT 3, 30, 300, 'd_3', 'e_3', -56.712",
201-
// TODO(148299): We eventually want to detect internal errors. Currently
202-
// this test passes by expecting the internal error pattern rather than
203-
// detecting the dangling entry.
204-
expectedErrRegex: "error decoding 6 bytes: float64 value should be exactly 8 bytes: 5",
206+
expectedIssues: []inspectIssue{
207+
{ErrorType: "internal_error"},
208+
},
209+
expectedErrRegex: expectedInspectFoundInconsistencies,
210+
expectedInternalErrorPatterns: []map[string]string{
211+
{
212+
"error_message": "error decoding.*float64",
213+
"error_type": "internal_query_error",
214+
"index_name": "idx_t_a",
215+
"query": "FROM.*table_",
216+
},
217+
},
205218
},
206219
{
207220
desc: "2 ranges, secondary index on 'b' storing 'f', 1 dangling entry",
@@ -368,6 +381,28 @@ func TestDetectIndexConsistencyErrors(t *testing.T) {
368381
require.NotEqual(t, 0, foundIssue.SchemaID, "expected issue to have a schema ID: %s", expectedIssue)
369382
require.NotEqual(t, 0, foundIssue.ObjectID, "expected issue to have an object ID: %s", expectedIssue)
370383
require.NotEqual(t, time.Time{}, foundIssue.AOST, "expected issue to have an AOST time: %s", expectedIssue)
384+
385+
// Additional validation for internal errors
386+
if foundIssue.ErrorType == "internal_error" {
387+
require.NotNil(t, foundIssue.Details, "internal error should have details")
388+
389+
// Validate patterns if provided for this specific issue
390+
if tc.expectedInternalErrorPatterns != nil && i < len(tc.expectedInternalErrorPatterns) &&
391+
tc.expectedInternalErrorPatterns[i] != nil {
392+
expectedPatterns := tc.expectedInternalErrorPatterns[i]
393+
394+
// Validate each expected pattern
395+
for detailKey, expectedPattern := range expectedPatterns {
396+
redactableKey := redact.RedactableString(detailKey)
397+
require.Contains(t, foundIssue.Details, redactableKey, "internal error should contain detail key: %s", detailKey)
398+
399+
detailValue, ok := foundIssue.Details[redactableKey].(string)
400+
require.True(t, ok, "detail value for key %s should be a string", detailKey)
401+
require.Regexp(t, expectedPattern, detailValue,
402+
"detail %s should match pattern %s, got: %s", detailKey, expectedPattern, detailValue)
403+
}
404+
}
405+
}
371406
}
372407

373408
// Validate job status matches expected outcome

pkg/sql/inspect/issue.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,9 @@ const (
9797
// DanglingSecondaryIndexEntry occurs when a secondary index entry exists
9898
// but the corresponding row in the primary index is missing.
9999
DanglingSecondaryIndexEntry inspectErrorType = "dangling_secondary_index_entry"
100+
101+
// InternalError occurs when an internal error (e.g., data corruption, encoding
102+
// issues) prevents the check from completing normally. These errors indicate
103+
// potential data corruption or other serious issues that require investigation.
104+
InternalError inspectErrorType = "internal_error"
100105
)

0 commit comments

Comments
 (0)