Skip to content

Commit bedfbd7

Browse files
craig[bot]spilchen
andcommitted
Merge #156377
156377: sql/inspect: skip hash precheck for non-encodable types r=spilchen a=spilchen The recent hash precheck optimization for INSPECT (added to speed up index consistency checks) relies on `crdb_internal.datums_to_bytes` to compute row hashes. However, this builtin uses `keyside.Encode` internally, which doesn't support all column types. When INSPECT encountered tables with TSVECTOR, TSQUERY, or VECTOR (PGVector) columns stored in secondary indexes, the hash precheck would fail with "illegal argument N of type <typename>". This caused INSPECT to fail entirely on these tables. This commit adds a check to skip the hash precheck if there are any types that cannot be encoded. Fixes #156175 Epic: CRDB-55075 Release note: none Co-authored-by: Matt Spilchen <[email protected]>
2 parents 9bf71dd + c9bb3e6 commit bedfbd7

File tree

2 files changed

+174
-13
lines changed

2 files changed

+174
-13
lines changed

pkg/sql/inspect/index_consistency_check.go

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/cockroachdb/cockroach/pkg/settings"
1818
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
1919
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
20+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
2021
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
2122
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
2223
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
@@ -247,20 +248,30 @@ func (c *indexConsistencyCheck) Start(
247248
}
248249

249250
if indexConsistencyHashEnabled.Get(&c.flowCtx.Cfg.Settings.SV) && len(allColNames) > 0 {
250-
match, hashErr := c.hashesMatch(ctx, allColNames, predicate, queryArgs)
251-
if hashErr != nil {
252-
// If hashing fails, we usually fall back to the full check. But if the
253-
// error stems from query construction, that's an internal bug and shouldn't
254-
// be ignored.
255-
if isQueryConstructionError(hashErr) {
256-
return errors.WithAssertionFailure(hashErr)
251+
// The hash precheck uses crdb_internal.datums_to_bytes, which depends on
252+
// keyside.Encode. Skip if any column type isn’t encodable (i.e. TSQUERY, etc.).
253+
if !allColumnsDatumsToBytesCompatible(c.columns) {
254+
log.Dev.Infof(ctx, "skipping hash precheck for index %s: column type not compatible with datums_to_bytes",
255+
c.secIndex.GetName())
256+
} else {
257+
match, hashErr := c.hashesMatch(ctx, allColNames, predicate, queryArgs)
258+
if hashErr != nil {
259+
if isQueryConstructionError(hashErr) {
260+
// If hashing fails and the error stems from query construction,
261+
// that's an internal bug and shouldn't be ignored.
262+
return errors.WithAssertionFailure(hashErr)
263+
}
264+
// For all other hash errors, log and fall back.
265+
log.Dev.Infof(ctx, "hash precheck failed; falling back to full check: %v", hashErr)
257266
}
258-
log.Dev.Infof(ctx, "hash precheck for index consistency did not match; falling back to full check: %v", hashErr)
259-
}
260-
if match {
261-
// Hashes match, no corruption detected - skip the full check.
262-
c.state = checkHashMatched
263-
return nil
267+
if match {
268+
// Hashes match, no corruption detected - skip the full check.
269+
c.state = checkHashMatched
270+
return nil
271+
}
272+
// Hashes don't match - corruption detected. Fall back to full check.
273+
log.Dev.Infof(ctx, "hash precheck detected mismatch for index %s; proceeding with full check",
274+
c.secIndex.GetName())
264275
}
265276
}
266277

@@ -937,3 +948,20 @@ func formatPlaceholders(placeholders []interface{}) []string {
937948
}
938949
return result
939950
}
951+
952+
// allColumnsDatumsToBytesCompatible reports whether all columns can be
953+
// passed to crdb_internal.datums_to_bytes. Returns false if any column
954+
// has a type that cannot be key-encoded using keyside.Encode, which
955+
// datums_to_bytes relies on internally.
956+
//
957+
// REFCURSOR is technically supported by datums_to_bytes, but we still use
958+
// ColumnTypeIsIndexable to avoid duplicating type checks. REFCURSOR is
959+
// uncommon, so this trade-off keeps the code simpler.
960+
func allColumnsDatumsToBytesCompatible(columns []catalog.Column) bool {
961+
for _, col := range columns {
962+
if !colinfo.ColumnTypeIsIndexable(col.GetType()) {
963+
return false
964+
}
965+
}
966+
return true
967+
}

pkg/sql/logictest/testdata/logic_test/inspect

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,3 +585,136 @@ statement ok
585585
DROP TABLE t_aost;
586586

587587
subtest end
588+
589+
subtest inspect_non_key_encodable_types
590+
591+
statement ok
592+
CREATE TABLE tsvector_tbl (
593+
id INT PRIMARY KEY,
594+
a INT,
595+
tsv TSVECTOR NOT NULL,
596+
INDEX idx_a_tsv (a) STORING (tsv)
597+
);
598+
599+
statement ok
600+
INSERT INTO tsvector_tbl VALUES
601+
(1, 10, 'hello world'::TSVECTOR),
602+
(2, 20, 'foo bar'::TSVECTOR);
603+
604+
# First verify that datums_to_bytes does not support TSVECTOR
605+
statement error pq: illegal argument 0 of type tsvector
606+
SELECT crdb_internal.datums_to_bytes(tsv) FROM tsvector_tbl LIMIT 1;
607+
608+
# TSVECTOR cannot be key-encoded, so the hash precheck will be skipped.
609+
# The JOIN-based check should work fine.
610+
statement ok
611+
INSPECT TABLE tsvector_tbl WITH OPTIONS INDEX (idx_a_tsv);
612+
613+
query TI
614+
SELECT * FROM last_inspect_job;
615+
----
616+
succeeded 1
617+
618+
statement ok
619+
CREATE TABLE tsquery_tbl (
620+
id INT PRIMARY KEY,
621+
a INT,
622+
tsq TSQUERY NOT NULL,
623+
INDEX idx_a_tsq (a) STORING (tsq)
624+
);
625+
626+
statement ok
627+
INSERT INTO tsquery_tbl VALUES
628+
(1, 10, 'search & term'::TSQUERY),
629+
(2, 20, 'another | query'::TSQUERY);
630+
631+
# First verify that datums_to_bytes does not support TSQUERY
632+
statement error pq: illegal argument 0 of type tsquery
633+
SELECT crdb_internal.datums_to_bytes(tsq) FROM tsquery_tbl LIMIT 1;
634+
635+
# TSQUERY cannot be key-encoded, so the hash precheck will be skipped.
636+
# The JOIN-based check should work fine.
637+
statement ok
638+
INSPECT TABLE tsquery_tbl WITH OPTIONS INDEX (idx_a_tsq);
639+
640+
query TI
641+
SELECT * FROM last_inspect_job;
642+
----
643+
succeeded 1
644+
645+
statement ok
646+
CREATE TABLE pgvector_tbl (
647+
id INT PRIMARY KEY,
648+
a INT,
649+
vec VECTOR(3) NOT NULL,
650+
INDEX idx_a_vec (a) STORING (vec)
651+
);
652+
653+
statement ok
654+
INSERT INTO pgvector_tbl VALUES
655+
(1, 10, '[1.0, 2.0, 3.0]'::VECTOR(3)),
656+
(2, 20, '[4.0, 5.0, 6.0]'::VECTOR(3));
657+
658+
# First verify that datums_to_bytes does not support VECTOR
659+
statement error pq: illegal argument 0 of type vector
660+
SELECT crdb_internal.datums_to_bytes(vec) FROM pgvector_tbl LIMIT 1;
661+
662+
# VECTOR cannot be key-encoded, so the hash precheck will be skipped.
663+
# The JOIN-based check should work fine.
664+
statement ok
665+
INSPECT TABLE pgvector_tbl WITH OPTIONS INDEX (idx_a_vec);
666+
667+
query TI
668+
SELECT * FROM last_inspect_job;
669+
----
670+
succeeded 1
671+
672+
# Test with multiple problematic types in the same index.
673+
statement ok
674+
CREATE TABLE multi_type_tbl (
675+
id INT PRIMARY KEY,
676+
a INT,
677+
tsv TSVECTOR,
678+
tsq TSQUERY NOT NULL,
679+
vec VECTOR(2),
680+
INDEX idx_a_multi (a) STORING (tsv, tsq, vec)
681+
);
682+
683+
statement ok
684+
INSERT INTO multi_type_tbl VALUES
685+
(1, 10, 'word1 word2'::TSVECTOR, 'search'::TSQUERY, '[1.0, 2.0]'::VECTOR(2)),
686+
(2, 20, NULL, 'query'::TSQUERY, NULL);
687+
688+
# Verify that datums_to_bytes fails on each type individually.
689+
statement error pq: illegal argument 0 of type tsvector
690+
SELECT crdb_internal.datums_to_bytes('word1 word2'::TSVECTOR);
691+
692+
statement error pq: illegal argument 0 of type tsquery
693+
SELECT crdb_internal.datums_to_bytes('search'::TSQUERY);
694+
695+
statement error pq: illegal argument 0 of type vector
696+
SELECT crdb_internal.datums_to_bytes('[1.0, 2.0]'::VECTOR(2));
697+
698+
# Multiple non-key-encodable types: hash precheck will be skipped.
699+
# The JOIN-based check should work fine.
700+
statement ok
701+
INSPECT TABLE multi_type_tbl WITH OPTIONS INDEX (idx_a_multi);
702+
703+
query TI
704+
SELECT * FROM last_inspect_job;
705+
----
706+
succeeded 1
707+
708+
statement ok
709+
DROP TABLE tsvector_tbl;
710+
711+
statement ok
712+
DROP TABLE tsquery_tbl;
713+
714+
statement ok
715+
DROP TABLE pgvector_tbl;
716+
717+
statement ok
718+
DROP TABLE multi_type_tbl;
719+
720+
subtest end

0 commit comments

Comments
 (0)