Skip to content

Commit c9bb3e6

Browse files
committed
sql/inspect: skip hash precheck for non-key-encodable types
The recent hash precheck optimization for INSPECT (added to speed up index consistency checks) relies on crdb_internal.datums_to_bytes to compute row hashes. However, this builtin uses keyside.Encode internally, which doesn't support all column types. When INSPECT encountered tables with TSVECTOR, TSQUERY, or VECTOR (PGVector) columns stored in secondary indexes, the hash precheck would fail with "illegal argument N of type <typename>". This caused INSPECT to fail entirely on these tables. This commit adds a check to skip the hash precheck if there are any types that cannot be encoded. Fixes #156175 Epic: CRDB-55075 Release note: none
1 parent 7a2d18f commit c9bb3e6

File tree

2 files changed

+174
-13
lines changed

2 files changed

+174
-13
lines changed

pkg/sql/inspect/index_consistency_check.go

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/cockroachdb/cockroach/pkg/settings"
1818
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
1919
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
20+
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
2021
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
2122
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
2223
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
@@ -247,20 +248,30 @@ func (c *indexConsistencyCheck) Start(
247248
}
248249

249250
if indexConsistencyHashEnabled.Get(&c.flowCtx.Cfg.Settings.SV) && len(allColNames) > 0 {
250-
match, hashErr := c.hashesMatch(ctx, allColNames, predicate, queryArgs)
251-
if hashErr != nil {
252-
// If hashing fails, we usually fall back to the full check. But if the
253-
// error stems from query construction, that's an internal bug and shouldn't
254-
// be ignored.
255-
if isQueryConstructionError(hashErr) {
256-
return errors.WithAssertionFailure(hashErr)
251+
// The hash precheck uses crdb_internal.datums_to_bytes, which depends on
252+
// keyside.Encode. Skip if any column type isn’t encodable (i.e. TSQUERY, etc.).
253+
if !allColumnsDatumsToBytesCompatible(c.columns) {
254+
log.Dev.Infof(ctx, "skipping hash precheck for index %s: column type not compatible with datums_to_bytes",
255+
c.secIndex.GetName())
256+
} else {
257+
match, hashErr := c.hashesMatch(ctx, allColNames, predicate, queryArgs)
258+
if hashErr != nil {
259+
if isQueryConstructionError(hashErr) {
260+
// If hashing fails and the error stems from query construction,
261+
// that's an internal bug and shouldn't be ignored.
262+
return errors.WithAssertionFailure(hashErr)
263+
}
264+
// For all other hash errors, log and fall back.
265+
log.Dev.Infof(ctx, "hash precheck failed; falling back to full check: %v", hashErr)
257266
}
258-
log.Dev.Infof(ctx, "hash precheck for index consistency did not match; falling back to full check: %v", hashErr)
259-
}
260-
if match {
261-
// Hashes match, no corruption detected - skip the full check.
262-
c.state = checkHashMatched
263-
return nil
267+
if match {
268+
// Hashes match, no corruption detected - skip the full check.
269+
c.state = checkHashMatched
270+
return nil
271+
}
272+
// Hashes don't match - corruption detected. Fall back to full check.
273+
log.Dev.Infof(ctx, "hash precheck detected mismatch for index %s; proceeding with full check",
274+
c.secIndex.GetName())
264275
}
265276
}
266277

@@ -937,3 +948,20 @@ func formatPlaceholders(placeholders []interface{}) []string {
937948
}
938949
return result
939950
}
951+
952+
// allColumnsDatumsToBytesCompatible reports whether all columns can be
953+
// passed to crdb_internal.datums_to_bytes. Returns false if any column
954+
// has a type that cannot be key-encoded using keyside.Encode, which
955+
// datums_to_bytes relies on internally.
956+
//
957+
// REFCURSOR is technically supported by datums_to_bytes, but we still use
958+
// ColumnTypeIsIndexable to avoid duplicating type checks. REFCURSOR is
959+
// uncommon, so this trade-off keeps the code simpler.
960+
func allColumnsDatumsToBytesCompatible(columns []catalog.Column) bool {
961+
for _, col := range columns {
962+
if !colinfo.ColumnTypeIsIndexable(col.GetType()) {
963+
return false
964+
}
965+
}
966+
return true
967+
}

pkg/sql/logictest/testdata/logic_test/inspect

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,3 +585,136 @@ statement ok
585585
DROP TABLE t_aost;
586586

587587
subtest end
588+
589+
subtest inspect_non_key_encodable_types
590+
591+
statement ok
592+
CREATE TABLE tsvector_tbl (
593+
id INT PRIMARY KEY,
594+
a INT,
595+
tsv TSVECTOR NOT NULL,
596+
INDEX idx_a_tsv (a) STORING (tsv)
597+
);
598+
599+
statement ok
600+
INSERT INTO tsvector_tbl VALUES
601+
(1, 10, 'hello world'::TSVECTOR),
602+
(2, 20, 'foo bar'::TSVECTOR);
603+
604+
# First verify that datums_to_bytes does not support TSVECTOR
605+
statement error pq: illegal argument 0 of type tsvector
606+
SELECT crdb_internal.datums_to_bytes(tsv) FROM tsvector_tbl LIMIT 1;
607+
608+
# TSVECTOR cannot be key-encoded, so the hash precheck will be skipped.
609+
# The JOIN-based check should work fine.
610+
statement ok
611+
INSPECT TABLE tsvector_tbl WITH OPTIONS INDEX (idx_a_tsv);
612+
613+
query TI
614+
SELECT * FROM last_inspect_job;
615+
----
616+
succeeded 1
617+
618+
statement ok
619+
CREATE TABLE tsquery_tbl (
620+
id INT PRIMARY KEY,
621+
a INT,
622+
tsq TSQUERY NOT NULL,
623+
INDEX idx_a_tsq (a) STORING (tsq)
624+
);
625+
626+
statement ok
627+
INSERT INTO tsquery_tbl VALUES
628+
(1, 10, 'search & term'::TSQUERY),
629+
(2, 20, 'another | query'::TSQUERY);
630+
631+
# First verify that datums_to_bytes does not support TSQUERY
632+
statement error pq: illegal argument 0 of type tsquery
633+
SELECT crdb_internal.datums_to_bytes(tsq) FROM tsquery_tbl LIMIT 1;
634+
635+
# TSQUERY cannot be key-encoded, so the hash precheck will be skipped.
636+
# The JOIN-based check should work fine.
637+
statement ok
638+
INSPECT TABLE tsquery_tbl WITH OPTIONS INDEX (idx_a_tsq);
639+
640+
query TI
641+
SELECT * FROM last_inspect_job;
642+
----
643+
succeeded 1
644+
645+
statement ok
646+
CREATE TABLE pgvector_tbl (
647+
id INT PRIMARY KEY,
648+
a INT,
649+
vec VECTOR(3) NOT NULL,
650+
INDEX idx_a_vec (a) STORING (vec)
651+
);
652+
653+
statement ok
654+
INSERT INTO pgvector_tbl VALUES
655+
(1, 10, '[1.0, 2.0, 3.0]'::VECTOR(3)),
656+
(2, 20, '[4.0, 5.0, 6.0]'::VECTOR(3));
657+
658+
# First verify that datums_to_bytes does not support VECTOR
659+
statement error pq: illegal argument 0 of type vector
660+
SELECT crdb_internal.datums_to_bytes(vec) FROM pgvector_tbl LIMIT 1;
661+
662+
# VECTOR cannot be key-encoded, so the hash precheck will be skipped.
663+
# The JOIN-based check should work fine.
664+
statement ok
665+
INSPECT TABLE pgvector_tbl WITH OPTIONS INDEX (idx_a_vec);
666+
667+
query TI
668+
SELECT * FROM last_inspect_job;
669+
----
670+
succeeded 1
671+
672+
# Test with multiple problematic types in the same index.
673+
statement ok
674+
CREATE TABLE multi_type_tbl (
675+
id INT PRIMARY KEY,
676+
a INT,
677+
tsv TSVECTOR,
678+
tsq TSQUERY NOT NULL,
679+
vec VECTOR(2),
680+
INDEX idx_a_multi (a) STORING (tsv, tsq, vec)
681+
);
682+
683+
statement ok
684+
INSERT INTO multi_type_tbl VALUES
685+
(1, 10, 'word1 word2'::TSVECTOR, 'search'::TSQUERY, '[1.0, 2.0]'::VECTOR(2)),
686+
(2, 20, NULL, 'query'::TSQUERY, NULL);
687+
688+
# Verify that datums_to_bytes fails on each type individually.
689+
statement error pq: illegal argument 0 of type tsvector
690+
SELECT crdb_internal.datums_to_bytes('word1 word2'::TSVECTOR);
691+
692+
statement error pq: illegal argument 0 of type tsquery
693+
SELECT crdb_internal.datums_to_bytes('search'::TSQUERY);
694+
695+
statement error pq: illegal argument 0 of type vector
696+
SELECT crdb_internal.datums_to_bytes('[1.0, 2.0]'::VECTOR(2));
697+
698+
# Multiple non-key-encodable types: hash precheck will be skipped.
699+
# The JOIN-based check should work fine.
700+
statement ok
701+
INSPECT TABLE multi_type_tbl WITH OPTIONS INDEX (idx_a_multi);
702+
703+
query TI
704+
SELECT * FROM last_inspect_job;
705+
----
706+
succeeded 1
707+
708+
statement ok
709+
DROP TABLE tsvector_tbl;
710+
711+
statement ok
712+
DROP TABLE tsquery_tbl;
713+
714+
statement ok
715+
DROP TABLE pgvector_tbl;
716+
717+
statement ok
718+
DROP TABLE multi_type_tbl;
719+
720+
subtest end

0 commit comments

Comments
 (0)