|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | + |
| 6 | +package spanutils |
| 7 | + |
| 8 | +import ( |
| 9 | + "bytes" |
| 10 | + "context" |
| 11 | + "strconv" |
| 12 | + |
| 13 | + "github.com/cockroachdb/cockroach/pkg/keys" |
| 14 | + "github.com/cockroachdb/cockroach/pkg/kv" |
| 15 | + "github.com/cockroachdb/cockroach/pkg/roachpb" |
| 16 | + "github.com/cockroachdb/cockroach/pkg/sql/catalog" |
| 17 | + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb" |
| 18 | + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" |
| 19 | + "github.com/cockroachdb/cockroach/pkg/sql/rowenc" |
| 20 | + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" |
| 21 | + "github.com/cockroachdb/cockroach/pkg/sql/types" |
| 22 | + "github.com/cockroachdb/errors" |
| 23 | +) |
| 24 | + |
| 25 | +// QueryBounds defines a logical span with primary key bounds for SQL filtering. |
| 26 | +type QueryBounds struct { |
| 27 | + // Start represent the lower bounds in the SELECT statement. After each |
| 28 | + // SelectQueryBuilder.Run, the start bounds increase to exclude the rows |
| 29 | + // selected in the previous SelectQueryBuilder.Run. |
| 30 | + // |
| 31 | + // For the first SELECT in a span, the start bounds are inclusive because the |
| 32 | + // start bounds are based on the first row >= Span.Key. That row must be |
| 33 | + // included in the first SELECT. For subsequent SELECTS, the start bounds |
| 34 | + // are exclusive to avoid re-selecting the last row from the previous SELECT. |
| 35 | + Start tree.Datums |
| 36 | + // End represents the upper bounds in the SELECT statement. The end bounds |
| 37 | + // never change between each SelectQueryBuilder.Run. |
| 38 | + // |
| 39 | + // For all SELECTS in a span, the end bounds are inclusive even though a |
| 40 | + // span's end key is exclusive because the end bounds are based on the first |
| 41 | + // row < Span.EndKey. |
| 42 | + End tree.Datums |
| 43 | +} |
| 44 | + |
| 45 | +var ( |
| 46 | + startKeyCompareOps = map[catenumpb.IndexColumn_Direction]string{ |
| 47 | + catenumpb.IndexColumn_ASC: ">", |
| 48 | + catenumpb.IndexColumn_DESC: "<", |
| 49 | + } |
| 50 | + endKeyCompareOps = map[catenumpb.IndexColumn_Direction]string{ |
| 51 | + catenumpb.IndexColumn_ASC: "<", |
| 52 | + catenumpb.IndexColumn_DESC: ">", |
| 53 | + } |
| 54 | +) |
| 55 | + |
| 56 | +// SpanToQueryBounds converts a DistSQL span (roachpb.Span) into SQL-layer |
| 57 | +// primary key bounds (QueryBounds) that can be used to generate WHERE clauses |
| 58 | +// for row-level queries. |
| 59 | +// |
| 60 | +// This function performs a best-effort mapping of the key span to logical |
| 61 | +// row-level start and end conditions by scanning the span to find: |
| 62 | +// - The first row (using a forward scan), which defines the lower bound. |
| 63 | +// - The last row (using a reverse scan), which defines the upper bound. |
| 64 | +// |
| 65 | +// The KV keys for those rows are decoded into primary key datums using the |
| 66 | +// provided codec, column descriptors, and directions. The resulting QueryBounds |
| 67 | +// represents a closed interval [Start, End], inclusive on the start and end, |
| 68 | +// matching SQL query semantics. |
| 69 | +// |
| 70 | +// If no rows are found in the span, the function returns hasRows = false. |
| 71 | +// |
| 72 | +// The caller is expected to use these bounds to construct a SQL WHERE clause that |
| 73 | +// limits a query to just the rows in this span. |
| 74 | +func SpanToQueryBounds( |
| 75 | + ctx context.Context, |
| 76 | + kvDB *kv.DB, |
| 77 | + codec keys.SQLCodec, |
| 78 | + pkColIDs catalog.TableColMap, |
| 79 | + pkColTypes []*types.T, |
| 80 | + pkColDirs []catenumpb.IndexColumn_Direction, |
| 81 | + numFamilies int, |
| 82 | + span roachpb.Span, |
| 83 | + alloc *tree.DatumAlloc, |
| 84 | +) (bounds QueryBounds, hasRows bool, _ error) { |
| 85 | + partialStartKey := span.Key |
| 86 | + partialEndKey := span.EndKey |
| 87 | + startKeyValues, err := kvDB.Scan(ctx, partialStartKey, partialEndKey, int64(numFamilies)) |
| 88 | + if err != nil { |
| 89 | + return bounds, false, errors.Wrapf(err, "scan error startKey=%x endKey=%x", []byte(partialStartKey), []byte(partialEndKey)) |
| 90 | + } |
| 91 | + // If span has 0 rows then return early - it will not be processed. |
| 92 | + if len(startKeyValues) == 0 { |
| 93 | + return bounds, false, nil |
| 94 | + } |
| 95 | + endKeyValues, err := kvDB.ReverseScan(ctx, partialStartKey, partialEndKey, int64(numFamilies)) |
| 96 | + if err != nil { |
| 97 | + return bounds, false, errors.Wrapf(err, "reverse scan error startKey=%x endKey=%x", []byte(partialStartKey), []byte(partialEndKey)) |
| 98 | + } |
| 99 | + // If span has 0 rows then return early - it will not be processed. This is |
| 100 | + // checked again here because the calls to Scan and ReverseScan are |
| 101 | + // non-transactional so the row could have been deleted between the calls. |
| 102 | + if len(endKeyValues) == 0 { |
| 103 | + return bounds, false, nil |
| 104 | + } |
| 105 | + bounds.Start, err = rowenc.DecodeIndexKeyToDatums(codec, pkColIDs, pkColTypes, pkColDirs, startKeyValues, alloc) |
| 106 | + if err != nil { |
| 107 | + return bounds, false, errors.Wrapf(err, "decode startKeyValues error on %+v", startKeyValues) |
| 108 | + } |
| 109 | + bounds.End, err = rowenc.DecodeIndexKeyToDatums(codec, pkColIDs, pkColTypes, pkColDirs, endKeyValues, alloc) |
| 110 | + if err != nil { |
| 111 | + return bounds, false, errors.Wrapf(err, "decode endKeyValues error on %+v", endKeyValues) |
| 112 | + } |
| 113 | + return bounds, true, nil |
| 114 | +} |
| 115 | + |
| 116 | +// GetPKColumnTypes returns tableDesc's primary key column types. |
| 117 | +func GetPKColumnTypes( |
| 118 | + tableDesc catalog.TableDescriptor, indexDesc *descpb.IndexDescriptor, |
| 119 | +) ([]*types.T, error) { |
| 120 | + pkColTypes := make([]*types.T, 0, len(indexDesc.KeyColumnIDs)) |
| 121 | + for i, id := range indexDesc.KeyColumnIDs { |
| 122 | + col, err := catalog.MustFindColumnByID(tableDesc, id) |
| 123 | + if err != nil { |
| 124 | + return nil, errors.Wrapf(err, "column index=%d", i) |
| 125 | + } |
| 126 | + pkColTypes = append(pkColTypes, col.GetType()) |
| 127 | + } |
| 128 | + return pkColTypes, nil |
| 129 | +} |
| 130 | + |
| 131 | +// RenderQueryBounds generates SQL predicates over the primary key columns that |
| 132 | +// restrict rows to fall within a given QueryBounds span. It returns a string |
| 133 | +// fragment suitable for use inside a WHERE clause. |
| 134 | +// |
| 135 | +// The span is expressed as a series of OR-ed row-level comparisons that implement |
| 136 | +// lexicographic filtering, e.g.: |
| 137 | +// |
| 138 | +// (pk1, pk2, pk3) >= ($N, $N+1, $N+2) |
| 139 | +// (pk1, pk2, pk3) <= ($M, $M+1, $M+2) |
| 140 | +// |
| 141 | +// This logic does not use the actual bound values. Instead, it assumes the values |
| 142 | +// will be provided later as SQL placeholders ($N), and constructs the comparison |
| 143 | +// expressions accordingly. Column names are SQL-escaped, and each placeholder is |
| 144 | +// annotated with a fully qualified type cast (e.g., ::INT8). |
| 145 | +// |
| 146 | +// The caller is responsible for assigning the placeholder numbering via |
| 147 | +// endPlaceholderOffset and for providing the actual parameter values in a |
| 148 | +// consistent order (typically: cutoff, end bound datums, start bound datums). |
| 149 | +func RenderQueryBounds( |
| 150 | + pkColNames []string, |
| 151 | + pkColDirs []catenumpb.IndexColumn_Direction, |
| 152 | + pkColTypes []*types.T, |
| 153 | + numStartQueryBounds, numEndQueryBounds int, |
| 154 | + startIncl bool, |
| 155 | + endPlaceholderOffset int, |
| 156 | +) (string, error) { |
| 157 | + if len(pkColNames) != len(pkColDirs) || len(pkColNames) != len(pkColTypes) { |
| 158 | + return "", errors.AssertionFailedf( |
| 159 | + "inconsistent PK metadata: %d names, %d dirs, %d types", |
| 160 | + len(pkColNames), len(pkColDirs), len(pkColTypes)) |
| 161 | + } |
| 162 | + if numStartQueryBounds > len(pkColNames) { |
| 163 | + return "", errors.AssertionFailedf( |
| 164 | + "start bound uses %d columns, but only %d PK columns available", |
| 165 | + numStartQueryBounds, len(pkColNames)) |
| 166 | + } |
| 167 | + if numEndQueryBounds > len(pkColNames) { |
| 168 | + return "", errors.AssertionFailedf( |
| 169 | + "end bound uses %d columns, but only %d PK columns available", |
| 170 | + numEndQueryBounds, len(pkColNames)) |
| 171 | + } |
| 172 | + |
| 173 | + var buf bytes.Buffer |
| 174 | + appendPKRangeORChain( |
| 175 | + &buf, |
| 176 | + pkColNames, |
| 177 | + pkColDirs, |
| 178 | + pkColTypes, |
| 179 | + numStartQueryBounds, |
| 180 | + endPlaceholderOffset+numEndQueryBounds, |
| 181 | + startKeyCompareOps, |
| 182 | + startIncl, |
| 183 | + false, // no AND for the first clause |
| 184 | + ) |
| 185 | + appendPKRangeORChain( |
| 186 | + &buf, |
| 187 | + pkColNames, |
| 188 | + pkColDirs, |
| 189 | + pkColTypes, |
| 190 | + numEndQueryBounds, |
| 191 | + endPlaceholderOffset, |
| 192 | + endKeyCompareOps, |
| 193 | + true, // ending is always inclusive |
| 194 | + numStartQueryBounds > 0, // add AND only if start clause was emitted |
| 195 | + ) |
| 196 | + return buf.String(), nil |
| 197 | +} |
| 198 | + |
| 199 | +// appendPKRangeORChain appends SQL expressions to the buffer that restrict rows |
| 200 | +// using a lexicographic comparison over a prefix of primary key columns. |
| 201 | +// It generates a disjunction of ANDed equality and inequality comparisons, |
| 202 | +// using parameter placeholders and explicit type casts. |
| 203 | +func appendPKRangeORChain( |
| 204 | + buf *bytes.Buffer, |
| 205 | + pkColNames []string, |
| 206 | + pkColDirs []catenumpb.IndexColumn_Direction, |
| 207 | + pkColTypes []*types.T, |
| 208 | + numQueryBounds int, |
| 209 | + placeholderOffset int, |
| 210 | + compareOps map[catenumpb.IndexColumn_Direction]string, |
| 211 | + inclusive bool, |
| 212 | + withAnd bool, |
| 213 | +) { |
| 214 | + if numQueryBounds > 0 { |
| 215 | + if withAnd { |
| 216 | + buf.WriteString("\nAND ") |
| 217 | + } |
| 218 | + buf.WriteString("(") |
| 219 | + for i := 0; i < numQueryBounds; i++ { |
| 220 | + isLast := i == numQueryBounds-1 |
| 221 | + buf.WriteString("\n (") |
| 222 | + for j := 0; j < i; j++ { |
| 223 | + buf.WriteString(pkColNames[j]) |
| 224 | + buf.WriteString(" = $") |
| 225 | + buf.WriteString(strconv.Itoa(j + placeholderOffset)) |
| 226 | + buf.WriteString("::") |
| 227 | + buf.WriteString(pkColTypes[j].SQLStringFullyQualified()) |
| 228 | + buf.WriteString(" AND ") |
| 229 | + } |
| 230 | + buf.WriteString(pkColNames[i]) |
| 231 | + buf.WriteString(" ") |
| 232 | + buf.WriteString(compareOps[pkColDirs[i]]) |
| 233 | + if isLast && inclusive { |
| 234 | + buf.WriteString("=") |
| 235 | + } |
| 236 | + buf.WriteString(" $") |
| 237 | + buf.WriteString(strconv.Itoa(i + placeholderOffset)) |
| 238 | + buf.WriteString("::") |
| 239 | + buf.WriteString(pkColTypes[i].SQLStringFullyQualified()) |
| 240 | + buf.WriteString(")") |
| 241 | + if !isLast { |
| 242 | + buf.WriteString(" OR") |
| 243 | + } |
| 244 | + } |
| 245 | + buf.WriteString("\n)") |
| 246 | + } |
| 247 | +} |
0 commit comments