Skip to content

Commit 056c300

Browse files
Shivs11 authored and mgartner committed
sql: replicating JSON empty array ordering found in Postgres
Currently, cockroachdb#97928 and cockroachdb#99275 are responsible for laying out a lexicographical ordering for JSON columns so that they can be forward indexed. This ordering is based on the rules posted by Postgres and is in cockroachdb#99849. However, Postgres currently sorts the empty JSON array before any other JSON value. A Postgres bug report for this has been opened: https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org This PR replicates the Postgres behavior. Fixes cockroachdb#105668 Epic: CRDB-24501 Release note: None
1 parent 1382b26 commit 056c300

File tree

9 files changed

+133
-50
lines changed

9 files changed

+133
-50
lines changed

docs/tech-notes/jsonb_forward_indexing.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,18 @@ The following rules were kept in mind while designing this form of encoding, as
4444
5. Objects with an equal number of key value pairs are compared in the order:
4545
`key1`, `value1`, `key2`, `value2`, ….
4646

47+
**NOTE:** There is one exception to these rules, which is neither documented by
48+
Postgres, nor mentioned in the source code: empty arrays are the minimum JSON
49+
value. As far as we can tell, this is a Postgres bug that has existed for some
50+
time. We've decided to replicate this behavior to remain consistent with
51+
Postgres. We've filed a [Postgres bug report](https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org)
52+
to track the issue.
53+
4754
In order to satisfy property 1 at all times, tags are defined in an increasing order of bytes.
4855
These tags will also have to be defined in a way where the tag representing an object is a large byte representation
4956
for a hexadecimal value (such as 0xff) and the subsequent objects have a value 1 less than the previous one,
50-
where the ordering is described in point 1 above.
57+
where the ordering is described in point 1 above. There is a special tag for empty JSON arrays
58+
in order to handle the special case of empty arrays being ordered before all other JSON values.
5159

5260
Additionally, tags representing terminators will also be defined. There will be two terminators, one for the ascending designation and the other for the descending one, which will be required to denote the end of a key encoding of the following JSON values: Objects, Arrays, Numbers and Strings. JSON Boolean and JSON Null are not required to have the terminator since they do not have variable length encoding due to the presence of a single tag (as explained later in this document).
5361

pkg/sql/logictest/testdata/logic_test/json_index

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ INSERT INTO t VALUES
2020
query T
2121
SELECT x FROM t ORDER BY x
2222
----
23+
[]
2324
"a"
2425
"aa"
2526
"abcdefghi"
2627
"b"
2728
1
2829
100
29-
[]
3030
{"a": "b"}
3131

3232

@@ -38,13 +38,13 @@ INSERT INTO t VALUES
3838
query T
3939
SELECT x FROM t@t_pkey ORDER BY x
4040
----
41+
[]
4142
"a"
4243
"aa"
4344
"abcdefghi"
4445
"b"
4546
1
4647
100
47-
[]
4848
{"a": "b"}
4949

5050
# Use the index for point lookups.
@@ -77,12 +77,12 @@ query T
7777
SELECT x FROM t@t_pkey WHERE x > '1' ORDER BY x
7878
----
7979
100
80-
[]
8180
{"a": "b"}
8281

8382
query T
8483
SELECT x FROM t@t_pkey WHERE x < '1' ORDER BY x
8584
----
85+
[]
8686
"a"
8787
"aa"
8888
"abcdefghi"
@@ -92,12 +92,12 @@ SELECT x FROM t@t_pkey WHERE x < '1' ORDER BY x
9292
query T
9393
SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x
9494
----
95+
[]
9596
"a"
9697
"aa"
9798
"abcdefghi"
9899
"b"
99100
100
100-
[]
101101
{"a": "b"}
102102

103103
query T
@@ -109,12 +109,12 @@ query T
109109
SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x DESC
110110
----
111111
{"a": "b"}
112-
[]
113112
100
114113
"b"
115114
"abcdefghi"
116115
"aa"
117116
"a"
117+
[]
118118

119119
# Adding more primitive JSON values.
120120
statement ok
@@ -129,6 +129,7 @@ INSERT INTO t VALUES
129129
query T
130130
SELECT x FROM t@t_pkey ORDER BY x
131131
----
132+
[]
132133
null
133134
"Testing Punctuation?!."
134135
"a"
@@ -141,18 +142,17 @@ null
141142
100
142143
false
143144
true
144-
[]
145145
{"a": "b"}
146146

147147
query T
148148
SELECT x FROM t@t_pkey WHERE x > 'true' ORDER BY x
149149
----
150-
[]
151150
{"a": "b"}
152151

153152
query T
154153
SELECT x FROM t@t_pkey WHERE x < 'false' ORDER BY x
155154
----
155+
[]
156156
null
157157
"Testing Punctuation?!."
158158
"a"
@@ -330,12 +330,12 @@ query T
330330
SELECT x FROM t@t_pkey ORDER BY x
331331
----
332332
NULL
333+
[]
333334
null
334335
"crdb"
335336
1
336337
false
337338
true
338-
[]
339339
[1, 2, 3]
340340
{}
341341
{"a": "b", "c": "d"}
@@ -346,24 +346,24 @@ SELECT x FROM t@t_pkey ORDER BY x DESC
346346
{"a": "b", "c": "d"}
347347
{}
348348
[1, 2, 3]
349-
[]
350349
true
351350
false
352351
1
353352
"crdb"
354353
null
354+
[]
355355
NULL
356356

357357
# Test to show JSON Null is different from NULL.
358358
query T
359359
SELECT x FROM t@t_pkey WHERE x IS NOT NULL ORDER BY x
360360
----
361+
[]
361362
null
362363
"crdb"
363364
1
364365
false
365366
true
366-
[]
367367
[1, 2, 3]
368368
{}
369369
{"a": "b", "c": "d"}
@@ -446,12 +446,12 @@ INSERT INTO t VALUES
446446
query T
447447
SELECT x FROM t@i ORDER BY x;
448448
----
449+
[]
449450
null
450451
"crdb"
451452
1
452453
false
453454
true
454-
[]
455455
[null]
456456
[1]
457457
[{"a": "b"}]

pkg/sql/opt/exec/execbuilder/testdata/json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ vectorized: true
205205
• scan
206206
missing stats
207207
table: t@t_pkey
208-
spans: [/'null' - /'null'] [/'""' - /'""'] [/'[]' - /'[]'] [/'{}' - /'{}']
208+
spans: [/'[]' - /'[]'] [/'null' - /'null'] [/'""' - /'""'] [/'{}' - /'{}']
209209

210210
# Multicolumn index, including JSONB
211211

@@ -252,20 +252,20 @@ INSERT INTO composite VALUES (1, '1.00'::JSONB), (2, '1'::JSONB), (3, '2'::JSONB
252252
(4, '3.0'::JSONB), (5, '"a"'::JSONB)
253253
----
254254
CPut /Table/108/1/1/0 -> /TUPLE/
255-
InitPut /Table/108/2/"G*\x02\x00\x00\x89\x88" -> /BYTES/0x2f0f0c200000002000000403348964
255+
InitPut /Table/108/2/"H*\x02\x00\x00\x89\x88" -> /BYTES/0x2f0f0c200000002000000403348964
256256
CPut /Table/108/1/2/0 -> /TUPLE/
257-
InitPut /Table/108/2/"G*\x02\x00\x00\x8a\x88" -> /BYTES/
257+
InitPut /Table/108/2/"H*\x02\x00\x00\x8a\x88" -> /BYTES/
258258
CPut /Table/108/1/3/0 -> /TUPLE/
259-
InitPut /Table/108/2/"G*\x04\x00\x00\x8b\x88" -> /BYTES/
259+
InitPut /Table/108/2/"H*\x04\x00\x00\x8b\x88" -> /BYTES/
260260
CPut /Table/108/1/4/0 -> /TUPLE/
261-
InitPut /Table/108/2/"G*\x06\x00\x00\x8c\x88" -> /BYTES/0x2f0f0c20000000200000040334891e
261+
InitPut /Table/108/2/"H*\x06\x00\x00\x8c\x88" -> /BYTES/0x2f0f0c20000000200000040334891e
262262
CPut /Table/108/1/5/0 -> /TUPLE/
263-
InitPut /Table/108/2/"F\x12a\x00\x01\x00\x8d\x88" -> /BYTES/
263+
InitPut /Table/108/2/"G\x12a\x00\x01\x00\x8d\x88" -> /BYTES/
264264

265265
query T kvtrace
266266
SELECT j FROM composite where j = '1.00'::JSONB
267267
----
268-
Scan /Table/108/2/"G*\x02\x00\x0{0"-1"}
268+
Scan /Table/108/2/"H*\x02\x00\x0{0"-1"}
269269

270270
query T
271271
SELECT j FROM composite ORDER BY j;

pkg/sql/rowenc/keyside/json.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func decodeJSONKey(buf []byte, dir encoding.Direction) (json.JSON, []byte, error
7979
}
8080
buf = buf[1:] // removing the terminator
8181
jsonVal = json.FromDecimal(dec)
82-
case encoding.JSONArray, encoding.JSONArrayDesc:
82+
case encoding.JSONArray, encoding.JSONArrayDesc, encoding.JsonEmptyArray, encoding.JsonEmptyArrayDesc:
8383
jsonVal, buf, err = decodeJSONArray(buf, dir)
8484
if err != nil {
8585
return nil, nil, errors.NewAssertionErrorWithWrappedErrf(err, "could not decode JSON Array")

pkg/util/encoding/encoding.go

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,18 @@ const (
107107

108108
// Defining different key markers, for the ascending designation,
109109
// for handling different JSON values.
110-
jsonNullKeyMarker = voidMarker + 1
111-
jsonStringKeyMarker = jsonNullKeyMarker + 1
112-
jsonNumberKeyMarker = jsonStringKeyMarker + 1
113-
jsonFalseKeyMarker = jsonNumberKeyMarker + 1
114-
jsonTrueKeyMarker = jsonFalseKeyMarker + 1
115-
jsonArrayKeyMarker = jsonTrueKeyMarker + 1
116-
jsonObjectKeyMarker = jsonArrayKeyMarker + 1
110+
111+
// Postgres currently has a special case (maybe a bug) where the empty JSON
112+
// Array sorts before all other JSON values. See the bug report:
113+
// https://www.postgresql.org/message-id/17873-826fdc8bbcace4f1%40postgresql.org
114+
jsonEmptyArrayKeyMarker = voidMarker + 1
115+
jsonNullKeyMarker = jsonEmptyArrayKeyMarker + 1
116+
jsonStringKeyMarker = jsonNullKeyMarker + 1
117+
jsonNumberKeyMarker = jsonStringKeyMarker + 1
118+
jsonFalseKeyMarker = jsonNumberKeyMarker + 1
119+
jsonTrueKeyMarker = jsonFalseKeyMarker + 1
120+
jsonArrayKeyMarker = jsonTrueKeyMarker + 1
121+
jsonObjectKeyMarker = jsonArrayKeyMarker + 1
117122

118123
arrayKeyTerminator byte = 0x00
119124
arrayKeyDescendingTerminator byte = 0xFF
@@ -127,13 +132,14 @@ const (
127132

128133
// Defining different key markers, for the descending designation,
129134
// for handling different JSON values.
130-
jsonNullKeyDescendingMarker = jsonObjectKeyMarker + 7
131-
jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1
132-
jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1
133-
jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1
134-
jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1
135-
jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1
136-
jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1
135+
jsonEmptyArrayKeyDescendingMarker = jsonObjectKeyMarker + 8
136+
jsonNullKeyDescendingMarker = jsonEmptyArrayKeyDescendingMarker - 1
137+
jsonStringKeyDescendingMarker = jsonNullKeyDescendingMarker - 1
138+
jsonNumberKeyDescendingMarker = jsonStringKeyDescendingMarker - 1
139+
jsonFalseKeyDescendingMarker = jsonNumberKeyDescendingMarker - 1
140+
jsonTrueKeyDescendingMarker = jsonFalseKeyDescendingMarker - 1
141+
jsonArrayKeyDescendingMarker = jsonTrueKeyDescendingMarker - 1
142+
jsonObjectKeyDescendingMarker = jsonArrayKeyDescendingMarker - 1
137143

138144
// Terminators for JSON Key encoding.
139145
jsonKeyTerminator byte = 0x00
@@ -1789,6 +1795,9 @@ const (
17891795
JSONArrayDesc Type = 39
17901796
JSONObject Type = 40
17911797
JSONObjectDesc Type = 41
1798+
// Special case: empty JSON arrays sort before all other JSON values.
1799+
JsonEmptyArray Type = 42
1800+
JsonEmptyArrayDesc Type = 43
17921801
)
17931802

17941803
// typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one
@@ -1849,6 +1858,10 @@ func slowPeekType(b []byte) Type {
18491858
return JSONArray
18501859
case m == jsonArrayKeyDescendingMarker:
18511860
return JSONArrayDesc
1861+
case m == jsonEmptyArrayKeyMarker:
1862+
return JsonEmptyArray
1863+
case m == jsonEmptyArrayKeyDescendingMarker:
1864+
return JsonEmptyArrayDesc
18521865
case m == jsonObjectKeyMarker:
18531866
return JSONObject
18541867
case m == jsonObjectKeyDescendingMarker:
@@ -2009,10 +2022,12 @@ func PeekLength(b []byte) (int, error) {
20092022
length, err := getArrayOrJSONLength(b[1:], dir, IsJSONKeyDone)
20102023
return 1 + length, err
20112024
case jsonArrayKeyMarker, jsonArrayKeyDescendingMarker,
2012-
jsonObjectKeyMarker, jsonObjectKeyDescendingMarker:
2025+
jsonObjectKeyMarker, jsonObjectKeyDescendingMarker,
2026+
jsonEmptyArrayKeyMarker, jsonEmptyArrayKeyDescendingMarker:
20132027
dir := Ascending
20142028
if (m == jsonArrayKeyDescendingMarker) ||
2015-
(m == jsonObjectKeyDescendingMarker) {
2029+
(m == jsonObjectKeyDescendingMarker) ||
2030+
(m == jsonEmptyArrayKeyDescendingMarker) {
20162031
dir = Descending
20172032
}
20182033
// removing the starter tag
@@ -3500,11 +3515,17 @@ func EncodeJSONTrueKeyMarker(buf []byte, dir Direction) []byte {
35003515

35013516
// EncodeJSONArrayKeyMarker adds a JSON Array key encoding marker
35023517
// to buf and returns the new buffer.
3503-
func EncodeJSONArrayKeyMarker(buf []byte, dir Direction) []byte {
3518+
func EncodeJSONArrayKeyMarker(buf []byte, dir Direction, arrayLength int64) []byte {
35043519
switch dir {
35053520
case Ascending:
3521+
if arrayLength == 0 {
3522+
return append(buf, jsonEmptyArrayKeyMarker)
3523+
}
35063524
return append(buf, jsonArrayKeyMarker)
35073525
case Descending:
3526+
if arrayLength == 0 {
3527+
return append(buf, jsonEmptyArrayKeyDescendingMarker)
3528+
}
35083529
return append(buf, jsonArrayKeyDescendingMarker)
35093530
default:
35103531
panic("invalid direction")
@@ -3621,15 +3642,15 @@ func ValidateAndConsumeJSONKeyMarker(buf []byte, dir Direction) ([]byte, Type, e
36213642
case Descending:
36223643
switch typ {
36233644
case JSONNullDesc, JSONNumberDesc, JSONStringDesc, JSONFalseDesc,
3624-
JSONTrueDesc, JSONArrayDesc, JSONObjectDesc:
3645+
JSONTrueDesc, JSONArrayDesc, JSONObjectDesc, JsonEmptyArrayDesc:
36253646
return buf[1:], typ, nil
36263647
default:
36273648
return nil, Unknown, errors.Newf("invalid type found %s", typ)
36283649
}
36293650
case Ascending:
36303651
switch typ {
36313652
case JSONNull, JSONNumber, JSONString, JSONFalse, JSONTrue, JSONArray,
3632-
JSONObject:
3653+
JSONObject, JsonEmptyArray:
36333654
return buf[1:], typ, nil
36343655
default:
36353656
return nil, Unknown, errors.Newf("invalid type found %s", typ)

pkg/util/encoding/type_string.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/util/json/encoded.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,10 +606,20 @@ func (j *jsonEncoded) AreKeysSorted() bool {
606606
return decoded.AreKeysSorted()
607607
}
608608

609-
func (j *jsonEncoded) Compare(other JSON) (int, error) {
609+
func (j *jsonEncoded) Compare(other JSON) (_ int, err error) {
610610
if other == nil {
611611
return -1, nil
612612
}
613+
// We must first check for the special case of empty arrays, which are the
614+
// minimum JSON value.
615+
switch {
616+
case isEmptyArray(j) && isEmptyArray(other):
617+
return 0, nil
618+
case isEmptyArray(j):
619+
return -1, nil
620+
case isEmptyArray(other):
621+
return 1, nil
622+
}
613623
if cmp := cmpJSONTypes(j.Type(), other.Type()); cmp != 0 {
614624
return cmp, nil
615625
}

0 commit comments

Comments
 (0)