Skip to content

Commit 0c74fb7

Browse files
craig[bot]normanchennjeffswenson
committed
143668: jsonpath: add support for `() is unknown` r=normanchenn a=normanchenn #### jsonpath: extract boolean operations into `evalBoolean` This commit moves the switch statement evaluation of boolean returning operators into a separate `evalBoolean`. This allows for other operators that expect to return a boolean (logical AND/OR/NOT, filters) to do so without needing to do `isBool` checks after an `eval` call. Epic: None Release note: None #### jsonpath: add support for `() is unknown` This commit adds support for using `() is unknown` within jsonpath queries. This returns a boolean - whether or not the enclosed predicate returned unknown. Epic: None Release note (sql change): Add support for `() is unknown` in JSONPath queries. For example, `SELECT jsonb_path_query('{}', '($ < 1) is unknown');`. 143776: logical: use sql writer for immediate mode r=jeffswenson a=jeffswenson This changes the default immediate mode writer to the sql writer. The logical_replication.consumer.immediate_mode_writer setting may be used to switch back to the 'legacy-kv' writer. The SQL writer will now always use batching. Previously, batching was disabled if the table did not include any indexes, but that seems to be a hold over from when the kv writer used batching. Release note: none Part of: #143754 Co-authored-by: Norman Chen <[email protected]> Co-authored-by: Jeff Swenson <[email protected]>
3 parents eabcae0 + 8b4afd2 + de0cd4c commit 0c74fb7

File tree

12 files changed

+243
-116
lines changed

12 files changed

+243
-116
lines changed

pkg/crosscluster/logical/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ go_test(
135135
"//pkg/ccl/changefeedccl/cdctest",
136136
"//pkg/ccl/changefeedccl/changefeedbase",
137137
"//pkg/ccl/storageccl",
138+
"//pkg/clusterversion",
138139
"//pkg/crosscluster/replicationtestutils",
139140
"//pkg/crosscluster/streamclient",
140141
"//pkg/crosscluster/streamclient/randclient",
@@ -149,6 +150,7 @@ go_test(
149150
"//pkg/security/securitytest",
150151
"//pkg/security/username",
151152
"//pkg/server",
153+
"//pkg/settings/cluster",
152154
"//pkg/sql",
153155
"//pkg/sql/catalog",
154156
"//pkg/sql/catalog/catpb",

pkg/crosscluster/logical/logical_replication_job_test.go

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/cockroachdb/cockroach-go/v2/crdb"
2222
"github.com/cockroachdb/cockroach/pkg/base"
2323
"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/cdcevent"
24+
"github.com/cockroachdb/cockroach/pkg/clusterversion"
2425
"github.com/cockroachdb/cockroach/pkg/crosscluster/replicationtestutils"
2526
"github.com/cockroachdb/cockroach/pkg/crosscluster/streamclient"
2627
_ "github.com/cockroachdb/cockroach/pkg/crosscluster/streamclient/randclient"
@@ -31,6 +32,7 @@ import (
3132
"github.com/cockroachdb/cockroach/pkg/repstream/streampb"
3233
"github.com/cockroachdb/cockroach/pkg/roachpb"
3334
"github.com/cockroachdb/cockroach/pkg/security/username"
35+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
3436
"github.com/cockroachdb/cockroach/pkg/sql"
3537
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
3638
"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
@@ -1689,7 +1691,8 @@ func GetReverseJobID(
16891691
}
16901692

16911693
type mockBatchHandler struct {
1692-
err error
1694+
err error
1695+
batchSize int
16931696
}
16941697

16951698
var _ BatchHandler = &mockBatchHandler{}
@@ -1707,6 +1710,7 @@ func (m mockBatchHandler) SetSyntheticFailurePercent(_ uint32) {}
17071710
func (m mockBatchHandler) Close(context.Context) {}
17081711
func (m mockBatchHandler) ReportMutations(_ *stats.Refresher) {}
17091712
func (m mockBatchHandler) ReleaseLeases(_ context.Context) {}
1713+
func (m mockBatchHandler) BatchSize() int { return m.batchSize }
17101714

17111715
type mockDLQ int
17121716

@@ -1734,16 +1738,19 @@ func TestFlushErrorHandling(t *testing.T) {
17341738
ctx := context.Background()
17351739
dlq := mockDLQ(0)
17361740
lrw := &logicalReplicationWriterProcessor{
1737-
metrics: MakeMetrics(0).(*Metrics),
1738-
getBatchSize: func() int { return 1 },
1739-
dlqClient: &dlq,
1741+
metrics: MakeMetrics(0).(*Metrics),
1742+
dlqClient: &dlq,
17401743
}
17411744
lrw.purgatory.flush = lrw.flushBuffer
17421745
lrw.purgatory.bytesGauge = lrw.metrics.RetryQueueBytes
17431746
lrw.purgatory.eventsGauge = lrw.metrics.RetryQueueEvents
17441747
lrw.purgatory.debug = &streampb.DebugLogicalConsumerStatus{}
17451748

1746-
lrw.bh = []BatchHandler{(mockBatchHandler{pgerror.New(pgcode.UniqueViolation, "index write conflict")})}
1749+
lrw.bh = []BatchHandler{(mockBatchHandler{
1750+
err: pgerror.New(pgcode.UniqueViolation, "index write conflict"),
1751+
batchSize: 1,
1752+
})}
1753+
17471754
lrw.bhStats = make([]flushStats, 1)
17481755

17491756
lrw.purgatory.byteLimit = func() int64 { return 1 }
@@ -2693,3 +2700,43 @@ func TestFailDestAfterSourceFailure(t *testing.T) {
26932700
dbB.Exec(t, "RESUME JOB $1", jobBID)
26942701
jobutils.WaitForJobToFail(t, dbB, jobBID)
26952702
}
2703+
2704+
func TestGetWriterType(t *testing.T) {
2705+
defer leaktest.AfterTest(t)()
2706+
ctx := context.Background()
2707+
2708+
t.Run("validated-mode", func(t *testing.T) {
2709+
st := cluster.MakeTestingClusterSettings()
2710+
wt, err := getWriterType(ctx, jobspb.LogicalReplicationDetails_Validated, st)
2711+
require.NoError(t, err)
2712+
require.Equal(t, writerTypeSQL, wt)
2713+
})
2714+
2715+
t.Run("immediate-mode-pre-25.2", func(t *testing.T) {
2716+
st := cluster.MakeTestingClusterSettingsWithVersions(
2717+
clusterversion.V25_1.Version(),
2718+
clusterversion.V25_1.Version(),
2719+
true, /* initializeVersion */
2720+
)
2721+
wt, err := getWriterType(ctx, jobspb.LogicalReplicationDetails_Immediate, st)
2722+
require.NoError(t, err)
2723+
require.Equal(t, writerTypeSQL, wt)
2724+
})
2725+
2726+
t.Run("immediate-mode-post-25.2", func(t *testing.T) {
2727+
st := cluster.MakeTestingClusterSettingsWithVersions(
2728+
clusterversion.V25_2.Version(),
2729+
clusterversion.PreviousRelease.Version(),
2730+
true, /* initializeVersion */
2731+
)
2732+
immediateModeWriter.Override(ctx, &st.SV, string(writerTypeSQL))
2733+
wt, err := getWriterType(ctx, jobspb.LogicalReplicationDetails_Immediate, st)
2734+
require.NoError(t, err)
2735+
require.Equal(t, writerTypeSQL, wt)
2736+
2737+
immediateModeWriter.Override(ctx, &st.SV, string(writerTypeLegacyKV))
2738+
wt, err = getWriterType(ctx, jobspb.LogicalReplicationDetails_Immediate, st)
2739+
require.NoError(t, err)
2740+
require.Equal(t, writerTypeLegacyKV, wt)
2741+
})
2742+
}

pkg/crosscluster/logical/logical_replication_writer_processor.go

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ import (
1616
"time"
1717

1818
"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/cdcevent"
19+
"github.com/cockroachdb/cockroach/pkg/clusterversion"
1920
"github.com/cockroachdb/cockroach/pkg/crosscluster"
2021
"github.com/cockroachdb/cockroach/pkg/crosscluster/streamclient"
2122
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
2223
"github.com/cockroachdb/cockroach/pkg/keys"
2324
"github.com/cockroachdb/cockroach/pkg/repstream/streampb"
2425
"github.com/cockroachdb/cockroach/pkg/roachpb"
2526
"github.com/cockroachdb/cockroach/pkg/settings"
27+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2628
"github.com/cockroachdb/cockroach/pkg/sql"
2729
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
2830
"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
@@ -93,6 +95,30 @@ var maxChunkSize = settings.RegisterIntSetting(
9395
settings.NonNegativeInt,
9496
)
9597

98+
type writerType string
99+
100+
const (
101+
// writerTypeSQL uses the SQL layer to write replicated rows.
102+
writerTypeSQL writerType = "sql"
103+
// writerTypeLegacyKV uses the legacy KV layer to write rows. The KV writer
104+
// is deprecated because it does not support the full set of features of the
105+
// SQL writer.
106+
writerTypeLegacyKV writerType = "legacy-kv"
107+
)
108+
109+
var immediateModeWriter = settings.RegisterStringSetting(
110+
settings.ApplicationLevel,
111+
"logical_replication.consumer.immediate_mode_writer",
112+
"the writer to use when in immediate mode",
113+
metamorphic.ConstantWithTestChoice("logical_replication.consumer.immediate_mode_writer", string(writerTypeSQL), string(writerTypeLegacyKV)),
114+
settings.WithValidateString(func(sv *settings.Values, val string) error {
115+
if val != string(writerTypeSQL) && val != string(writerTypeLegacyKV) {
116+
return errors.Newf("immediate mode writer must be either 'sql' or 'legacy-kv', got '%s'", val)
117+
}
118+
return nil
119+
}),
120+
)
121+
96122
// logicalReplicationWriterProcessor consumes a cross-cluster replication stream
97123
// by decoding kvs in it to logical changes and applying them by executing DMLs.
98124
type logicalReplicationWriterProcessor struct {
@@ -106,8 +132,6 @@ type logicalReplicationWriterProcessor struct {
106132

107133
configByTable map[descpb.ID]sqlProcessorTableConfig
108134

109-
getBatchSize func() int
110-
111135
streamPartitionClient streamclient.Client
112136

113137
// frontier keeps track of the progress for the spans tracked by this processor
@@ -211,40 +235,9 @@ func newLogicalReplicationWriterProcessor(
211235
}
212236

213237
lrw := &logicalReplicationWriterProcessor{
214-
configByTable: procConfigByDestTableID,
215-
spec: spec,
216-
processorID: processorID,
217-
getBatchSize: func() int {
218-
// TODO(ssd): We set this to 1 since putting more than 1
219-
// row in a KV batch using the new ConditionalPut-based
220-
// conflict resolution would require more complex error
221-
// handling and tracking that we haven't implemented
222-
// yet.
223-
if spec.Mode == jobspb.LogicalReplicationDetails_Immediate {
224-
return 1
225-
}
226-
// We want to decide whether to use implicit txns or not based on
227-
// the schema of the dest table. Benchmarking has shown that
228-
// implicit txns are beneficial on tables with no secondary indexes
229-
// whereas explicit txns are beneficial when at least one secondary
230-
// index is present.
231-
//
232-
// Unfortunately, if we have multiple replication pairs, we don't
233-
// know which tables will be affected by this batch before deciding
234-
// on the batch size, so we'll use a heuristic such that we'll use
235-
// the implicit txns if at least half of the dest tables are
236-
// without the secondary indexes. If we only have a single
237-
// replication pair, then this heuristic gives us the precise
238-
// recommendation.
239-
//
240-
// (Here we have access to the descriptor of the source table, but
241-
// for now we assume that the source and the dest descriptors are
242-
// similar.)
243-
if 2*numTablesWithSecondaryIndexes < len(procConfigByDestTableID) && useImplicitTxns.Get(&flowCtx.Cfg.Settings.SV) {
244-
return 1
245-
}
246-
return int(flushBatchSize.Get(&flowCtx.Cfg.Settings.SV))
247-
},
238+
configByTable: procConfigByDestTableID,
239+
spec: spec,
240+
processorID: processorID,
248241
frontier: frontier,
249242
stopCh: make(chan struct{}),
250243
checkpointCh: make(chan []jobspb.ResolvedSpan),
@@ -725,14 +718,12 @@ func (lrw *logicalReplicationWriterProcessor) setupBatchHandlers(ctx context.Con
725718
var err error
726719
sd := sql.NewInternalSessionData(ctx, flowCtx.Cfg.Settings, "" /* opName */)
727720

728-
if lrw.spec.Mode == jobspb.LogicalReplicationDetails_Immediate {
729-
evalCtx := flowCtx.NewEvalCtx()
730-
evalCtx.SessionDataStack.Push(sd)
731-
rp, err = newKVRowProcessor(ctx, flowCtx.Cfg, evalCtx, lrw.spec, lrw.configByTable)
732-
if err != nil {
733-
return err
734-
}
735-
} else {
721+
writer, err := getWriterType(ctx, lrw.spec.Mode, flowCtx.Cfg.Settings)
722+
if err != nil {
723+
return err
724+
}
725+
switch writer {
726+
case writerTypeSQL:
736727
rp, err = makeSQLProcessor(
737728
ctx, flowCtx.Cfg.Settings, lrw.configByTable,
738729
jobspb.JobID(lrw.spec.JobID),
@@ -747,6 +738,13 @@ func (lrw *logicalReplicationWriterProcessor) setupBatchHandlers(ctx context.Con
747738
if err != nil {
748739
return err
749740
}
741+
case writerTypeLegacyKV:
742+
rp, err = newKVRowProcessor(ctx, flowCtx.Cfg, flowCtx.EvalCtx, lrw.spec, lrw.configByTable)
743+
if err != nil {
744+
return err
745+
}
746+
default:
747+
return errors.AssertionFailedf("unknown logical replication writer type: %s", writer)
750748
}
751749

752750
if streamingKnobs, ok := flowCtx.TestingKnobs().StreamingTestingKnobs.(*sql.StreamingTestingKnobs); ok {
@@ -760,6 +758,24 @@ func (lrw *logicalReplicationWriterProcessor) setupBatchHandlers(ctx context.Con
760758
return nil
761759
}
762760

761+
func getWriterType(
762+
ctx context.Context, mode jobspb.LogicalReplicationDetails_ApplyMode, settings *cluster.Settings,
763+
) (writerType, error) {
764+
switch mode {
765+
case jobspb.LogicalReplicationDetails_Immediate:
766+
// Require v25.2 to use the sql writer by default to ensure that the
767+
// KV origin timestamp validation is available on all nodes.
768+
if settings.Version.IsActive(ctx, clusterversion.V25_2) {
769+
return writerType(immediateModeWriter.Get(&settings.SV)), nil
770+
}
771+
return writerTypeSQL, nil
772+
case jobspb.LogicalReplicationDetails_Validated:
773+
return writerTypeSQL, nil
774+
default:
775+
return "", errors.Newf("unknown logical replication writer type: %s", mode)
776+
}
777+
}
778+
763779
// flushBuffer processes some or all of the events in the passed buffer, and
764780
// zeros out each event in the passed buffer for which it successfully completed
765781
// processing either by applying it or by sending it to a DLQ. If mustProcess is
@@ -987,7 +1003,7 @@ func (t replicationMutationType) String() string {
9871003
func (lrw *logicalReplicationWriterProcessor) flushChunk(
9881004
ctx context.Context, bh BatchHandler, chunk []streampb.StreamEvent_KV, canRetry retryEligibility,
9891005
) (flushStats, error) {
990-
batchSize := lrw.getBatchSize()
1006+
batchSize := bh.BatchSize()
9911007

9921008
lrw.debug.RecordChunkStart()
9931009
defer lrw.debug.RecordChunkComplete()
@@ -1213,20 +1229,14 @@ type BatchHandler interface {
12131229
// or are not applied as a group. If the batch is a single KV it may use an
12141230
// implicit txn.
12151231
HandleBatch(context.Context, []streampb.StreamEvent_KV) (batchStats, error)
1232+
BatchSize() int
12161233
GetLastRow() cdcevent.Row
12171234
SetSyntheticFailurePercent(uint32)
12181235
ReportMutations(*stats.Refresher)
12191236
ReleaseLeases(context.Context)
12201237
Close(context.Context)
12211238
}
12221239

1223-
var useImplicitTxns = settings.RegisterBoolSetting(
1224-
settings.ApplicationLevel,
1225-
"logical_replication.consumer.use_implicit_txns.enabled",
1226-
"determines whether the consumer processes each row in a separate implicit txn",
1227-
metamorphic.ConstantWithTestBool("logical_replication.consumer.use_implicit_txns.enabled", true),
1228-
)
1229-
12301240
func init() {
12311241
rowexec.NewLogicalReplicationWriterProcessor = newLogicalReplicationWriterProcessor
12321242
}

pkg/crosscluster/logical/lww_kv_processor.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ func (p *kvRowProcessor) HandleBatch(
133133
return batchStats{}, errors.AssertionFailedf("TODO: multi-row transactions not supported by the kvRowProcessor")
134134
}
135135

136+
func (p *kvRowProcessor) BatchSize() int {
137+
return 1
138+
}
139+
136140
func (p *kvRowProcessor) processRow(
137141
ctx context.Context,
138142
txn isql.Txn,

pkg/crosscluster/logical/lww_row_processor.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ func (srp *sqlRowProcessor) ReleaseLeases(ctx context.Context) {
280280
srp.querier.ReleaseLeases(ctx)
281281
}
282282

283+
func (srp *sqlRowProcessor) BatchSize() int {
284+
return int(flushBatchSize.Get(&srp.settings.SV))
285+
}
286+
283287
func (*sqlRowProcessor) Close(ctx context.Context) {}
284288

285289
var errInjected = errors.New("injected synthetic error")

pkg/settings/registry.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -258,13 +258,14 @@ var retiredSettings = map[InternalKey]struct{}{
258258
"sql.metrics.statement_details.plan_collection.period": {},
259259

260260
// removed as of 25.2
261-
"kv.snapshot_receiver.excise.enabled": {},
262-
"sql.catalog.experimental_use_session_based_leasing": {},
263-
"bulkio.backup.merge_file_buffer_size": {},
264-
"changefeed.new_webhook_sink_enabled": {},
265-
"changefeed.new_webhook_sink.enabled": {},
266-
"changefeed.new_pubsub_sink_enabled": {},
267-
"changefeed.new_pubsub_sink.enabled": {},
261+
"kv.snapshot_receiver.excise.enabled": {},
262+
"sql.catalog.experimental_use_session_based_leasing": {},
263+
"bulkio.backup.merge_file_buffer_size": {},
264+
"changefeed.new_webhook_sink_enabled": {},
265+
"changefeed.new_webhook_sink.enabled": {},
266+
"changefeed.new_pubsub_sink_enabled": {},
267+
"changefeed.new_pubsub_sink.enabled": {},
268+
"logical_replication.consumer.use_implicit_txns.enabled": {},
268269
}
269270

270271
// grandfatheredDefaultSettings is the list of "grandfathered" existing sql.defaults

pkg/sql/logictest/testdata/logic_test/jsonb_path_query

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,3 +1074,18 @@ query T
10741074
SELECT jsonb_path_query('[{"a": 1, "b": 2}, {"a": 1}, {"b": 2}, {}]', '$[*] ? (exists(@.a) && exists(@.b))');
10751075
----
10761076
{"a": 1, "b": 2}
1077+
1078+
query T
1079+
SELECT jsonb_path_query('{}', '(1 + 2 == 3) is unknown');
1080+
----
1081+
false
1082+
1083+
query T
1084+
SELECT jsonb_path_query('{}', '($ < 1) is unknown');
1085+
----
1086+
true
1087+
1088+
query T
1089+
SELECT jsonb_path_query('{}', '(null like_regex "^he.*$") is unknown');
1090+
----
1091+
true

0 commit comments

Comments
 (0)