Skip to content

Commit c7d991f

Browse files
authored
Merge pull request #1497 from ydb-platform/bulk-upsert
Support of BulkUpsert over table client instead of table session
2 parents e5f377a + 6b2fbf8 commit c7d991f

File tree

12 files changed

+710
-23
lines changed

12 files changed

+710
-23
lines changed

CHANGELOG.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
* Supported `db.Table().BulkUpsert()` from csv, arrow and ydb rows formats
2+
13
## v3.82.0
2-
* Fixed error on experimental TopicListener.Close
4+
* Fixed error on experimental `TopicListener.Close`
35
* Disabled reporting of `ydb_go_sdk_query_session_count` when metrics are disabled
46
* Disabled reporting of `ydb_go_sdk_ydb_query_session_create_latency` histogram metrics when metrics are disabled
5-
* Allowed skip column for `ScanStruct` by tag `-`
7+
* Allowed skip column for `ScanStruct` by tag `-`
68

79
## v3.81.4
810
* Returned `topicwriter.ErrQueueLimitExceed`, accidentally removed in `v3.81.0`
@@ -14,15 +16,15 @@
1416
* Removed `experimantal` comment for query service client
1517

1618
## v3.81.1
17-
* Fixed nil pointer dereference panic on failed `ydb.Open`
19+
* Fixed nil pointer dereference panic on failed `ydb.Open`
1820
* Added ip discovery. Server can show own ip address and target hostname in the ListEndpoint message. These fields are used to bypass DNS resolving.
1921

2022
## v3.81.0
2123
* Added error ErrMessagesPutToInternalQueueBeforeError to topic writer
2224
* Added write to topics within transactions
2325

2426
## v3.80.10
25-
* Added `ydb.WithSessionPoolSessionUsageLimit()` option for limitation max count of session usage
27+
* Added `ydb.WithSessionPoolSessionUsageLimit()` option for limitation max count of session usage
2628
* Refactored experimental topic iterators in `topicsugar` package
2729

2830
## v3.80.9

internal/table/client.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ import (
44
"context"
55

66
"github.com/jonboulle/clockwork"
7+
"github.com/ydb-platform/ydb-go-genproto/Ydb_Table_V1"
78
"google.golang.org/grpc"
89

10+
"github.com/ydb-platform/ydb-go-sdk/v3/internal/allocator"
911
"github.com/ydb-platform/ydb-go-sdk/v3/internal/pool"
1012
"github.com/ydb-platform/ydb-go-sdk/v3/internal/stack"
1113
"github.com/ydb-platform/ydb-go-sdk/v3/internal/table/config"
@@ -264,6 +266,65 @@ func (c *Client) DoTx(ctx context.Context, op table.TxOperation, opts ...table.O
264266
}, config.RetryOptions...)
265267
}
266268

269+
func (c *Client) BulkUpsert(
270+
ctx context.Context,
271+
tableName string,
272+
data table.BulkUpsertData,
273+
opts ...table.Option,
274+
) (finalErr error) {
275+
if c == nil {
276+
return xerrors.WithStackTrace(errNilClient)
277+
}
278+
279+
if c.isClosed() {
280+
return xerrors.WithStackTrace(errClosedClient)
281+
}
282+
283+
a := allocator.New()
284+
defer a.Free()
285+
286+
attempts, config := 0, c.retryOptions(opts...)
287+
config.RetryOptions = append(config.RetryOptions,
288+
retry.WithIdempotent(true),
289+
retry.WithTrace(&trace.Retry{
290+
OnRetry: func(info trace.RetryLoopStartInfo) func(trace.RetryLoopDoneInfo) {
291+
return func(info trace.RetryLoopDoneInfo) {
292+
attempts = info.Attempts
293+
}
294+
},
295+
}),
296+
)
297+
298+
onDone := trace.TableOnBulkUpsert(config.Trace, &ctx,
299+
stack.FunctionID("github.com/ydb-platform/ydb-go-sdk/v3/internal/table.(*Client).BulkUpsert"),
300+
)
301+
defer func() {
302+
onDone(finalErr, attempts)
303+
}()
304+
305+
request, err := data.ToYDB(a, tableName)
306+
if err != nil {
307+
return xerrors.WithStackTrace(err)
308+
}
309+
310+
client := Ydb_Table_V1.NewTableServiceClient(c.cc)
311+
312+
err = retry.Retry(ctx,
313+
func(ctx context.Context) (err error) {
314+
attempts++
315+
_, err = client.BulkUpsert(ctx, request)
316+
317+
return err
318+
},
319+
config.RetryOptions...,
320+
)
321+
if err != nil {
322+
return xerrors.WithStackTrace(err)
323+
}
324+
325+
return nil
326+
}
327+
267328
func executeTxOperation(ctx context.Context, c *Client, op table.TxOperation, tx table.Transaction) (err error) {
268329
if panicCallback := c.config.PanicCallback(); panicCallback != nil {
269330
defer func() {

internal/table/session.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,6 @@ func (s *session) BulkUpsert(ctx context.Context, table string, rows value.Value
12611261
onDone = trace.TableOnSessionBulkUpsert(
12621262
s.config.Trace(), &ctx,
12631263
stack.FunctionID("github.com/ydb-platform/ydb-go-sdk/v3/internal/table.(*session).BulkUpsert"),
1264-
s,
12651264
)
12661265
)
12671266
defer func() {

table/table.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ import (
44
"context"
55
"time"
66

7+
"github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Formats"
78
"github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Table"
89

10+
"github.com/ydb-platform/ydb-go-sdk/v3/internal/allocator"
911
"github.com/ydb-platform/ydb-go-sdk/v3/internal/closer"
1012
"github.com/ydb-platform/ydb-go-sdk/v3/internal/params"
1113
"github.com/ydb-platform/ydb-go-sdk/v3/internal/tx"
@@ -68,6 +70,12 @@ type Client interface {
6870
// If op TxOperation returns a non-nil error, the transaction will be rolled back
6971
// Warning: if the context has no deadline or cancellation func, then DoTx can run indefinitely
7072
DoTx(ctx context.Context, op TxOperation, opts ...Option) error
73+
74+
// BulkUpsert upserts a batch of rows non-transactionally.
75+
//
76+
// Returns success only when all rows were successfully upserted. In case of an error some rows might
77+
// be upserted and some might not.
78+
BulkUpsert(ctx context.Context, table string, data BulkUpsertData, opts ...Option) error
7179
}
7280

7381
type SessionStatus = string
@@ -179,6 +187,7 @@ type Session interface {
179187
opts ...options.ExecuteScanQueryOption,
180188
) (_ result.StreamResult, err error)
181189

190+
// Deprecated: use Client instance instead.
182191
BulkUpsert(
183192
ctx context.Context,
184193
table string,
@@ -578,3 +587,170 @@ func (opt traceOption) ApplyTableOption(opts *Options) {
578587
// WithTrace wraps the given trace.Table into a traceOption.
func WithTrace(t trace.Table) traceOption { //nolint:gocritic
	opt := traceOption{t: &t}

	return opt
}
590+
591+
type BulkUpsertData interface {
592+
ToYDB(a *allocator.Allocator, tableName string) (*Ydb_Table.BulkUpsertRequest, error)
593+
}
594+
595+
type bulkUpsertRows struct {
596+
rows value.Value
597+
}
598+
599+
func (data bulkUpsertRows) ToYDB(a *allocator.Allocator, tableName string) (*Ydb_Table.BulkUpsertRequest, error) {
600+
return &Ydb_Table.BulkUpsertRequest{
601+
Table: tableName,
602+
Rows: value.ToYDB(data.rows, a),
603+
}, nil
604+
}
605+
606+
func BulkUpsertDataRows(rows value.Value) bulkUpsertRows {
607+
return bulkUpsertRows{
608+
rows: rows,
609+
}
610+
}
611+
612+
type bulkUpsertCsv struct {
613+
data []byte
614+
opts []csvFormatOption
615+
}
616+
617+
type csvFormatOption interface {
618+
applyCsvFormatOption(dataFormat *Ydb_Table.BulkUpsertRequest_CsvSettings) (err error)
619+
}
620+
621+
func (data bulkUpsertCsv) ToYDB(a *allocator.Allocator, tableName string) (*Ydb_Table.BulkUpsertRequest, error) {
622+
var (
623+
request = &Ydb_Table.BulkUpsertRequest{
624+
Table: tableName,
625+
Data: data.data,
626+
}
627+
dataFormat = &Ydb_Table.BulkUpsertRequest_CsvSettings{
628+
CsvSettings: &Ydb_Formats.CsvSettings{},
629+
}
630+
)
631+
632+
for _, opt := range data.opts {
633+
if opt != nil {
634+
if err := opt.applyCsvFormatOption(dataFormat); err != nil {
635+
return nil, err
636+
}
637+
}
638+
}
639+
640+
request.DataFormat = dataFormat
641+
642+
return request, nil
643+
}
644+
645+
func BulkUpsertDataCsv(data []byte, opts ...csvFormatOption) bulkUpsertCsv {
646+
return bulkUpsertCsv{
647+
data: data,
648+
opts: opts,
649+
}
650+
}
651+
652+
type csvHeaderOption struct{}
653+
654+
func (opt *csvHeaderOption) applyCsvFormatOption(dataFormat *Ydb_Table.BulkUpsertRequest_CsvSettings) error {
655+
dataFormat.CsvSettings.Header = true
656+
657+
return nil
658+
}
659+
660+
// First not skipped line is a CSV header (list of column names).
661+
func WithCsvHeader() csvFormatOption {
662+
return &csvHeaderOption{}
663+
}
664+
665+
type csvNullValueOption []byte
666+
667+
func (nullValue csvNullValueOption) applyCsvFormatOption(dataFormat *Ydb_Table.BulkUpsertRequest_CsvSettings) error {
668+
dataFormat.CsvSettings.NullValue = nullValue
669+
670+
return nil
671+
}
672+
673+
// String value that would be interpreted as NULL.
674+
func WithCsvNullValue(value []byte) csvFormatOption {
675+
return csvNullValueOption(value)
676+
}
677+
678+
type csvDelimiterOption []byte
679+
680+
func (delimeter csvDelimiterOption) applyCsvFormatOption(dataFormat *Ydb_Table.BulkUpsertRequest_CsvSettings) error {
681+
dataFormat.CsvSettings.Delimiter = delimeter
682+
683+
return nil
684+
}
685+
686+
// Fields delimiter in CSV file. It's "," if not set.
687+
func WithCsvDelimiter(value []byte) csvFormatOption {
688+
return csvDelimiterOption(value)
689+
}
690+
691+
type csvSkipRowsOption uint32
692+
693+
func (skipRows csvSkipRowsOption) applyCsvFormatOption(dataFormat *Ydb_Table.BulkUpsertRequest_CsvSettings) error {
694+
dataFormat.CsvSettings.SkipRows = uint32(skipRows)
695+
696+
return nil
697+
}
698+
699+
// Number of rows to skip before CSV data. It should be present only in the first upsert of CSV file.
700+
func WithCsvSkipRows(skipRows uint32) csvFormatOption {
701+
return csvSkipRowsOption(skipRows)
702+
}
703+
704+
type bulkUpsertArrow struct {
705+
data []byte
706+
opts []arrowFormatOption
707+
}
708+
709+
type arrowFormatOption interface {
710+
applyArrowFormatOption(req *Ydb_Table.BulkUpsertRequest_ArrowBatchSettings) (err error)
711+
}
712+
713+
func (data bulkUpsertArrow) ToYDB(a *allocator.Allocator, tableName string) (*Ydb_Table.BulkUpsertRequest, error) {
714+
var (
715+
request = &Ydb_Table.BulkUpsertRequest{
716+
Table: tableName,
717+
Data: data.data,
718+
}
719+
dataFormat = &Ydb_Table.BulkUpsertRequest_ArrowBatchSettings{
720+
ArrowBatchSettings: &Ydb_Formats.ArrowBatchSettings{},
721+
}
722+
)
723+
724+
for _, opt := range data.opts {
725+
if opt != nil {
726+
if err := opt.applyArrowFormatOption(dataFormat); err != nil {
727+
return nil, err
728+
}
729+
}
730+
}
731+
732+
request.DataFormat = dataFormat
733+
734+
return request, nil
735+
}
736+
737+
func BulkUpsertDataArrow(data []byte, opts ...arrowFormatOption) bulkUpsertArrow {
738+
return bulkUpsertArrow{
739+
data: data,
740+
opts: opts,
741+
}
742+
}
743+
744+
type arrowSchemaOption []byte
745+
746+
func (schema arrowSchemaOption) applyArrowFormatOption(
747+
dataFormat *Ydb_Table.BulkUpsertRequest_ArrowBatchSettings,
748+
) error {
749+
dataFormat.ArrowBatchSettings.Schema = schema
750+
751+
return nil
752+
}
753+
754+
func WithArrowSchema(schema []byte) arrowFormatOption {
755+
return arrowSchemaOption(schema)
756+
}

0 commit comments

Comments
 (0)