From 3effa7365e76f008ab9a2612c92e4fa6b610b1c9 Mon Sep 17 00:00:00 2001 From: MANDY Alimaa Date: Tue, 9 Dec 2025 14:57:32 -0600 Subject: [PATCH 1/8] feat(c/driver/postgresql): refactor converting decimal type to Postgresql numeric format logic and add test for different scales --- .../copy/postgres_copy_writer_test.cc | 174 ++++++++++++++++++ c/driver/postgresql/copy/writer.h | 123 +++++++++---- 2 files changed, 262 insertions(+), 35 deletions(-) diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc index cd8cb30083..23cff72550 100644 --- a/c/driver/postgresql/copy/postgres_copy_writer_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -500,6 +500,180 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { } } +// Regression test for bug where 44.123456 with Decimal(10,6) became 4412.345500 +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (44.123456), +// (0.123456), (123.456789)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale6[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x2c, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff, 0xff, 0x00, 0x00, 0x00, 0x06, 0x04, 0xd2, 0x15, + 0xe0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 6; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + + 
ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 44123456); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, 123456); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 123456789); + + const std::vector> values = {&decimal1, &decimal2, + &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale6) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale6[i]) << " at position " << i; + } +} + +// Test for scale=5 (remainder 1 when divided by 4) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (12.34567), +// (-9.87654), (0.00123)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale5[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0d, 0x80, 0x1b, 0x58, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x05, 0x00, 0x09, 0x22, + 0x3d, 0x0f, 0xa0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff, 0xff, 0x00, + 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0b, 0xb8, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, 
PostgresCopyWriteNumericScale5) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 5; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 1234567); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -987654); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 123); + + const std::vector> values = {&decimal1, &decimal2, + &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale5) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale5[i]) << " at position " << i; + } +} + +// Test for scale=7 (remainder 3 when divided by 4) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (5.1234567), +// (-123.456789), (0.0000001)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale7[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00, 0x0e, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x05, 0x04, 0xd2, 0x16, 0x26, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, + 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, + 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x07, 0x00, 0x0a, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 7; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 51234567); + + // This represents -123.456789, but NUMERIC(10,7) will display it as -123.4567890 + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -1234567890); + + // 0.0000001 with scale=7 -> internal value: 1 + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 1); + + const std::vector> values = {&decimal1, &decimal2, + &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale7) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < 
buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale7[i]) << " at position " << i; + } +} + using TimestampTestParamType = std::tuple>>; diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index b352635a9f..49e53a58d1 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -234,48 +234,101 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; std::vector pg_digits; - int16_t weight = -(scale_ / kDecDigits); - int16_t dscale = scale_; - bool seen_decimal = scale_ == 0; - bool truncating_trailing_zeros = true; + int16_t weight; + int16_t dscale; char decimal_string[max_decimal_digits_ + 1]; - int digits_remaining = DecimalToString(&decimal, decimal_string); - do { - const int start_pos = - digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; - const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; - const std::string_view substr{decimal_string + start_pos, len}; - int16_t val{}; - std::from_chars(substr.data(), substr.data() + substr.size(), val); + int total_digits = DecimalToString(&decimal, decimal_string); - if (val == 0) { - if (!seen_decimal && truncating_trailing_zeros) { - dscale -= kDecDigits; - } - } else { - pg_digits.insert(pg_digits.begin(), val); - if (!seen_decimal && truncating_trailing_zeros) { - if (val % 1000 == 0) { - dscale -= 3; - } else if (val % 100 == 0) { - dscale -= 2; - } else if (val % 10 == 0) { - dscale -= 1; - } + const int n_int_digits = total_digits > scale_ ? total_digits - scale_ : 0; + int n_frac_digits = total_digits > n_int_digits ? 
total_digits - n_int_digits : 0; + + std::string_view decimal_string_view(decimal_string, total_digits); + std::string_view int_part = decimal_string_view.substr(0, n_int_digits); + + std::string frac_part_str; + if (n_int_digits == 0 && total_digits < scale_) { + frac_part_str.assign(scale_ - total_digits, '0'); + frac_part_str.append(decimal_string, total_digits); + n_frac_digits = scale_; + } else { + frac_part_str.assign(decimal_string_view.substr(n_int_digits, n_frac_digits)); + } + std::string_view frac_part(frac_part_str); + + // Count trailing zeros in the fractional part to minimize dscale + int actual_trailing_zeros = 0; + for (int j = frac_part.length() - 1; j >= 0 && frac_part[j] == '0'; j--) { + actual_trailing_zeros++; + } + + // Group integer part + int i = int_part.length(); + std::vector int_digits; + int n_int_digit_groups = 0; + if (i > 0) { + // Calculate weight based on original integer length + weight = (i + kDecDigits - 1) / kDecDigits - 1; + + while (i > 0) { + int chunk_size = std::min(i, kDecDigits); + std::string_view chunk = int_part.substr(i - chunk_size, chunk_size); + int16_t val{}; + std::from_chars(chunk.data(), chunk.data() + chunk.size(), val); + // Skip trailing zeros in integer part (which appear first when processing + // right-to-left) + if (val != 0 || !int_digits.empty()) { + int_digits.insert(int_digits.begin(), val); } - truncating_trailing_zeros = false; + i -= chunk_size; } - digits_remaining -= kDecDigits; - if (digits_remaining <= 0) { - break; - } - weight++; + n_int_digit_groups = int_digits.size(); + pg_digits.insert(pg_digits.end(), int_digits.begin(), int_digits.end()); + } else { + weight = -1; + n_int_digit_groups = 0; + } + + // Group fractional part + // Chunk in 4-digit groups, padding the LAST group on the right if needed + i = 0; + bool skip_leading_zeros = (n_int_digits == 0); + + while (i < (int)frac_part.length()) { + int chunk_size = std::min((int)frac_part.length() - i, kDecDigits); + std::string 
chunk_str(frac_part.substr(i, chunk_size)); - if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { - seen_decimal = true; + // Pad the last group on the RIGHT if it's less than 4 digits + chunk_str.resize(kDecDigits, '0'); + + int16_t val{}; + std::from_chars(chunk_str.data(), chunk_str.data() + chunk_str.size(), val); + + if (skip_leading_zeros && val == 0) { + weight--; + } else { + pg_digits.push_back(val); + skip_leading_zeros = false; } - } while (true); + i += chunk_size; + } + + // Calculate dscale by removing trailing zeros + dscale = scale_ - actual_trailing_zeros; + + // Trim trailing full zero digit groups from fractional part + // (these zeros are already accounted for in actual_trailing_zeros) + while (static_cast(pg_digits.size()) > n_int_digit_groups && + pg_digits.back() == 0) { + pg_digits.pop_back(); + } + + // If all fractional digits were removed, dscale should be 0 + if (static_cast(pg_digits.size()) <= n_int_digit_groups) { + dscale = 0; + } + + if (dscale < 0) dscale = 0; int16_t ndigits = pg_digits.size(); int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign) + From bc767dcc9fe3c539bd451ae711e0234dd9e7c03d Mon Sep 17 00:00:00 2001 From: MANDY Alimaa Date: Tue, 9 Dec 2025 15:16:34 -0600 Subject: [PATCH 2/8] include algorithm --- c/driver/postgresql/copy/writer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 49e53a58d1..7a1dd48802 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include From f29278db33a970616cb995ad79c4618def7496e9 Mon Sep 17 00:00:00 2001 From: MANDY Alimaa Date: Fri, 26 Dec 2025 15:19:10 -0600 Subject: [PATCH 3/8] handle negative scales and 0 and add tests, cleanup --- .../copy/postgres_copy_writer_test.cc | 456 ++++++++++++++++-- c/driver/postgresql/copy/writer.h | 34 +- 2 files changed, 442 insertions(+), 48 
deletions(-) diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc index 23cff72550..f38bb686c3 100644 --- a/c/driver/postgresql/copy/postgres_copy_writer_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -435,18 +435,32 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteTime) { // This buffer is similar to the read variant above but removes special values // nan, ±inf as they are not supported via the Arrow Decimal types -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), -// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (NULL), (999999999999999999999999999999.99999999), +// (-999999999999999999999999999999.99999999), +// (0), (1234), (92233720368.54775807), (-92233720368.54775808), +// (-123.456), ('0.00001234'), (1), (123.456), (1000000)) AS drvd(col)) // TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericWrite[] = { 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, - 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, + 0x00, 0x00, 0x08, 0x00, 
0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x03, 0x9a, 0x0d, + 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, + 0x05, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, + 0x65, 0x16, 0xb0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, + 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x64, 0xff, 0xff}; TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { adbc_validation::Handle schema; @@ -462,20 +476,52 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { struct ArrowDecimal decimal3; struct ArrowDecimal decimal4; struct ArrowDecimal decimal5; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; - ArrowDecimalInit(&decimal1, size, 19, 8); + ArrowDecimalInit(&decimal1, size, precision, scale); ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 19, 8); + ArrowDecimalInit(&decimal2, size, precision, scale); ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalInit(&decimal3, size, precision, scale); 
ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalInit(&decimal4, size, precision, scale); ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalInit(&decimal5, size, precision, scale); ArrowDecimalSetInt(&decimal5, 100000000000000); + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 123400000000LL); // 1234 * 10^8 + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_8; + max_digits_8.data = "99999999999999999999999999999999999999"; + max_digits_8.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_8); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_8; + min_digits_8.data = "-99999999999999999999999999999999999999"; + min_digits_8.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_8); + const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + std::nullopt, &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, &decimal1, + &decimal2, &decimal3, &decimal4, &decimal5}; ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); @@ -501,15 +547,29 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { } // Regression test for bug where 44.123456 with Decimal(10,6) became 4412.345500 -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (44.123456), -// (0.123456), (123.456789)) AS drvd(col)) TO STDOUT WITH 
(FORMAT binary); +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (99999999999999999999999999999999.999999), +// (-99999999999999999999999999999999.999999), +// (0), (1000000000000), (9223372036854.775807), (-9223372036854.775808), +// (44.123456), (0.123456), (123.456789)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericScale6[] = { 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x2c, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff, 0xff, 0x00, 0x00, 0x00, 0x06, 0x04, 0xd2, 0x15, - 0xe0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0xff, 0xff}; + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x06, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, + 0x4e, 0x02, 0xbc, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, 0x03, 0x40, + 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, 0x4e, 0x03, + 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x2c, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, + 0x02, 0xff, 
0xff, 0x00, 0x00, 0x00, 0x06, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x7b, 0x11, + 0xd7, 0x22, 0xc4, 0xff, 0xff}; TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { adbc_validation::Handle schema; @@ -523,6 +583,12 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { struct ArrowDecimal decimal1; struct ArrowDecimal decimal2; struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; ArrowDecimalInit(&decimal1, size, precision, scale); ArrowDecimalSetInt(&decimal1, 44123456); @@ -533,8 +599,34 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { ArrowDecimalInit(&decimal3, size, precision, scale); ArrowDecimalSetInt(&decimal3, 123456789); - const std::vector> values = {&decimal1, &decimal2, - &decimal3}; + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits; + max_digits.data = "99999999999999999999999999999999999999"; + max_digits.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits; + min_digits.data = "-99999999999999999999999999999999999999"; + min_digits.size_bytes = 39; // 38 digits + 1 for '-' sign + 
ArrowDecimalSetDigits(&decimal_min_128, min_digits); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); @@ -553,17 +645,32 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale6) - 2; ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale6[i]) << " at position " << i; } } // Test for scale=5 (remainder 1 when divided by 4) -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (12.34567), -// (-9.87654), (0.00123)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (999999999999999999999999999999999.99999), +// (-999999999999999999999999999999999.99999), +// (0), (10000000000000), (92233720368547.75807), (-92233720368547.75808), +// (12.34567), (-9.87654), (0.00123)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericScale5[] = { 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x0b, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x05, 0x00, 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23, 0x28, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x0b, 0x00, 0x08, 0x40, 0x00, 0x00, 0x05, 0x00, + 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23, 0x28, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0x00, 0x03, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x06, 0x00, 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09, 0x21, 0x07, + 0xf4, 0x21, 0x63, 0x1d, 0x9c, 0x1b, 0x58, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, + 0x06, 0x00, 0x03, 0x40, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09, 0x21, 0x07, 0xf4, 0x21, + 0x63, 0x1d, 0x9c, 0x1f, 0x40, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0d, 0x80, 0x1b, 0x58, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x05, 0x00, 0x09, 0x22, 0x3d, 0x0f, 0xa0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff, 0xff, 0x00, @@ -581,6 +688,12 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale5) { struct ArrowDecimal decimal1; struct ArrowDecimal decimal2; struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; ArrowDecimalInit(&decimal1, size, precision, scale); ArrowDecimalSetInt(&decimal1, 1234567); @@ -591,8 +704,34 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale5) { ArrowDecimalInit(&decimal3, size, precision, scale); ArrowDecimalSetInt(&decimal3, 123); - const std::vector> values = {&decimal1, &decimal2, - &decimal3}; + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_5; + max_digits_5.data = 
"99999999999999999999999999999999999999"; + max_digits_5.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_5); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_5; + min_digits_5.data = "-99999999999999999999999999999999999999"; + min_digits_5.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_5); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); @@ -616,15 +755,28 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale5) { } // Test for scale=7 (remainder 3 when divided by 4) -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (5.1234567), -// (-123.456789), (0.0000001)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (9999999999999999999999999999999.9999999), +// (-9999999999999999999999999999999.9999999), +// (0), (1000), (922337203685.4775807), (-922337203685.4775808), +// (5.1234567), (-123.456789), (0.0000001)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericScale7[] = { - 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x05, 0x04, 0xd2, 0x16, 0x26, - 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, - 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, - 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x07, 0x00, 0x0a, 0xff, 0xff}; + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x03, 0xe7, 0x27, 0x0f, 
0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x06, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x07, 0x03, 0xe7, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12, 0xa7, 0x1f, + 0x86, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12, 0xa7, 0x1f, 0x90, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x05, 0x04, + 0xd2, 0x16, 0x26, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, + 0x00, 0x00, 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x07, 0x00, 0x0a, 0xff, 0xff}; TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { adbc_validation::Handle schema; @@ -638,6 +790,12 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { struct ArrowDecimal decimal1; struct ArrowDecimal decimal2; struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; ArrowDecimalInit(&decimal1, size, precision, scale); ArrowDecimalSetInt(&decimal1, 51234567); @@ -650,8 +808,34 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { ArrowDecimalInit(&decimal3, size, precision, scale); ArrowDecimalSetInt(&decimal3, 1); - const std::vector> values = {&decimal1, &decimal2, - &decimal3}; + ArrowDecimalInit(&decimal_max_64, 
size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 10000000000LL); // 1000 * 10^7 (1000.0000000) + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_7; + max_digits_7.data = "99999999999999999999999999999999999999"; + max_digits_7.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_7); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_7; + min_digits_7.data = "-99999999999999999999999999999999999999"; + min_digits_7.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_7); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); @@ -668,12 +852,208 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { const struct ArrowBuffer buf = tester.WriteBuffer(); constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale7) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); for (size_t i = 0; i < buf_size; i++) { ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale7[i]) << " at position " << i; } } +// Test for scale=0 (integers) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (99999999999999999999999999999999999999), +// (-99999999999999999999999999999999999999), +// (0), (1000000000000000000000000000000000), (9223372036854775807), +// (-9223372036854775808), (1), (100), (1000), (-100000)) AS drvd(col)) +// TO STDOUT WITH (FORMAT binary); +static uint8_t 
kTestPgCopyNumericScale0[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x40, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, + 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x04, 0x40, 0x00, 0x00, + 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xb0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale0) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 0; + + struct ArrowDecimal decimal0; + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal 
decimal_min_64; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + struct ArrowDecimal decimal_no_frac; + + ArrowDecimalInit(&decimal0, size, precision, scale); + ArrowDecimalSetInt(&decimal0, 0); + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 1); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, 100); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 1000); + + ArrowDecimalInit(&decimal4, size, precision, scale); + ArrowDecimalSetInt(&decimal4, -100000); + + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_0; + max_digits_0.data = "99999999999999999999999999999999999999"; + max_digits_0.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_0); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_0; + min_digits_0.data = "-99999999999999999999999999999999999999"; + min_digits_0.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + struct ArrowStringView no_frac_digits_0; + no_frac_digits_0.data = "1000000000000000000000000000000000"; + no_frac_digits_0.size_bytes = 34; + ArrowDecimalSetDigits(&decimal_no_frac, no_frac_digits_0); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal0, &decimal_no_frac, &decimal_max_64, + &decimal_min_64, &decimal1, &decimal2, &decimal3, &decimal4}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, 
precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale0) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale0[i]) << " at position " << i; + } +} + +// Test negative scale +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (12300), (-12300), (0), (922337203685477580700), +// (99999999999999999999999999999999999900), +// (-99999999999999999999999999999999999900)) +// AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericNegScale2[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0xfc, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0xfc, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, 0x4e, 0x02, 0xbc, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 
0x0f, 0x26, 0xac, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericNegativeScale) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = -2; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_large; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 123); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -123); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_large, size, precision, scale); + ArrowDecimalSetInt(&decimal_large, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits; + max_digits.data = "999999999999999999999999999999999999"; + max_digits.size_bytes = 36; + ArrowDecimalSetDigits(&decimal_max_128, max_digits); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits; + min_digits.data = "-999999999999999999999999999999999999"; + min_digits.size_bytes = 37; // 36 digits + 1 for '-' sign + ArrowDecimalSetDigits(&decimal_min_128, min_digits); + + const std::vector> values = { + &decimal1, &decimal2, &decimal_zero, + &decimal_large, &decimal_max_128, &decimal_min_128}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + 
ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericNegScale2) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericNegScale2[i]) << " at position " << i; + } +} + using TimestampTestParamType = std::tuple>>; diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 7a1dd48802..dfcfff40e2 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -241,17 +241,28 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { char decimal_string[max_decimal_digits_ + 1]; int total_digits = DecimalToString(&decimal, decimal_string); - const int n_int_digits = total_digits > scale_ ? total_digits - scale_ : 0; + // Handle negative scale by appending zeros + int effective_scale = scale_; + if (scale_ < 0) { + int zeros_to_append = -scale_; + memset(decimal_string + total_digits, '0', zeros_to_append); + total_digits += zeros_to_append; + decimal_string[total_digits] = '\0'; + effective_scale = 0; + } + + const int n_int_digits = + total_digits > effective_scale ? total_digits - effective_scale : 0; int n_frac_digits = total_digits > n_int_digits ? 
total_digits - n_int_digits : 0; std::string_view decimal_string_view(decimal_string, total_digits); std::string_view int_part = decimal_string_view.substr(0, n_int_digits); std::string frac_part_str; - if (n_int_digits == 0 && total_digits < scale_) { - frac_part_str.assign(scale_ - total_digits, '0'); + if (n_int_digits == 0 && total_digits < effective_scale) { + frac_part_str.assign(effective_scale - total_digits, '0'); frac_part_str.append(decimal_string, total_digits); - n_frac_digits = scale_; + n_frac_digits = effective_scale; } else { frac_part_str.assign(decimal_string_view.substr(n_int_digits, n_frac_digits)); } @@ -295,8 +306,8 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { i = 0; bool skip_leading_zeros = (n_int_digits == 0); - while (i < (int)frac_part.length()) { - int chunk_size = std::min((int)frac_part.length() - i, kDecDigits); + while (i < static_cast(frac_part.length())) { + int chunk_size = std::min(static_cast(frac_part.length()) - i, kDecDigits); std::string chunk_str(frac_part.substr(i, chunk_size)); // Pad the last group on the RIGHT if it's less than 4 digits @@ -315,7 +326,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } // Calculate dscale by removing trailing zeros - dscale = scale_ - actual_trailing_zeros; + dscale = effective_scale - actual_trailing_zeros; // Trim trailing full zero digit groups from fractional part // (these zeros are already accounted for in actual_trailing_zeros) @@ -327,6 +338,10 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // If all fractional digits were removed, dscale should be 0 if (static_cast(pg_digits.size()) <= n_int_digit_groups) { dscale = 0; + // For zero (no digits at all), use canonical weight=0 + if (pg_digits.empty()) { + weight = 0; + } } if (dscale < 0) dscale = 0; @@ -376,10 +391,9 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { for (size_t i = 0; i < DEC_WIDTH; i++) { int carry; - 
carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); + carry = (buf[nwords - 1] > 0x7FFFFFFFFFFFFFFF); for (size_t j = nwords - 1; j > 0; j--) { - buf[j] = - ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] >= 0x7FFFFFFFFFFFFFFF); + buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] > 0x7FFFFFFFFFFFFFFF); } buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); From 000d48813a183ac68ac8582124558b8168d43ee8 Mon Sep 17 00:00:00 2001 From: Mandukhai Alimaa <114253933+Mandukhai-Alimaa@users.noreply.github.com> Date: Wed, 7 Jan 2026 19:56:15 -0600 Subject: [PATCH 4/8] Update c/driver/postgresql/copy/writer.h Co-authored-by: David Li --- c/driver/postgresql/copy/writer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index dfcfff40e2..fa20d22028 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -245,7 +245,7 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { int effective_scale = scale_; if (scale_ < 0) { int zeros_to_append = -scale_; - memset(decimal_string + total_digits, '0', zeros_to_append); + std::memset(decimal_string + total_digits, '0', zeros_to_append); total_digits += zeros_to_append; decimal_string[total_digits] = '\0'; effective_scale = 0; From 17702e54cad9c91a15a44860d7d0028fe13846a9 Mon Sep 17 00:00:00 2001 From: Mandukhai Alimaa <114253933+Mandukhai-Alimaa@users.noreply.github.com> Date: Wed, 7 Jan 2026 19:56:24 -0600 Subject: [PATCH 5/8] Update c/driver/postgresql/copy/writer.h Co-authored-by: David Li --- c/driver/postgresql/copy/writer.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index fa20d22028..2acc4c5000 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -236,6 +236,9 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { constexpr int kDecDigits = 4; 
std::vector pg_digits; int16_t weight; + // "decimal scale". Number of digits after the decimal point (>=0) + // dscale may be more than the actual number of stored digits, + // implying there are significant zeroes that were not stored int16_t dscale; char decimal_string[max_decimal_digits_ + 1]; From 92d81f2408c4e50bd72eecbcda607c4e583cc7d2 Mon Sep 17 00:00:00 2001 From: Mandukhai Alimaa <114253933+Mandukhai-Alimaa@users.noreply.github.com> Date: Wed, 7 Jan 2026 19:56:30 -0600 Subject: [PATCH 6/8] Update c/driver/postgresql/copy/writer.h Co-authored-by: David Li --- c/driver/postgresql/copy/writer.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 2acc4c5000..193351bbe3 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -235,6 +235,8 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { // Number of decimal digits per Postgres digit constexpr int kDecDigits = 4; std::vector pg_digits; + // There are `weight + 1` base 10000 digits before the decimal point + // (may be negative) int16_t weight; // "decimal scale". 
Number of digits after the decimal point (>=0) // dscale may be more than the actual number of stored digits, From cfca30fb043c2485d95c767227725a0bc48eb0ae Mon Sep 17 00:00:00 2001 From: MANDY Alimaa Date: Wed, 7 Jan 2026 23:04:45 -0600 Subject: [PATCH 7/8] make the code easier to follow by using helper functions and adding comments --- c/driver/postgresql/copy/writer.h | 338 +++++++++++------- .../validation/queries/ingest/decimal.toml | 2 +- .../validation/queries/type/bind/decimal.toml | 2 +- 3 files changed, 219 insertions(+), 123 deletions(-) diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index 193351bbe3..90c8f7ca14 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -225,6 +225,39 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : precision_{precision}, scale_{scale} {} + // PostgreSQL NUMERIC Binary Format: + // =================================== + // PostgreSQL stores NUMERIC values in a variable-length binary format: + // - ndigits (int16): Number of base-10000 digits stored + // - weight (int16): Position of the first digit group relative to decimal point + // (weight can be negative for small fractional numbers) + // - sign (int16): kNumericPos (0x0000) or kNumericNeg (0x4000) + // - dscale (int16): Number of decimal digits after the decimal point (display scale) + // - digits[]: Array of int16 values, each 0-9999 (base-10000 representation) + // + // Value calculation: sum(digits[i] * 10000^(weight - i)); dscale affects display only + // + // Example 1: 12300 (from Arrow Decimal value=123, scale=-2) + // - Logical representation: "12300" + // - Grouped in base-10000: [1][2300] + // - ndigits=2, weight=1, sign=0x0000, dscale=0, digits=[1, 2300] + // - Calculation: 1*10000^1 + 2300*10000^0 = 10000 + 2300 = 12300 + // + // Example 2: 123.45 (from Arrow Decimal value=12345, scale=2) + // - Logical
representation: "123.45" + // - Integer part "123", fractional part "45" + // - Grouped in base-10000: [123][4500] (fractional part right-padded) + // - ndigits=2, weight=0, sign=0x0000, dscale=2, digits=[123, 4500] + // - Calculation: 123*10000^0 + 4500*10000^(-1) = 123 + 0.45 = 123.45 + // + // Example 3: 0.00123 (from Arrow Decimal value=123, scale=5) + // - Logical representation: "0.00123" + // - Integer part "0", fractional part "00123" + // - Grouped in base-10000: [12][3000] ("0012", then "3" right-padded to "3000") + // - ndigits=2, weight=-1, sign=0x0000, dscale=5, digits=[12, 3000] + // - Calculation: 12*10000^(-1) + 3000*10000^(-2) = 0.0012 + 0.00003 + // = 0.00123 + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { struct ArrowDecimal decimal; ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); @@ -232,138 +265,80 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos : kNumericNeg; - // Number of decimal digits per Postgres digit - constexpr int kDecDigits = 4; - std::vector pg_digits; - // There are `weight + 1` base 10000 digits before the decimal point - // (may be negative) - int16_t weight; - // "decimal scale". Number of digits after the decimal point (>=0) - // dscale may be more than the actual number of stored digits, - // implying there are significant zeroes that were not stored - int16_t dscale; - - char decimal_string[max_decimal_digits_ + 1]; - int total_digits = DecimalToString(&decimal, decimal_string); - - // Handle negative scale by appending zeros - int effective_scale = scale_; - if (scale_ < 0) { - int zeros_to_append = -scale_; - std::memset(decimal_string + total_digits, '0', zeros_to_append); - total_digits += zeros_to_append; - decimal_string[total_digits] = '\0'; - effective_scale = 0; - } - - const int n_int_digits = - total_digits > effective_scale ?
total_digits - effective_scale : 0; - int n_frac_digits = total_digits > n_int_digits ? total_digits - n_int_digits : 0; - - std::string_view decimal_string_view(decimal_string, total_digits); - std::string_view int_part = decimal_string_view.substr(0, n_int_digits); - - std::string frac_part_str; - if (n_int_digits == 0 && total_digits < effective_scale) { - frac_part_str.assign(effective_scale - total_digits, '0'); - frac_part_str.append(decimal_string, total_digits); - n_frac_digits = effective_scale; - } else { - frac_part_str.assign(decimal_string_view.substr(n_int_digits, n_frac_digits)); - } - std::string_view frac_part(frac_part_str); - - // Count trailing zeros in the fractional part to minimize dscale - int actual_trailing_zeros = 0; - for (int j = frac_part.length() - 1; j >= 0 && frac_part[j] == '0'; j--) { - actual_trailing_zeros++; - } - - // Group integer part - int i = int_part.length(); - std::vector int_digits; - int n_int_digit_groups = 0; - if (i > 0) { - // Calculate weight based on original integer length - weight = (i + kDecDigits - 1) / kDecDigits - 1; - - while (i > 0) { - int chunk_size = std::min(i, kDecDigits); - std::string_view chunk = int_part.substr(i - chunk_size, chunk_size); - int16_t val{}; - std::from_chars(chunk.data(), chunk.data() + chunk.size(), val); - // Skip trailing zeros in integer part (which appear first when processing - // right-to-left) - if (val != 0 || !int_digits.empty()) { - int_digits.insert(int_digits.begin(), val); - } - i -= chunk_size; - } - n_int_digit_groups = int_digits.size(); - pg_digits.insert(pg_digits.end(), int_digits.begin(), int_digits.end()); - } else { - weight = -1; - n_int_digit_groups = 0; - } - - // Group fractional part - // Chunk in 4-digit groups, padding the LAST group on the right if needed - i = 0; - bool skip_leading_zeros = (n_int_digits == 0); - - while (i < static_cast(frac_part.length())) { - int chunk_size = std::min(static_cast(frac_part.length()) - i, kDecDigits); - 
std::string chunk_str(frac_part.substr(i, chunk_size)); - - // Pad the last group on the RIGHT if it's less than 4 digits - chunk_str.resize(kDecDigits, '0'); - - int16_t val{}; - std::from_chars(chunk_str.data(), chunk_str.data() + chunk_str.size(), val); - - if (skip_leading_zeros && val == 0) { - weight--; - } else { - pg_digits.push_back(val); - skip_leading_zeros = false; - } - i += chunk_size; - } - - // Calculate dscale by removing trailing zeros - dscale = effective_scale - actual_trailing_zeros; - - // Trim trailing full zero digit groups from fractional part - // (these zeros are already accounted for in actual_trailing_zeros) - while (static_cast(pg_digits.size()) > n_int_digit_groups && - pg_digits.back() == 0) { - pg_digits.pop_back(); - } - - // If all fractional digits were removed, dscale should be 0 - if (static_cast(pg_digits.size()) <= n_int_digit_groups) { + // Convert decimal to string and split into integer/fractional parts + // Example transformation for Arrow Decimal(value=12345, scale=2) representing 123.45: + // Input: decimal.value = 12345, scale_ = 2 + // After DecimalToString: raw_decimal_string = "12345", original_digits = 5 + // After SplitDecimalParts: parts.integer_part = "123" + // parts.fractional_part = "45" + // parts.effective_scale = 2 + char raw_decimal_string[max_decimal_digits_ + 1]; + int original_digits = DecimalToString(&decimal, raw_decimal_string); + DecimalParts parts = SplitDecimalParts(raw_decimal_string, original_digits, scale_); + + // Group into PostgreSQL base-10000 representation + // After GroupIntegerDigits: int_digits = [123], weight = 0 + // (groups "123" right-to-left: "123" → 123, only 1 group so weight = 0) + auto [int_digits, weight] = GroupIntegerDigits(parts.integer_part); + + // After GroupFractionalDigits: frac_digits = [4500], final_weight = 0 + // (groups "45" left-to-right with right-padding: "45" → "4500" → 4500) + auto [frac_digits, final_weight] = + GroupFractionalDigits(parts.fractional_part, 
weight, !parts.integer_part.empty()); + + // Combine digit arrays + // After combining: all_digits = [123, 4500] + std::vector all_digits = int_digits; + all_digits.insert(all_digits.end(), frac_digits.begin(), frac_digits.end()); + + // Calculate display scale by counting trailing zeros in the DECIMAL STRING + // For our example: frac_part="45" has 0 trailing zeros, effective_scale=2 + // So dscale = 2 - 0 = 2 (2 fractional digits to display) + int trailing_zeros = 0; + for (int j = parts.fractional_part.length() - 1; + j >= 0 && parts.fractional_part[j] == '0'; j--) { + trailing_zeros++; + } + int16_t dscale = std::max(0, parts.effective_scale - trailing_zeros); + + // Optimize: remove trailing zero digit groups from fractional part + int n_int_digit_groups = int_digits.size(); + while (static_cast(all_digits.size()) > n_int_digit_groups && + all_digits.back() == 0) { + all_digits.pop_back(); + } + + // Handle zero special case + if (all_digits.empty()) { + final_weight = 0; + dscale = 0; + } else if (static_cast(all_digits.size()) <= n_int_digit_groups) { + // All fractional digits were removed dscale = 0; - // For zero (no digits at all), use canonical weight=0 - if (pg_digits.empty()) { - weight = 0; - } } if (dscale < 0) dscale = 0; - int16_t ndigits = pg_digits.size(); - int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign) + + // Write PostgreSQL NUMERIC binary format to buffer + // Final values for our example: ndigits = 2 + // final_weight = 0 + // sign = 0x0000 + // dscale = 2 + // digits = [123, 4500] + // Binary output represents: 123 * 10000^0 + 4500 * 10000^(-1) = 123 + 0.45 = 123.45 + int16_t ndigits = all_digits.size(); + int32_t field_size_bytes = sizeof(ndigits) + sizeof(final_weight) + sizeof(sign) + sizeof(dscale) + ndigits * sizeof(int16_t); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, 
weight, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, final_weight, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); - const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); + const size_t pg_digit_bytes = sizeof(int16_t) * all_digits.size(); NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); - for (auto pg_digit : pg_digits) { + for (auto pg_digit : all_digits) { WriteUnsafe(buffer, pg_digit); } @@ -371,9 +346,19 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: - // returns the length of the string + // Helper struct for organizing data flow between functions + struct DecimalParts { + std::string integer_part; // e.g., "12300" or "123" + std::string fractional_part; // e.g., "45" or "00123" + int effective_scale; // Scale after handling negative values + }; + + // Helper function implementations for decimal-to-PostgreSQL NUMERIC conversion + + // Convert decimal to string (absolute value, no sign) + // Returns the length of the string template - int DecimalToString(struct ArrowDecimal* decimal, char* out) { + int DecimalToString(struct ArrowDecimal* decimal, char* out) const { constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4; uint8_t tmp[DEC_WIDTH / 8]; ArrowDecimalGetBytes(decimal, tmp); @@ -423,6 +408,117 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { return ndigits; } + DecimalParts SplitDecimalParts(const char* decimal_digits, int digit_count, + int scale) const { + // Virtual zeros represent the logical zeros appended for negative scale + // Example: value=123, scale=-2 → "123" with 2 virtual zeros = "12300" + const int virtual_zeros = (scale < 0) ? -scale : 0; + const int effective_scale = (scale < 0) ? 
0 : scale; + const int total_logical_digits = digit_count + virtual_zeros; + + // Calculate split point + const int n_int_digits = total_logical_digits > effective_scale + ? total_logical_digits - effective_scale + : 0; + const int n_frac_digits = total_logical_digits - n_int_digits; + + DecimalParts parts; + parts.effective_scale = effective_scale; + + // Extract integer part + if (n_int_digits > 0) { + if (n_int_digits <= digit_count) { + // Integer part is within the original digits + parts.integer_part.assign(decimal_digits, n_int_digits); + } else { + // Integer part includes all original digits + virtual zeros + parts.integer_part.assign(decimal_digits, digit_count); + parts.integer_part.append(virtual_zeros, '0'); + } + } + + // Extract fractional part (only exists if scale > 0) + if (n_int_digits == 0 && total_logical_digits < effective_scale) { + // Small fractional: 0.00123 needs leading zeros + parts.fractional_part.assign(effective_scale - total_logical_digits, '0'); + parts.fractional_part.append(decimal_digits, digit_count); + } else if (n_frac_digits > 0 && n_int_digits < digit_count) { + // Fractional part from remaining digits (virtual zeros don't appear in fractional + // part) + parts.fractional_part.assign(decimal_digits + n_int_digits, + digit_count - n_int_digits); + } + + return parts; + } + + std::pair, int16_t> GroupIntegerDigits( + const std::string& int_part) const { + constexpr int kDecDigits = 4; + std::vector digits; + + if (int_part.empty()) { + return {digits, -1}; // weight = -1 for pure fractional numbers + } + + // Calculate weight: ceil(length / 4) - 1 + int16_t weight = (int_part.length() + kDecDigits - 1) / kDecDigits - 1; + + // Group right-to-left in chunks of 4 + int i = int_part.length(); + while (i > 0) { + int chunk_size = std::min(i, kDecDigits); + std::string_view chunk = + std::string_view(int_part).substr(i - chunk_size, chunk_size); + + int16_t val{}; + std::from_chars(chunk.data(), chunk.data() + chunk.size(), val); 
+ + // Skip trailing zeros + if (val != 0 || !digits.empty()) { + digits.insert(digits.begin(), val); + } + i -= chunk_size; + } + + return {digits, weight}; + } + + std::pair, int16_t> GroupFractionalDigits( + const std::string& frac_part, int16_t initial_weight, bool has_integer_part) const { + constexpr int kDecDigits = 4; + std::vector digits; + int16_t weight = initial_weight; + + if (frac_part.empty()) { + return {digits, weight}; + } + + bool skip_leading_zeros = !has_integer_part; + + // Group left-to-right in chunks of 4, right-padding last chunk + for (size_t i = 0; i < frac_part.length(); i += kDecDigits) { + int chunk_size = std::min(kDecDigits, static_cast(frac_part.length() - i)); + std::string chunk_str = frac_part.substr(i, chunk_size); + + // Right-pad to 4 digits (e.g., "45" → "4500") + chunk_str.resize(kDecDigits, '0'); + + int16_t val{}; + std::from_chars(chunk_str.data(), chunk_str.data() + chunk_str.size(), val); + + if (skip_leading_zeros && val == 0) { + // Skip leading zero groups in fractional part (e.g., 0.0012 → skip "0012") + weight--; + } else { + digits.push_back(val); + skip_leading_zeros = false; + } + } + + return {digits, weight}; + } + static constexpr uint16_t kNumericPos = 0x0000; static constexpr uint16_t kNumericNeg = 0x4000; static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.toml b/c/driver/postgresql/validation/queries/ingest/decimal.toml index 0f154e4b41..5d4d6fe655 100644 --- a/c/driver/postgresql/validation/queries/ingest/decimal.toml +++ b/c/driver/postgresql/validation/queries/ingest/decimal.toml @@ -16,4 +16,4 @@ # under the License. -skip = "decimal ingest code has a bug and fix has not been merged in yet. 
https://github.com/apache/arrow-adbc/pull/3787" +skip = "AssertionError: Field types do not match: assert Decimal128Type(decimal128(10, 2)) == OpaqueType(extension)" diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.toml b/c/driver/postgresql/validation/queries/type/bind/decimal.toml index 0f154e4b41..5d4d6fe655 100644 --- a/c/driver/postgresql/validation/queries/type/bind/decimal.toml +++ b/c/driver/postgresql/validation/queries/type/bind/decimal.toml @@ -16,4 +16,4 @@ # under the License. -skip = "decimal ingest code has a bug and fix has not been merged in yet. https://github.com/apache/arrow-adbc/pull/3787" +skip = "AssertionError: Field types do not match: assert Decimal128Type(decimal128(10, 2)) == OpaqueType(extension)" From 9fb34ea0135ee8b2a7e19d58ffd464aaecf15d34 Mon Sep 17 00:00:00 2001 From: MANDY Alimaa Date: Thu, 8 Jan 2026 15:23:31 -0600 Subject: [PATCH 8/8] override the decimal test expected schema --- .../validation/queries/ingest/decimal.toml | 19 -------- .../validation/queries/ingest/decimal.txtcase | 45 +++++++++++++++++++ .../validation/queries/type/bind/decimal.toml | 19 -------- .../queries/type/bind/decimal.txtcase | 32 +++++++++++++ 4 files changed, 77 insertions(+), 38 deletions(-) delete mode 100644 c/driver/postgresql/validation/queries/ingest/decimal.toml create mode 100644 c/driver/postgresql/validation/queries/ingest/decimal.txtcase delete mode 100644 c/driver/postgresql/validation/queries/type/bind/decimal.toml create mode 100644 c/driver/postgresql/validation/queries/type/bind/decimal.txtcase diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.toml b/c/driver/postgresql/validation/queries/ingest/decimal.toml deleted file mode 100644 index 5d4d6fe655..0000000000 --- a/c/driver/postgresql/validation/queries/ingest/decimal.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -skip = "AssertionError: Field types do not match: assert Decimal128Type(decimal128(10, 2)) == OpaqueType(extension)" diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.txtcase b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase new file mode 100644 index 0000000000..81bc7bf871 --- /dev/null +++ b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// part: expected_schema +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "u", + "flags": ["nullable"], + "metadata": { + "ARROW:extension:name": "arrow.opaque", + "ARROW:extension:metadata": "{\"type_name\": \"numeric\", \"vendor_name\": \"PostgreSQL\"}" + } + } + ] +} + +// part: expected + +{"idx": 0, "value": "0"} +{"idx": 1, "value": "123.45"} +{"idx": 2, "value": "-123.45"} +{"idx": 3, "value": "9999999.99"} +{"idx": 4, "value": "-9999999.99"} diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.toml b/c/driver/postgresql/validation/queries/type/bind/decimal.toml deleted file mode 100644 index 5d4d6fe655..0000000000 --- a/c/driver/postgresql/validation/queries/type/bind/decimal.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -skip = "AssertionError: Field types do not match: assert Decimal128Type(decimal128(10, 2)) == OpaqueType(extension)" diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase new file mode 100644 index 0000000000..aac33f2697 --- /dev/null +++ b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// part: expected_schema +{ + "format": "+s", + "children": [ + { + "name": "res", + "format": "u", + "flags": ["nullable"], + "metadata": { + "ARROW:extension:name": "arrow.opaque", + "ARROW:extension:metadata": "{\"type_name\": \"numeric\", \"vendor_name\": \"PostgreSQL\"}" + } + } + ] +}