diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc b/c/driver/postgresql/copy/postgres_copy_writer_test.cc index cd8cb30083..f38bb686c3 100644 --- a/c/driver/postgresql/copy/postgres_copy_writer_test.cc +++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc @@ -435,18 +435,32 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteTime) { // This buffer is similar to the read variant above but removes special values // nan, ±inf as they are not supported via the Arrow Decimal types -// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456), -// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col)) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (NULL), (999999999999999999999999999999.99999999), +// (-999999999999999999999999999999.99999999), +// (0), (1234), (92233720368.54775807), (-92233720368.54775808), +// (-123.456), ('0.00001234'), (1), (123.456), (1000000)) AS drvd(col)) // TO STDOUT WITH (FORMAT binary); static uint8_t kTestPgCopyNumericWrite[] = { 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x00, - 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, - 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, - 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff}; + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, + 0x00, 0x00, 0x08, 0x00, 0x63, 0x27, 
0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x03, 0x9a, 0x0d, + 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, + 0x05, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, + 0x65, 0x16, 0xb0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, + 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, + 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x64, 0xff, 0xff}; TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { adbc_validation::Handle schema; @@ -462,20 +476,52 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { struct ArrowDecimal decimal3; struct ArrowDecimal decimal4; struct ArrowDecimal decimal5; - - ArrowDecimalInit(&decimal1, size, 19, 8); + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); ArrowDecimalSetInt(&decimal1, -12345600000); - ArrowDecimalInit(&decimal2, size, 19, 8); + ArrowDecimalInit(&decimal2, size, precision, scale); ArrowDecimalSetInt(&decimal2, 1234); - ArrowDecimalInit(&decimal3, size, 19, 8); + ArrowDecimalInit(&decimal3, size, precision, scale); 
ArrowDecimalSetInt(&decimal3, 100000000); - ArrowDecimalInit(&decimal4, size, 19, 8); + ArrowDecimalInit(&decimal4, size, precision, scale); ArrowDecimalSetInt(&decimal4, 12345600000); - ArrowDecimalInit(&decimal5, size, 19, 8); + ArrowDecimalInit(&decimal5, size, precision, scale); ArrowDecimalSetInt(&decimal5, 100000000000000); + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 123400000000LL); // 1234 * 10^8 + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_8; + max_digits_8.data = "99999999999999999999999999999999999999"; + max_digits_8.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_8); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_8; + min_digits_8.data = "-99999999999999999999999999999999999999"; + min_digits_8.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_8); + const std::vector> values = { - std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5}; + std::nullopt, &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, &decimal1, + &decimal2, &decimal3, &decimal4, &decimal5}; ArrowSchemaInit(&schema.value); ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); @@ -500,6 +546,514 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) { } } +// Regression test for bug where 44.123456 with Decimal(10,6) became 4412.345500 +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (99999999999999999999999999999999.999999), +// 
(-99999999999999999999999999999999.999999), +// (0), (1000000000000), (9223372036854.775807), (-9223372036854.775808), +// (44.123456), (0.123456), (123.456789)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale6[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x06, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, + 0x4e, 0x02, 0xbc, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, 0x03, 0x40, + 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, 0x4e, 0x03, + 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x2c, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, + 0x02, 0xff, 0xff, 0x00, 0x00, 0x00, 0x06, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x7b, 0x11, + 0xd7, 0x22, 0xc4, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 6; + + struct 
ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 44123456); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, 123456); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 123456789); + + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits; + max_digits.data = "99999999999999999999999999999999999999"; + max_digits.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits; + min_digits.data = "-99999999999999999999999999999999999999"; + min_digits.size_bytes = 39; // 38 digits + 1 for '-' sign + ArrowDecimalSetDigits(&decimal_min_128, min_digits); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + 
ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale6) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale6[i]) << " at position " << i; + } +} + +// Test for scale=5 (remainder 1 when divided by 4) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (999999999999999999999999999999999.99999), +// (-999999999999999999999999999999999.99999), +// (0), (10000000000000), (92233720368547.75807), (-92233720368547.75808), +// (12.34567), (-9.87654), (0.00123)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale5[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x0b, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x05, 0x00, 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23, 0x28, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x0b, 0x00, 0x08, 0x40, 0x00, 0x00, 0x05, 0x00, + 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23, 0x28, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x06, 0x00, 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09, 0x21, 0x07, + 0xf4, 0x21, 0x63, 0x1d, 0x9c, 0x1b, 0x58, 0x00, 0x01, 0x00, 
0x00, 0x00, 0x14, 0x00, + 0x06, 0x00, 0x03, 0x40, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09, 0x21, 0x07, 0xf4, 0x21, + 0x63, 0x1d, 0x9c, 0x1f, 0x40, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0d, 0x80, 0x1b, 0x58, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x05, 0x00, 0x09, 0x22, + 0x3d, 0x0f, 0xa0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff, 0xff, 0x00, + 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0b, 0xb8, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale5) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 5; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 1234567); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -987654); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 123); + + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + 
struct ArrowStringView max_digits_5; + max_digits_5.data = "99999999999999999999999999999999999999"; + max_digits_5.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_5); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_5; + min_digits_5.data = "-99999999999999999999999999999999999999"; + min_digits_5.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_5); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale5) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale5[i]) << " at position " << i; + } +} + +// Test for scale=7 (remainder 3 when divided by 4) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (9999999999999999999999999999999.9999999), +// (-9999999999999999999999999999999.9999999), +// (0), (1000), (922337203685.4775807), (-922337203685.4775808), +// (5.1234567), (-123.456789), (0.0000001)) AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale7[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x07, 0x03, 0xe7, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x06, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x07, 0x03, 0xe7, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12, 0xa7, 0x1f, + 0x86, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x40, 0x00, 0x00, + 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12, 0xa7, 0x1f, 0x90, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x05, 0x04, + 0xd2, 0x16, 0x26, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, + 0x00, 0x00, 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x07, 0x00, 0x0a, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 7; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_no_frac; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 
51234567); + + // -1234567890 at scale=7 is -123.4567890; the trailing zero is trimmed, so dscale=6 + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -1234567890); + + // 0.0000001 with scale=7 -> internal value: 1 + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 1); + + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + ArrowDecimalSetInt(&decimal_no_frac, 10000000000LL); // 1000 * 10^7 (1000.0000000) + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_7; + max_digits_7.data = "99999999999999999999999999999999999999"; + max_digits_7.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_7); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_7; + min_digits_7.data = "-99999999999999999999999999999999999999"; + min_digits_7.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_7); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal_zero, + &decimal_no_frac, &decimal_max_64, &decimal_min_64, + &decimal1, &decimal2, &decimal3}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, 
*type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale7) - 2; + + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale7[i]) << " at position " << i; + } +} + +// Test for scale=0 (integers) +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (99999999999999999999999999999999999999), +// (-99999999999999999999999999999999999999), +// (0), (1000000000000000000000000000000000), (9223372036854775807), +// (-9223372036854775808), (1), (100), (1000), (-100000)) AS drvd(col)) +// TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericScale0[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x40, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, + 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x04, 0x40, 0x00, 0x00, + 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xb0, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale0) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = 0; + + struct ArrowDecimal decimal0; + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal3; + struct ArrowDecimal decimal4; + struct ArrowDecimal decimal_max_64; + struct ArrowDecimal decimal_min_64; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + struct ArrowDecimal decimal_no_frac; + + ArrowDecimalInit(&decimal0, size, precision, scale); + ArrowDecimalSetInt(&decimal0, 0); + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 1); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, 100); + + ArrowDecimalInit(&decimal3, size, precision, scale); + ArrowDecimalSetInt(&decimal3, 1000); + + ArrowDecimalInit(&decimal4, size, precision, scale); + ArrowDecimalSetInt(&decimal4, -100000); + + ArrowDecimalInit(&decimal_max_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_min_64, size, precision, scale); + ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits_0; + max_digits_0.data = "99999999999999999999999999999999999999"; + max_digits_0.size_bytes = 38; + ArrowDecimalSetDigits(&decimal_max_128, max_digits_0); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits_0; + 
min_digits_0.data = "-99999999999999999999999999999999999999"; + min_digits_0.size_bytes = 39; + ArrowDecimalSetDigits(&decimal_min_128, min_digits_0); + + ArrowDecimalInit(&decimal_no_frac, size, precision, scale); + struct ArrowStringView no_frac_digits_0; + no_frac_digits_0.data = "1000000000000000000000000000000000"; + no_frac_digits_0.size_bytes = 34; + ArrowDecimalSetDigits(&decimal_no_frac, no_frac_digits_0); + + const std::vector> values = { + &decimal_max_128, &decimal_min_128, &decimal0, &decimal_no_frac, &decimal_max_64, + &decimal_min_64, &decimal1, &decimal2, &decimal3, &decimal4}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale0) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale0[i]) << " at position " << i; + } +} + +// Test negative scale +// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES +// (12300), (-12300), (0), (922337203685477580700), +// (99999999999999999999999999999999999900), +// (-99999999999999999999999999999999999900)) +// AS drvd(col)) TO STDOUT WITH (FORMAT binary); +static uint8_t kTestPgCopyNumericNegScale2[] = { + 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 
0x08, 0xfc, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0xfc, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, 0x4e, 0x02, 0xbc, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, + 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x26, 0xac, 0xff, 0xff}; + +TEST_F(PostgresCopyTest, PostgresCopyWriteNumericNegativeScale) { + adbc_validation::Handle schema; + adbc_validation::Handle array; + struct ArrowError na_error; + constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128; + constexpr int32_t size = 128; + constexpr int32_t precision = 38; + constexpr int32_t scale = -2; + + struct ArrowDecimal decimal1; + struct ArrowDecimal decimal2; + struct ArrowDecimal decimal_zero; + struct ArrowDecimal decimal_large; + struct ArrowDecimal decimal_max_128; + struct ArrowDecimal decimal_min_128; + + ArrowDecimalInit(&decimal1, size, precision, scale); + ArrowDecimalSetInt(&decimal1, 123); + + ArrowDecimalInit(&decimal2, size, precision, scale); + ArrowDecimalSetInt(&decimal2, -123); + + ArrowDecimalInit(&decimal_zero, size, precision, scale); + ArrowDecimalSetInt(&decimal_zero, 0); + + ArrowDecimalInit(&decimal_large, size, precision, scale); + ArrowDecimalSetInt(&decimal_large, 9223372036854775807LL); + + ArrowDecimalInit(&decimal_max_128, size, precision, scale); + struct ArrowStringView max_digits; + max_digits.data = "999999999999999999999999999999999999"; + max_digits.size_bytes = 36; + ArrowDecimalSetDigits(&decimal_max_128, 
max_digits); + + ArrowDecimalInit(&decimal_min_128, size, precision, scale); + struct ArrowStringView min_digits; + min_digits.data = "-999999999999999999999999999999999999"; + min_digits.size_bytes = 37; // 36 digits + 1 for '-' sign + ArrowDecimalSetDigits(&decimal_min_128, min_digits); + + const std::vector> values = { + &decimal1, &decimal2, &decimal_zero, + &decimal_large, &decimal_max_128, &decimal_min_128}; + + ArrowSchemaInit(&schema.value); + ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0); + ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type, precision, scale), + 0); + ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0); + ASSERT_EQ(adbc_validation::MakeBatch(&schema.value, &array.value, + &na_error, values), + ADBC_STATUS_OK); + + PostgresCopyStreamWriteTester tester; + ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_), NANOARROW_OK); + ASSERT_EQ(tester.WriteAll(nullptr), ENODATA); + + const struct ArrowBuffer buf = tester.WriteBuffer(); + constexpr size_t buf_size = sizeof(kTestPgCopyNumericNegScale2) - 2; + ASSERT_EQ(buf.size_bytes, static_cast(buf_size)); + for (size_t i = 0; i < buf_size; i++) { + ASSERT_EQ(buf.data[i], kTestPgCopyNumericNegScale2[i]) << " at position " << i; + } +} + using TimestampTestParamType = std::tuple>>; diff --git a/c/driver/postgresql/copy/writer.h b/c/driver/postgresql/copy/writer.h index b352635a9f..90c8f7ca14 100644 --- a/c/driver/postgresql/copy/writer.h +++ b/c/driver/postgresql/copy/writer.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -224,6 +225,39 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale) : precision_{precision}, scale_{scale} {} + // PostgreSQL NUMERIC Binary Format: + // =================================== + // PostgreSQL stores NUMERIC values in a variable-length binary format: + // - ndigits (int16): Number of base-10000 digits 
stored + // - weight (int16): Position of the first digit group relative to decimal point + // (weight can be negative for small fractional numbers) + // - sign (int16): kNumericPos (0x0000) or kNumericNeg (0x4000) + // - dscale (int16): Number of decimal digits after the decimal point (display scale) + // - digits[]: Array of int16 values, each 0-9999 (base-10000 representation) + // + // Value calculation: sum(digits[i] * 10000^(weight - i)); dscale only affects display + // + // Example 1: 12300 (from Arrow Decimal value=123, scale=-2) + // - Logical representation: "12300" + // - Grouped in base-10000: [1][2300] + // - ndigits=2, weight=1, sign=0x0000, dscale=0, digits=[1, 2300] + // - Calculation: 1*10000^1 + 2300*10000^0 = 10000 + 2300 = 12300 + // + // Example 2: 123.45 (from Arrow Decimal value=12345, scale=2) + // - Logical representation: "123.45" + // - Integer part "123", fractional part "45" + // - Grouped in base-10000: [123][4500] (fractional part right-padded) + // - ndigits=2, weight=0, sign=0x0000, dscale=2, digits=[123, 4500] + // - Calculation: 123*10000^0 + 4500*10000^(-1) = 123 + 0.45 = 123.45 + // + // Example 3: 0.00123 (from Arrow Decimal value=123, scale=5) + // - Logical representation: "0.00123" + // - Integer part "0", fractional part "00123" + // - Grouped in base-10000: [0012][3000] (right-padded; no integer group is emitted) + // - ndigits=2, weight=-1, sign=0x0000, dscale=5, digits=[12, 3000] + // - Calculation: 12*10000^(-1) + 3000*10000^(-2) = 0.0012 + 0.00003 + // = 0.00123 + ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) override { struct ArrowDecimal decimal; ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_); @@ -231,65 +265,80 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? 
kNumericPos : kNumericNeg; - // Number of decimal digits per Postgres digit - constexpr int kDecDigits = 4; - std::vector pg_digits; - int16_t weight = -(scale_ / kDecDigits); - int16_t dscale = scale_; - bool seen_decimal = scale_ == 0; - bool truncating_trailing_zeros = true; - - char decimal_string[max_decimal_digits_ + 1]; - int digits_remaining = DecimalToString(&decimal, decimal_string); - do { - const int start_pos = - digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits; - const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits; - const std::string_view substr{decimal_string + start_pos, len}; - int16_t val{}; - std::from_chars(substr.data(), substr.data() + substr.size(), val); - - if (val == 0) { - if (!seen_decimal && truncating_trailing_zeros) { - dscale -= kDecDigits; - } - } else { - pg_digits.insert(pg_digits.begin(), val); - if (!seen_decimal && truncating_trailing_zeros) { - if (val % 1000 == 0) { - dscale -= 3; - } else if (val % 100 == 0) { - dscale -= 2; - } else if (val % 10 == 0) { - dscale -= 1; - } - } - truncating_trailing_zeros = false; - } - digits_remaining -= kDecDigits; - if (digits_remaining <= 0) { - break; - } - weight++; - - if (start_pos <= static_cast(std::strlen(decimal_string)) - scale_) { - seen_decimal = true; - } - } while (true); - - int16_t ndigits = pg_digits.size(); - int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign) + + // Convert decimal to string and split into integer/fractional parts + // Example transformation for Arrow Decimal(value=12345, scale=2) representing 123.45: + // Input: decimal.value = 12345, scale_ = 2 + // After DecimalToString: raw_decimal_string = "12345", original_digits = 5 + // After SplitDecimalParts: parts.integer_part = "123" + // parts.fractional_part = "45" + // parts.effective_scale = 2 + char raw_decimal_string[max_decimal_digits_ + 1]; + int original_digits = DecimalToString(&decimal, raw_decimal_string); + DecimalParts parts = 
SplitDecimalParts(raw_decimal_string, original_digits, scale_); + + // Group into PostgreSQL base-10000 representation + // After GroupIntegerDigits: int_digits = [123], weight = 0 + // (groups "123" right-to-left: "123" → 123, only 1 group so weight = 0) + auto [int_digits, weight] = GroupIntegerDigits(parts.integer_part); + + // After GroupFractionalDigits: frac_digits = [4500], final_weight = 0 + // (groups "45" left-to-right with right-padding: "45" → "4500" → 4500) + auto [frac_digits, final_weight] = + GroupFractionalDigits(parts.fractional_part, weight, !parts.integer_part.empty()); + + // Combine digit arrays + // After combining: all_digits = [123, 4500] + std::vector all_digits = int_digits; + all_digits.insert(all_digits.end(), frac_digits.begin(), frac_digits.end()); + + // Calculate display scale by counting trailing zeros in the DECIMAL STRING + // For our example: frac_part="45" has 0 trailing zeros, effective_scale=2 + // So dscale = 2 - 0 = 2 (2 fractional digits to display) + int trailing_zeros = 0; + for (int j = parts.fractional_part.length() - 1; + j >= 0 && parts.fractional_part[j] == '0'; j--) { + trailing_zeros++; + } + int16_t dscale = std::max(0, parts.effective_scale - trailing_zeros); + + // Optimize: remove trailing zero digit groups from fractional part + int n_int_digit_groups = int_digits.size(); + while (static_cast(all_digits.size()) > n_int_digit_groups && + all_digits.back() == 0) { + all_digits.pop_back(); + } + + // Handle zero special case + if (all_digits.empty()) { + final_weight = 0; + dscale = 0; + } else if (static_cast(all_digits.size()) <= n_int_digit_groups) { + // All fractional digits were removed + dscale = 0; + } + + if (dscale < 0) dscale = 0; + + // Write PostgreSQL NUMERIC binary format to buffer + // Final values for our example: ndigits = 2 + // final_weight = 0 + // sign = 0x0000 + // dscale = 2 + // digits = [123, 4500] + // Binary output represents: 123 * 10000^0 + 4500 * 10000^(-1) = 123 + 0.45 = 123.45 + 
int16_t ndigits = all_digits.size(); + int32_t field_size_bytes = sizeof(ndigits) + sizeof(final_weight) + sizeof(sign) + sizeof(dscale) + ndigits * sizeof(int16_t); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, field_size_bytes, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, ndigits, error)); - NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, weight, error)); + NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, final_weight, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, sign, error)); NANOARROW_RETURN_NOT_OK(WriteChecked(buffer, dscale, error)); - const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size(); + const size_t pg_digit_bytes = sizeof(int16_t) * all_digits.size(); NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes)); - for (auto pg_digit : pg_digits) { + for (auto pg_digit : all_digits) { WriteUnsafe(buffer, pg_digit); } @@ -297,9 +346,19 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { } private: - // returns the length of the string + // Helper struct for organizing data flow between functions + struct DecimalParts { + std::string integer_part; // e.g., "12300" or "123" + std::string fractional_part; // e.g., "45" or "00123" + int effective_scale; // Scale after handling negative values + }; + + // Helper function implementations for decimal-to-PostgreSQL NUMERIC conversion + + // Convert decimal to string (absolute value, no sign) + // Returns the length of the string template - int DecimalToString(struct ArrowDecimal* decimal, char* out) { + int DecimalToString(struct ArrowDecimal* decimal, char* out) const { constexpr size_t nwords = (DEC_WIDTH == 128) ? 
2 : 4; uint8_t tmp[DEC_WIDTH / 8]; ArrowDecimalGetBytes(decimal, tmp); @@ -322,10 +381,9 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { for (size_t i = 0; i < DEC_WIDTH; i++) { int carry; - carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF); + carry = (buf[nwords - 1] > 0x7FFFFFFFFFFFFFFF); for (size_t j = nwords - 1; j > 0; j--) { - buf[j] = - ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] >= 0x7FFFFFFFFFFFFFFF); + buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] > 0x7FFFFFFFFFFFFFFF); } buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF); @@ -350,6 +408,117 @@ class PostgresCopyNumericFieldWriter : public PostgresCopyFieldWriter { return ndigits; } + DecimalParts SplitDecimalParts(const char* decimal_digits, int digit_count, + int scale) const { + // Virtual zeros represent the logical zeros appended for negative scale + // Example: value=123, scale=-2 → "123" with 2 virtual zeros = "12300" + const int virtual_zeros = (scale < 0) ? -scale : 0; + const int effective_scale = (scale < 0) ? 0 : scale; + const int total_logical_digits = digit_count + virtual_zeros; + + // Calculate split point + const int n_int_digits = total_logical_digits > effective_scale + ? 
total_logical_digits - effective_scale + : 0; + const int n_frac_digits = total_logical_digits - n_int_digits; + + DecimalParts parts; + parts.effective_scale = effective_scale; + + // Extract integer part + if (n_int_digits > 0) { + if (n_int_digits <= digit_count) { + // Integer part is within the original digits + parts.integer_part.assign(decimal_digits, n_int_digits); + } else { + // Integer part includes all original digits + virtual zeros + parts.integer_part.assign(decimal_digits, digit_count); + parts.integer_part.append(virtual_zeros, '0'); + } + } + + // Extract fractional part (only exists if scale > 0) + if (n_int_digits == 0 && total_logical_digits < effective_scale) { + // Small fractional: 0.00123 needs leading zeros + parts.fractional_part.assign(effective_scale - total_logical_digits, '0'); + parts.fractional_part.append(decimal_digits, digit_count); + } else if (n_frac_digits > 0 && n_int_digits < digit_count) { + // Fractional part from remaining digits (virtual zeros don't appear in fractional + // part) + parts.fractional_part.assign(decimal_digits + n_int_digits, + digit_count - n_int_digits); + } + + return parts; + } + + std::pair, int16_t> GroupIntegerDigits( + const std::string& int_part) const { + constexpr int kDecDigits = 4; + std::vector digits; + + if (int_part.empty()) { + return {digits, -1}; // weight = -1 for pure fractional numbers + } + + // Calculate weight: ceil(length / 4) - 1 + int16_t weight = (int_part.length() + kDecDigits - 1) / kDecDigits - 1; + + // Group right-to-left in chunks of 4 + int i = int_part.length(); + while (i > 0) { + int chunk_size = std::min(i, kDecDigits); + std::string_view chunk = + std::string_view(int_part).substr(i - chunk_size, chunk_size); + + int16_t val{}; + std::from_chars(chunk.data(), chunk.data() + chunk.size(), val); + + // Skip trailing zero groups; NOTE(review): check 10000.5 -> [1, 5000], weight=1 + if (val != 0 || !digits.empty()) { + digits.insert(digits.begin(), val); + } + i -= chunk_size; + } + + return {digits, weight}; + } + + 
std::pair, int16_t> GroupFractionalDigits( + const std::string& frac_part, int16_t initial_weight, bool has_integer_part) const { + constexpr int kDecDigits = 4; + std::vector digits; + int16_t weight = initial_weight; + + if (frac_part.empty()) { + return {digits, weight}; + } + + bool skip_leading_zeros = !has_integer_part; + + // Group left-to-right in chunks of 4, right-padding last chunk + for (size_t i = 0; i < frac_part.length(); i += kDecDigits) { + int chunk_size = std::min(kDecDigits, static_cast(frac_part.length() - i)); + std::string chunk_str = frac_part.substr(i, chunk_size); + + // Right-pad to 4 digits (e.g., "45" → "4500") + chunk_str.resize(kDecDigits, '0'); + + int16_t val{}; + std::from_chars(chunk_str.data(), chunk_str.data() + chunk_str.size(), val); + + if (skip_leading_zeros && val == 0) { + // Skip leading zero groups in fractional part (e.g., 0.00001234 → skip "0000") + weight--; + } else { + digits.push_back(val); + skip_leading_zeros = false; + } + } + + return {digits, weight}; + } + static constexpr uint16_t kNumericPos = 0x0000; static constexpr uint16_t kNumericNeg = 0x4000; static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128 : 256; diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.toml b/c/driver/postgresql/validation/queries/ingest/decimal.toml deleted file mode 100644 index 0f154e4b41..0000000000 --- a/c/driver/postgresql/validation/queries/ingest/decimal.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -skip = "decimal ingest code has a bug and fix has not been merged in yet. https://github.com/apache/arrow-adbc/pull/3787" diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.txtcase b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase new file mode 100644 index 0000000000..81bc7bf871 --- /dev/null +++ b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// part: expected_schema +{ + "format": "+s", + "children": [ + { + "name": "idx", + "format": "l", + "flags": ["nullable"] + }, + { + "name": "value", + "format": "u", + "flags": ["nullable"], + "metadata": { + "ARROW:extension:name": "arrow.opaque", + "ARROW:extension:metadata": "{\"type_name\": \"numeric\", \"vendor_name\": \"PostgreSQL\"}" + } + } + ] +} + +// part: expected + +{"idx": 0, "value": "0"} +{"idx": 1, "value": "123.45"} +{"idx": 2, "value": "-123.45"} +{"idx": 3, "value": "9999999.99"} +{"idx": 4, "value": "-9999999.99"} diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.toml b/c/driver/postgresql/validation/queries/type/bind/decimal.toml deleted file mode 100644 index 0f154e4b41..0000000000 --- a/c/driver/postgresql/validation/queries/type/bind/decimal.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -skip = "decimal ingest code has a bug and fix has not been merged in yet. 
https://github.com/apache/arrow-adbc/pull/3787" diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase new file mode 100644 index 0000000000..aac33f2697 --- /dev/null +++ b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// part: expected_schema +{ + "format": "+s", + "children": [ + { + "name": "res", + "format": "u", + "flags": ["nullable"], + "metadata": { + "ARROW:extension:name": "arrow.opaque", + "ARROW:extension:metadata": "{\"type_name\": \"numeric\", \"vendor_name\": \"PostgreSQL\"}" + } + } + ] +}