Skip to content

Commit fde3026

Browse files
committed
fix review comments
1 parent 19c60f0 commit fde3026

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

cpp/src/arrow/csv/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ struct ARROW_EXPORT WriteOptions {
210210
QuotingStyle quoting_style = QuotingStyle::Needed;
211211

212212
/// \brief Quoting style of header
213+
///
214+
/// Note that `QuotingStyle::Needed` and `QuotingStyle::AllValid` have the same
215+
/// effect of quoting all column names.
213216
QuotingStyle quoting_header = QuotingStyle::Needed;
214217

215218
/// Create write options with default values

cpp/src/arrow/csv/writer.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ int64_t CountQuotes(std::string_view s) {
105105

106106
// Matching quote pair character length.
107107
constexpr int64_t kQuoteCount = 2;
108-
// Matching delimiter character length.
108+
// Delimiter character length.
109109
constexpr int64_t kDelimiterCount = 1;
110110

111111
// Interface for generating CSV data per column.
@@ -177,6 +177,8 @@ char* Escape(std::string_view s, char* out) {
177177
return out;
178178
}
179179

180+
// Return the index of the first structural char in the input. A structural char
181+
// is a character that needs quoting and/or escaping.
180182
int64_t StopAtStructuralChar(const uint8_t* data, const int64_t buffer_size,
181183
const char delimiter) {
182184
int64_t offset = 0;
@@ -634,11 +636,11 @@ class CSVWriterImpl : public ipc::RecordBatchWriter {
634636
memcpy(next, col_name.data(), col_name.size());
635637
next += col_name.size();
636638
break;
637-
// The behavior of the Needed quoting_style in CSV data depends on the data type.
638-
// And it is always quoted when the data type is binary. To avoid semantic
639-
// differences, the behavior of Need and AllValid should be consistent.
640639
case QuotingStyle::Needed:
641640
case QuotingStyle::AllValid:
641+
// QuotingStyle::Needed is defined as always quoting string/binary data,
642+
// regardless of whether it contains structural chars.
643+
// We use consistent semantics for header names, which are strings.
642644
*next++ = '"';
643645
next = Escape(schema_->field(col)->name(), next);
644646
*next++ = '"';

cpp/src/arrow/csv/writer_test.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,10 @@ std::vector<WriterTestParams> GenerateTestCases() {
9393
auto dummy_schema = schema({field("a", uint8())});
9494
std::string dummy_batch_data = R"([{"a": null}])";
9595

96-
auto header_without_structral_charaters =
96+
auto header_without_structural_charaters =
9797
schema({field("a ", uint64()), field("b", int32())});
98-
std::string expected_header_without_structral_charaters = std::string(R"(a ,b)") + "\n";
98+
std::string expected_header_without_structural_charaters =
99+
std::string(R"(a ,b)") + "\n";
99100
auto expected_status_no_quotes_with_structural_in_header = [](const char* header) {
100101
return Status::Invalid(
101102
"CSV header may not contain structural characters if quoting "
@@ -292,12 +293,12 @@ std::vector<WriterTestParams> GenerateTestCases() {
292293
DefaultTestOptions(/*include_header=*/false, /*null_string=*/"",
293294
QuotingStyle::Needed, /*eol=*/";", /*delimiter=*/';'),
294295
/*expected_output*/ "", expected_status_illegal_delimiter(';')},
295-
{header_without_structral_charaters, "[]",
296+
{header_without_structural_charaters, "[]",
296297
DefaultTestOptions(/*include_header=*/true, /*null_string=*/"",
297298
QuotingStyle::Needed, /*eol=*/"\n",
298299
/*delimiter=*/',', /*batch_size=*/5,
299300
/*quoting_header=*/QuotingStyle::None),
300-
expected_header_without_structral_charaters},
301+
expected_header_without_structural_charaters},
301302
{abc_schema, "[]",
302303
DefaultTestOptions(/*include_header=*/true, /*null_string=*/"",
303304
QuotingStyle::Needed, /*eol=*/"\n",

0 commit comments

Comments
 (0)