Skip to content

Commit e7f1f96

Browse files
authored
refactor: adjust SIMD thresholds and optimize quote detection in Writer (#76)
* refactor: optimize quote detection and writing in Writer using SIMD and string operations
* fix: fix lint error
* refactor: adjust SIMD thresholds and optimize quote detection in Writer
1 parent 10094de commit e7f1f96

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

writer.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,12 @@ func (w *Writer) Error() error {
9696
return w.err
9797
}
9898

99-
// writerSIMDMinSize is the minimum field size for SIMD benefit in Writer.
100-
// Smaller than the general simdMinThreshold because we use padded operations.
101-
const writerSIMDMinSize = 8
99+
// writerSIMDMinSize is the minimum field size for SIMD benefit in writeQuotedField.
100+
const writerSIMDMinSize = 16
101+
102+
// writerSIMDCheckThreshold is the minimum size for SIMD benefit in fieldNeedsQuotes.
103+
// Higher than writerSIMDMinSize because checking has more overhead than writing.
104+
const writerSIMDCheckThreshold = 64
102105

103106
// fieldNeedsQuotes reports whether field requires quoting.
104107
// Dispatches to SIMD or scalar based on CPU support and field size.
@@ -110,21 +113,26 @@ func (w *Writer) fieldNeedsQuotes(field string) bool {
110113
if field[0] == ' ' || field[0] == '\t' {
111114
return true
112115
}
113-
// Use SIMD for ASCII delimiters (most common case)
114-
if useAVX512 && len(field) >= writerSIMDMinSize && w.Comma >= 0 && w.Comma < 128 {
116+
// Use SIMD only for larger fields where the overhead is justified
117+
if useAVX512 && len(field) >= writerSIMDCheckThreshold && w.Comma >= 0 && w.Comma < 128 {
115118
return w.fieldNeedsQuotesSIMD(field)
116119
}
117120
return w.fieldNeedsQuotesScalar(field)
118121
}
119122

120-
// fieldNeedsQuotesScalar checks for special characters using optimized string search.
121-
// strings.IndexAny is internally optimized and uses SIMD on modern Go runtimes.
123+
// fieldNeedsQuotesScalar checks for special characters using direct byte iteration.
124+
// This is faster than strings.ContainsAny for short strings because it avoids the charset-building overhead.
122125
func (w *Writer) fieldNeedsQuotesScalar(field string) bool {
123-
// For ASCII comma (common case), use IndexAny with precomputed charset
126+
// For ASCII comma (common case), use direct byte comparison
124127
if w.Comma < 128 {
125-
// Build search charset: comma + newline + carriage return + quote
126-
charset := string([]byte{byte(w.Comma), '\n', '\r', '"'})
127-
return strings.ContainsAny(field, charset)
128+
comma := byte(w.Comma)
129+
for i := 0; i < len(field); i++ {
130+
c := field[i]
131+
if c == comma || c == '\n' || c == '\r' || c == '"' {
132+
return true
133+
}
134+
}
135+
return false
128136
}
129137
// For non-ASCII comma, fall back to rune iteration
130138
for _, c := range field {
@@ -184,6 +192,7 @@ func (w *Writer) writeQuotedField(field string) error {
184192
if err := w.w.WriteByte('"'); err != nil {
185193
return err
186194
}
195+
// Use SIMD for fields that benefit from parallel quote detection
187196
if useAVX512 && len(field) >= writerSIMDMinSize {
188197
return w.writeQuotedFieldSIMD(field)
189198
}

0 commit comments

Comments
 (0)