33//nolint:gosec // G115: Integer conversions are safe - buffer size bounded by DefaultMaxInputSize (2GB)
44package simdcsv
55
6- import "bytes"
6+ import (
7+ "bytes"
8+ "unsafe"
9+ )
710
811// Buffer allocation constants for reducing reallocations in hot path.
912const (
@@ -30,10 +33,27 @@ const (
3033// This matches encoding/csv behavior and allows callers to recover partial data.
3134func (r * Reader ) buildRecordWithValidation (row rowInfo , rowIdx int ) ([]string , error ) {
3235 fieldCount := row .fieldCount
33- r .prepareBuffers (row , fieldCount )
34-
3536 fields := r .getFieldsForRow (row , fieldCount )
3637
38+ // Fast path: check if any field needs transformation
39+ needsTransform := r .state .hasCR
40+ if ! needsTransform {
41+ for _ , field := range fields {
42+ if field .needsUnescape () {
43+ needsTransform = true
44+ break
45+ }
46+ }
47+ }
48+
49+ // Fast path: zero-copy when no transformation needed (but still validate)
50+ if ! needsTransform && ! r .TrimLeadingSpace {
51+ return r .buildRecordWithValidationZeroCopy (row , fields )
52+ }
53+
54+ // Standard path with transformation
55+ r .prepareBuffers (row , fieldCount )
56+
3757 for i , field := range fields {
3858 if err := r .validateFieldIfNeeded (field , row .lineNum ); err != nil {
3959 return r .buildPartialRecord (i ), err
@@ -45,36 +65,116 @@ func (r *Reader) buildRecordWithValidation(row rowInfo, rowIdx int) ([]string, e
4565 return r .buildFinalRecord (fieldCount ), nil
4666}
4767
48- // buildRecordNoQuotes builds a record when the input contains no quotes.
49- // It avoids the recordBuffer copy path and mirrors appendSimpleContent behavior.
50- func (r * Reader ) buildRecordNoQuotes (row rowInfo ) []string {
68+ // buildRecordWithValidationZeroCopy builds a record with zero-copy strings while still validating.
69+ func (r * Reader ) buildRecordWithValidationZeroCopy (row rowInfo , fields []fieldInfo ) ([]string , error ) {
5170 fieldCount := row .fieldCount
5271 record := r .allocateRecord (fieldCount )
5372 r .state .fieldPositions = r .ensureFieldPositionsCapacity (fieldCount )
5473
55- fields := r .getFieldsForRow (row , fieldCount )
5674 buf := r .state .rawBuffer
5775 bufLen := uint32 (len (buf ))
5876
5977 for i , field := range fields {
78+ // Validate even in zero-copy path
79+ if err := r .validateFieldIfNeeded (field , row .lineNum ); err != nil {
80+ return record [:i ], err
81+ }
82+
6083 start := field .start
6184 end := start + field .length
6285 if start >= bufLen {
6386 record [i ] = ""
64- r .state .fieldPositions [i ] = position {line : row .lineNum , column : int (start ) + 1 }
65- continue
87+ } else {
88+ if end > bufLen {
89+ end = bufLen
90+ }
91+ // Zero-copy string from rawBuffer
92+ record [i ] = unsafe .String (& buf [start ], int (end - start ))
6693 }
67- if end > bufLen {
68- end = bufLen
94+ r .state .fieldPositions [i ] = position {line : row .lineNum , column : int (field .rawStart ()) + 1 }
95+ }
96+ return record , nil
97+ }
98+
99+ // buildRecordNoQuotes builds a record when the input contains no quotes.
100+ // Uses a single row string to avoid per-field allocations.
101+ func (r * Reader ) buildRecordNoQuotes (row rowInfo ) []string {
102+ fieldCount := row .fieldCount
103+ record := r .allocateRecord (fieldCount )
104+ r .state .fieldPositions = r .ensureFieldPositionsCapacity (fieldCount )
105+
106+ fields := r .getFieldsForRow (row , fieldCount )
107+ buf := r .state .rawBuffer
108+ bufLen := uint32 (len (buf ))
109+
110+ if len (fields ) == 0 {
111+ return record
112+ }
113+
114+ rowStart := fields [0 ].rawStart ()
115+ rowEnd := fields [len (fields )- 1 ].rawEnd ()
116+ if rowStart >= bufLen {
117+ for i , field := range fields {
118+ record [i ] = ""
119+ r .state .fieldPositions [i ] = position {line : row .lineNum , column : int (field .rawStart ()) + 1 }
120+ }
121+ return record
122+ }
123+ if rowEnd > bufLen {
124+ rowEnd = bufLen
125+ }
126+ if rowEnd < rowStart {
127+ rowEnd = rowStart
128+ }
129+
130+ var rowStr string
131+ if r .TrimLeadingSpace {
132+ rowStr = string (buf [rowStart :rowEnd ])
133+ } else {
134+ // Zero-copy string from rawBuffer - safe because rawBuffer outlives record.
135+ rowStr = unsafe .String (& buf [rowStart ], int (rowEnd - rowStart ))
136+ }
137+ rowStrLen := len (rowStr )
138+
139+ for i , field := range fields {
140+ start := field .start
141+ end := start + field .length
142+ rawStart := field .rawStart ()
143+
144+ if start < bufLen {
145+ if end > bufLen {
146+ end = bufLen
147+ }
148+ if r .TrimLeadingSpace && start < end {
149+ for start < end && (buf [start ] == ' ' || buf [start ] == '\t' ) {
150+ start ++
151+ }
152+ }
69153 }
70154
71- content := buf [start :end ]
72- if r .TrimLeadingSpace {
73- content = trimLeftBytes (content )
155+ if start < rowStart {
156+ start = rowStart
157+ }
158+ if end < start {
159+ end = start
160+ }
161+ relStart := int (start - rowStart )
162+ relEnd := int (end - rowStart )
163+ if relStart < 0 {
164+ relStart = 0
165+ }
166+ if relStart > rowStrLen {
167+ relStart = rowStrLen
168+ }
169+ if relEnd < relStart {
170+ relEnd = relStart
171+ }
172+ if relEnd > rowStrLen {
173+ relEnd = rowStrLen
74174 }
75175
76- record [i ] = string ( content )
77- r .state .fieldPositions [i ] = position {line : row .lineNum , column : int (start ) + 1 }
176+ record [i ] = rowStr [ relStart : relEnd ]
177+ r .state .fieldPositions [i ] = position {line : row .lineNum , column : int (rawStart ) + 1 }
78178 }
79179 return record
80180}
0 commit comments