@@ -59,16 +59,20 @@ type Reader struct {
5959 lastRecord []string
6060
6161 // SIMD processing state
62- scanResult * scanResult // Scan result (structural character masks)
63- parseResult * parseResult // Parse result (extracted fields/rows)
64- currentRecordIndex int // Current record index in parseResult.rows
65- initialized bool // Whether scan/parse have been run
62+ scanResult * scanResult // Scan result (structural character masks)
63+ parseResult * parseResult // Parse result (extracted fields/rows)
64+ currentRecordIndex int // Current record index in parseResult.rows
65+ nonCommentRecordCount int // Count of non-comment records returned (for O(1) first record detection)
66+ initialized bool // Whether scan/parse have been run
6667
6768 // Extended options (set via NewReaderWithOptions)
68- skipBOM bool // Skip UTF-8 BOM if present
69- bufferSize int // Buffer size hint (reserved for future use)
70- chunkSize int // Chunk size hint (reserved for future use)
71- zeroCopy bool // Zero-copy mode hint (reserved for future use)
69+ skipBOM bool // Skip UTF-8 BOM if present
70+
71+ // Reserved fields for future streaming/chunked processing implementation.
72+ // These fields are accepted by NewReaderWithOptions but currently have no effect.
73+ bufferSize int // Buffer size hint (not yet implemented)
74+ chunkSize int // Chunk size hint (not yet implemented)
75+ zeroCopy bool // Zero-copy mode hint (not yet implemented)
7276}
7377
7478// position represents a position in the input.
@@ -95,7 +99,7 @@ func NewReader(r io.Reader) *Reader {
9599// If ReuseRecord is true, the returned slice may be shared
96100// between multiple calls to Read.
97101func (r * Reader ) Read () (record []string , err error ) {
98- // Initialize on first call: read all input and run Stage 1 + Stage 2
102+ // Initialize on first call: read all input and run scanBuffer + parseBuffer
99103 if ! r .initialized {
100104 if err := r .initialize (); err != nil {
101105 return nil , err
@@ -108,18 +112,19 @@ func (r *Reader) Read() (record []string, err error) {
108112 return nil , io .EOF
109113 }
110114
111- // Get current row info
112- rowInfo := r .parseResult .rows [r .currentRecordIndex ]
115+ // Get current row info and index
116+ rowIdx := r .currentRecordIndex
117+ rowInfo := r .parseResult .rows [rowIdx ]
113118 r .currentRecordIndex ++
114119
115120 // Check for comment line (line starting with Comment character)
116- if r .Comment != 0 && r .isCommentLine (rowInfo ) {
121+ if r .Comment != 0 && r .isCommentLine (rowInfo , rowIdx ) {
117122 // Skip this line and continue to next
118123 continue
119124 }
120125
121126 // Build record from fields with validation
122- record , err = r .buildRecordWithValidation (rowInfo )
127+ record , err = r .buildRecordWithValidation (rowInfo , rowIdx )
123128 if err != nil {
124129 return record , err
125130 }
@@ -152,24 +157,19 @@ func (r *Reader) Read() (record []string, err error) {
152157 }
153158 // If FieldsPerRecord < 0, no check is performed
154159
160+ r .nonCommentRecordCount ++
155161 return record , nil
156162 }
157163}
158164
159- // isFirstNonCommentRecord checks if this is the first non-comment record being returned
165+ // isFirstNonCommentRecord reports whether the record being processed is the first non-comment record,
166+ // i.e. Read has not yet returned one successfully. It is an O(1) check of nonCommentRecordCount rather than a re-scan of earlier rows.
160167func (r * Reader ) isFirstNonCommentRecord () bool {
161- // Count how many non-comment records we've processed
162- nonCommentCount := 0
163- for i := 0 ; i < r .currentRecordIndex ; i ++ {
164- if i < len (r .parseResult .rows ) && ! r .isCommentLine (r .parseResult .rows [i ]) {
165- nonCommentCount ++
166- }
167- }
168- return nonCommentCount == 1
168+ return r .nonCommentRecordCount == 0
169169}
170170
171171// isCommentLine checks if a row is a comment line
172- func (r * Reader ) isCommentLine (row rowInfo ) bool {
172+ func (r * Reader ) isCommentLine (row rowInfo , rowIdx int ) bool {
173173 if r .Comment == 0 || row .fieldCount == 0 {
174174 return false
175175 }
@@ -186,15 +186,16 @@ func (r *Reader) isCommentLine(row rowInfo) bool {
186186 return false
187187 }
188188 // Get the raw start position (the original field start in rawBuffer)
189- rawStart := r .getRawFieldStart (row , firstFieldIdx )
189+ rawStart := r .getRawFieldStart (row , rowIdx , firstFieldIdx )
190190 if rawStart < uint64 (len (r .rawBuffer )) {
191191 return r .rawBuffer [rawStart ] == byte (r .Comment )
192192 }
193193 return false
194194}
195195
196- // getRawFieldStart gets the original field start position before quote adjustment
197- func (r * Reader ) getRawFieldStart (row rowInfo , fieldIdx int ) uint64 {
196+ // getRawFieldStart gets the original field start position before quote adjustment.
197+ // rowIdx is the index of row in parseResult.rows, so the previous row is found by direct indexing (O(1)) instead of an O(n) search.
198+ func (r * Reader ) getRawFieldStart (row rowInfo , rowIdx , fieldIdx int ) uint64 {
198199 // For the first field of a row, we need to find the actual start
199200 // which is either:
200201 // - 0 for the first row
@@ -208,15 +209,8 @@ func (r *Reader) getRawFieldStart(row rowInfo, fieldIdx int) uint64 {
208209 // If quoteAdjust was applied, start is field.start - 1
209210 // But for comment detection, we need the actual line start
210211 // We can find it by looking at the previous row's end position
211- prevRowIdx := - 1
212- for i , r := range r .parseResult .rows {
213- if r .firstField == row .firstField {
214- prevRowIdx = i - 1
215- break
216- }
217- }
218- if prevRowIdx >= 0 {
219- prevRow := r .parseResult .rows [prevRowIdx ]
212+ if rowIdx > 0 {
213+ prevRow := r .parseResult .rows [rowIdx - 1 ]
220214 lastFieldIdx := prevRow .firstField + prevRow .fieldCount - 1
221215 if lastFieldIdx >= 0 && lastFieldIdx < len (r .parseResult .fields ) {
222216 lastField := r .parseResult .fields [lastFieldIdx ]
@@ -227,7 +221,7 @@ func (r *Reader) getRawFieldStart(row rowInfo, fieldIdx int) uint64 {
227221 return field .start
228222}
229223
230- // initialize reads all input and runs Stage 1 and Stage 2 processing.
224+ // initialize reads all input and runs scanBuffer and parseBuffer processing.
231225func (r * Reader ) initialize () error {
232226 r .initialized = true
233227
@@ -269,7 +263,7 @@ func (r *Reader) initialize() error {
269263}
270264
271265// buildRecordWithValidation constructs a []string record from a rowInfo with quote validation
272- func (r * Reader ) buildRecordWithValidation (row rowInfo ) ([]string , error ) {
266+ func (r * Reader ) buildRecordWithValidation (row rowInfo , rowIdx int ) ([]string , error ) {
273267 fieldCount := row .fieldCount
274268 record := r .allocateRecord (fieldCount )
275269
@@ -283,7 +277,7 @@ func (r *Reader) buildRecordWithValidation(row rowInfo) ([]string, error) {
283277 field := r .parseResult .fields [fieldIdx ]
284278
285279 // Get raw field data for validation
286- rawStart , rawEnd := r .getFieldRawBounds (row , fieldIdx , i )
280+ rawStart , rawEnd := r .getFieldRawBounds (row , rowIdx , fieldIdx , i )
287281
288282 // Validate quotes unless LazyQuotes is enabled
289283 if ! r .LazyQuotes {
@@ -306,7 +300,7 @@ func (r *Reader) buildRecordWithValidation(row rowInfo) ([]string, error) {
306300}
307301
308302// getFieldRawBounds returns the raw start and end positions for a field in the buffer
309- func (r * Reader ) getFieldRawBounds (row rowInfo , fieldIdx , fieldNum int ) (uint64 , uint64 ) {
303+ func (r * Reader ) getFieldRawBounds (row rowInfo , rowIdx , fieldIdx , fieldNum int ) (uint64 , uint64 ) {
310304 field := r .parseResult .fields [fieldIdx ]
311305
312306 // Calculate raw start (before any quote adjustment)
@@ -317,7 +311,7 @@ func (r *Reader) getFieldRawBounds(row rowInfo, fieldIdx, fieldNum int) (uint64,
317311 rawStart = 0
318312 } else {
319313 // Find the position after the previous newline
320- rawStart = r .findLineStart (row )
314+ rawStart = r .findLineStart (rowIdx )
321315 }
322316 } else {
323317 // For non-first fields, find the position after the previous separator
@@ -384,17 +378,9 @@ func (r *Reader) findRawFieldEnd(start uint64, isLastField bool) uint64 {
384378 return bufLen
385379}
386380
387- // findLineStart finds the start position of a line
388- func (r * Reader ) findLineStart (row rowInfo ) uint64 {
389- // Find the row index
390- rowIdx := - 1
391- for i , ri := range r .parseResult .rows {
392- if ri .firstField == row .firstField && ri .lineNum == row .lineNum {
393- rowIdx = i
394- break
395- }
396- }
397-
381+ // findLineStart finds the start position of a line.
382+ // It takes the row's index in parseResult.rows directly (O(1)) instead of searching for the row (O(n)); rowIdx <= 0 yields position 0.
383+ func (r * Reader ) findLineStart (rowIdx int ) uint64 {
398384 if rowIdx <= 0 {
399385 return 0
400386 }
@@ -517,10 +503,20 @@ func (r *Reader) InputOffset() int64 {
517503
518504// ReaderOptions contains extended configuration options for [Reader].
519505type ReaderOptions struct {
520- BufferSize int // BufferSize specifies the internal buffer size in bytes. Default is 64KB.
521- ChunkSize int // Parallel processing chunk size
522- ZeroCopy bool // Zero-copy optimization (default: false)
523- SkipBOM bool // Skip UTF-8 BOM (default: false)
506+ // SkipBOM skips UTF-8 BOM (EF BB BF) at the beginning of input if present.
507+ SkipBOM bool
508+
509+ // BufferSize specifies the internal buffer size hint in bytes.
510+ // NOTE: Not yet implemented; reserved for future streaming support.
511+ BufferSize int
512+
513+ // ChunkSize specifies the parallel processing chunk size.
514+ // NOTE: Not yet implemented; reserved for future streaming support.
515+ ChunkSize int
516+
517+ // ZeroCopy requests zero-copy optimization.
518+ // NOTE: Not yet implemented; reserved for future optimization.
519+ ZeroCopy bool
524520}
525521
526522// NewReaderWithOptions creates a Reader with extended options.
0 commit comments