Skip to content

Commit 553b753

Browse files
committed
refactor: use cached values
1 parent 732fb64 commit 553b753

File tree

4 files changed

+73
-45
lines changed

4 files changed

+73
-45
lines changed

field_parser.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,14 @@ func (pr *parseResult) reset() {
103103
pr.rows = pr.rows[:0]
104104
}
105105

106-
// releaseParseResult returns a parseResult to the pool for reuse.
107-
func releaseParseResult(pr *parseResult) {
108-
if pr != nil {
109-
pr.reset()
110-
parseResultPool.Put(pr)
106+
// release returns the parseResult to the pool for reuse.
107+
func (pr *parseResult) release() {
108+
if pr == nil {
109+
return
111110
}
111+
112+
pr.reset()
113+
parseResultPool.Put(pr)
112114
}
113115

114116
// =============================================================================

parse.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ func ParseBytes(data []byte, comma rune) ([][]string, error) {
2121
pr := parseBuffer(data, sr)
2222
records := buildRecords(data, pr, sr.hasCR)
2323

24-
releaseParseResult(pr)
25-
releaseScanResult(sr)
24+
pr.release()
25+
sr.release()
2626

2727
return records, nil
2828
}
@@ -37,8 +37,8 @@ func ParseBytesStreaming(data []byte, comma rune, callback func([]string) error)
3737
separator := byte(comma)
3838
sr := scanBuffer(data, separator)
3939
pr := parseBuffer(data, sr)
40-
defer releaseParseResult(pr)
41-
defer releaseScanResult(sr)
40+
defer pr.release()
41+
defer sr.release()
4242

4343
if pr == nil || len(pr.rows) == 0 {
4444
return nil

reader.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ func (r *Reader) initialize() error {
362362
r.state.parseResult = parseBuffer(r.state.rawBuffer, r.state.scanResult)
363363

364364
// Release scanResult (no longer needed after parsing)
365-
releaseScanResult(r.state.scanResult)
365+
r.state.scanResult.release()
366366
r.state.scanResult = nil
367367

368368
r.state.offset = int64(len(r.state.rawBuffer))

simd_scanner.go

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ func bytesToInt8Slice(b []byte) []int8 {
2121
// useAVX512 indicates whether AVX-512 instructions are available at runtime.
2222
var useAVX512 bool
2323

24+
// Cached broadcast values for fixed characters (initialized in init() only when AVX-512 is available; zero-valued otherwise).
25+
var (
26+
cachedQuoteCmp archsimd.Int8x64
27+
cachedCrCmp archsimd.Int8x64
28+
cachedNlCmp archsimd.Int8x64
29+
)
30+
2431
// SIMD processing constants.
2532
const (
2633
simdChunkSize = 64 // bytes per AVX-512 iteration
@@ -32,6 +39,12 @@ const (
3239

3340
func init() {
3441
useAVX512 = archsimd.X86.AVX512()
42+
if useAVX512 {
43+
// Pre-broadcast fixed characters to avoid repeated BroadcastInt8x64 calls
44+
cachedQuoteCmp = archsimd.BroadcastInt8x64('"')
45+
cachedCrCmp = archsimd.BroadcastInt8x64('\r')
46+
cachedNlCmp = archsimd.BroadcastInt8x64('\n')
47+
}
3548
}
3649

3750
// =============================================================================
@@ -124,9 +137,9 @@ func (sr *scanResult) reset() {
124137
sr.newlineCount = 0
125138
}
126139

127-
// releaseScanResult returns a scanResult to the pool for reuse.
140+
// release returns the scanResult to the pool for reuse.
128141
// Large results (>= scanResultLargeThreshold) are cached separately to survive GC.
129-
func releaseScanResult(sr *scanResult) {
142+
func (sr *scanResult) release() {
130143
if sr == nil {
131144
return
132145
}
@@ -203,12 +216,11 @@ func generateMasksScalar(data []byte, separator byte) (quote, sep, cr, nl uint64
203216

204217
// generateMasksAVX512 generates masks using AVX-512 SIMD instructions.
205218
// Requires AVX-512BW for ToBits(), which uses the VPMOVB2M instruction.
219+
// Uses cached broadcast values for fixed characters (quote, CR, NL) to avoid
220+
// repeated BroadcastInt8x64 calls.
206221
func generateMasksAVX512(data []byte, separator byte) (quote, sep, cr, nl uint64) {
207-
quoteCmp := archsimd.BroadcastInt8x64('"')
208222
sepCmp := archsimd.BroadcastInt8x64(int8(separator))
209-
crCmp := archsimd.BroadcastInt8x64('\r')
210-
nlCmp := archsimd.BroadcastInt8x64('\n')
211-
return generateMasksAVX512WithCmp(data, quoteCmp, sepCmp, crCmp, nlCmp)
223+
return generateMasksAVX512WithCmp(data, cachedQuoteCmp, sepCmp, cachedCrCmp, cachedNlCmp)
212224
}
213225

214226
// generateMasksAVX512WithCmp generates masks reusing pre-broadcasted comparators.
@@ -446,10 +458,10 @@ type avx512MaskGenerator struct {
446458

447459
func newAVX512MaskGenerator(separator byte) *avx512MaskGenerator {
448460
return &avx512MaskGenerator{
449-
quoteCmp: archsimd.BroadcastInt8x64('"'),
461+
quoteCmp: cachedQuoteCmp,
450462
sepCmp: archsimd.BroadcastInt8x64(int8(separator)),
451-
crCmp: archsimd.BroadcastInt8x64('\r'),
452-
nlCmp: archsimd.BroadcastInt8x64('\n'),
463+
crCmp: cachedCrCmp,
464+
nlCmp: cachedNlCmp,
453465
}
454466
}
455467

@@ -492,20 +504,34 @@ func scanBufferWithGenerator(buf []byte, gen maskGenerator) *scanResult {
492504
result := acquireScanResult(chunkCount)
493505
state := scanState{}
494506

495-
curMasks, curValidBits := generateFirstChunkMasks(buf, gen, result)
496-
nextMasks := generateSecondChunkMasks(buf, chunkCount, gen, result)
507+
sc := bufferScanContext{
508+
buf: buf,
509+
gen: gen,
510+
result: result,
511+
chunkCount: chunkCount,
512+
}
513+
514+
curMasks, curValidBits := sc.generateFirstChunkMasks()
515+
nextMasks := sc.generateSecondChunkMasks()
497516

498517
for chunkIdx := 0; chunkIdx < chunkCount; chunkIdx++ {
499518
processChunk(chunkIdx, curMasks, nextMasks, curValidBits, &state, result)
500519

501520
curMasks = nextMasks
502-
nextMasks, curValidBits = generateNextLookahead(buf, chunkIdx, chunkCount, gen, result)
521+
nextMasks, curValidBits = sc.generateNextLookahead(chunkIdx)
503522
}
504523

505524
result.finalQuoted = state.quoted
506525
return result
507526
}
508527

528+
type bufferScanContext struct {
529+
buf []byte
530+
gen maskGenerator
531+
result *scanResult
532+
chunkCount int
533+
}
534+
509535
// acquireScanResult gets a pooled scanResult and initializes it for the given chunk count.
510536
func acquireScanResult(chunkCount int) *scanResult {
511537
if chunkCount >= scanResultLargeThreshold {
@@ -531,61 +557,61 @@ func acquireScanResult(chunkCount int) *scanResult {
531557

532558
// generateFirstChunkMasks generates masks for the first chunk of the buffer.
533559
// Handles both full chunks and partial (padded) chunks.
534-
func generateFirstChunkMasks(buf []byte, gen maskGenerator, result *scanResult) (chunkMasks, int) {
535-
if len(buf) >= simdChunkSize {
536-
return gen.generateFull(buf[0:simdChunkSize]), simdChunkSize
560+
func (sc *bufferScanContext) generateFirstChunkMasks() (chunkMasks, int) {
561+
if len(sc.buf) >= simdChunkSize {
562+
return sc.gen.generateFull(sc.buf[0:simdChunkSize]), simdChunkSize
537563
}
538564

539-
masks, validBits := gen.generatePadded(buf)
540-
result.lastChunkBits = validBits
565+
masks, validBits := sc.gen.generatePadded(sc.buf)
566+
sc.result.lastChunkBits = validBits
541567
return masks, validBits
542568
}
543569

544570
// generateSecondChunkMasks generates lookahead masks for the second chunk if it exists.
545571
// Returns empty masks if there is no second chunk.
546-
func generateSecondChunkMasks(buf []byte, chunkCount int, gen maskGenerator, result *scanResult) chunkMasks {
547-
if chunkCount <= 1 || len(buf) <= simdChunkSize {
572+
func (sc *bufferScanContext) generateSecondChunkMasks() chunkMasks {
573+
if sc.chunkCount <= 1 || len(sc.buf) <= simdChunkSize {
548574
return chunkMasks{}
549575
}
550576

551-
if len(buf) >= 2*simdChunkSize {
552-
return gen.generateFull(buf[simdChunkSize : 2*simdChunkSize])
577+
if len(sc.buf) >= 2*simdChunkSize {
578+
return sc.gen.generateFull(sc.buf[simdChunkSize : 2*simdChunkSize])
553579
}
554580

555-
masks, validBits := gen.generatePadded(buf[simdChunkSize:])
556-
if chunkCount == 2 {
557-
result.lastChunkBits = validBits
581+
masks, validBits := sc.gen.generatePadded(sc.buf[simdChunkSize:])
582+
if sc.chunkCount == 2 {
583+
sc.result.lastChunkBits = validBits
558584
}
559585
return masks
560586
}
561587

562588
// generateNextLookahead generates masks for the chunk two positions ahead (lookahead).
563589
// This enables processing the current chunk while already knowing what comes next.
564-
func generateNextLookahead(buf []byte, chunkIdx, chunkCount int, gen maskGenerator, result *scanResult) (chunkMasks, int) {
590+
func (sc *bufferScanContext) generateNextLookahead(chunkIdx int) (chunkMasks, int) {
565591
lookaheadIdx := chunkIdx + 2
566-
if lookaheadIdx >= chunkCount {
567-
return handleFinalChunkValidBits(buf, chunkIdx, chunkCount, result)
592+
if lookaheadIdx >= sc.chunkCount {
593+
return sc.handleFinalChunkValidBits(chunkIdx)
568594
}
569595

570596
offset := lookaheadIdx * simdChunkSize
571-
remaining := len(buf) - offset
597+
remaining := len(sc.buf) - offset
572598

573599
if remaining >= simdChunkSize {
574-
return gen.generateFull(buf[offset : offset+simdChunkSize]), simdChunkSize
600+
return sc.gen.generateFull(sc.buf[offset : offset+simdChunkSize]), simdChunkSize
575601
}
576602

577-
masks, validBits := gen.generatePadded(buf[offset:])
578-
result.lastChunkBits = validBits
603+
masks, validBits := sc.gen.generatePadded(sc.buf[offset:])
604+
sc.result.lastChunkBits = validBits
579605
return masks, validBits
580606
}
581607

582608
// handleFinalChunkValidBits computes valid bits when no more lookahead chunks exist.
583-
func handleFinalChunkValidBits(buf []byte, chunkIdx, chunkCount int, result *scanResult) (chunkMasks, int) {
609+
func (sc *bufferScanContext) handleFinalChunkValidBits(chunkIdx int) (chunkMasks, int) {
584610
validBits := simdChunkSize
585611

586-
if chunkIdx+1 == chunkCount-1 && len(buf)%simdChunkSize != 0 {
587-
validBits = len(buf) % simdChunkSize
588-
result.lastChunkBits = validBits
612+
if chunkIdx+1 == sc.chunkCount-1 && len(sc.buf)%simdChunkSize != 0 {
613+
validBits = len(sc.buf) % simdChunkSize
614+
sc.result.lastChunkBits = validBits
589615
}
590616

591617
return chunkMasks{}, validBits

0 commit comments

Comments
 (0)