@@ -1160,3 +1160,165 @@ func popcount(x uint64) int {
11601160 }
11611161 return count
11621162}
1163+
1164+ // ============================================================================
1165+ // TestPrefixXOR - Test prefix XOR computation (PCLMULQDQ optimization)
1166+ // ============================================================================
1167+
1168+ func TestPrefixXOR (t * testing.T ) {
1169+ // prefixXOR computes the cumulative XOR: result[i] = XOR of bits 0..i
1170+ // This is used for quote region detection: bit i is set if there's an
1171+ // odd number of quotes at positions 0 through i (inclusive).
1172+ tests := []struct {
1173+ name string
1174+ input uint64
1175+ want uint64
1176+ }{
1177+ {
1178+ name : "empty" ,
1179+ input : 0 ,
1180+ want : 0 , // no quotes = no regions
1181+ },
1182+ {
1183+ name : "single_bit_0" ,
1184+ input : 1 , // quote at position 0
1185+ // All positions 0..63 have odd count (1) → all bits set
1186+ want : 0xFFFFFFFFFFFFFFFF ,
1187+ },
1188+ {
1189+ name : "single_bit_1" ,
1190+ input : 2 , // quote at position 1 (0b10)
1191+ // pos 0: 0 quotes → 0; pos 1+: 1 quote → 1
1192+ want : 0xFFFFFFFFFFFFFFFE ,
1193+ },
1194+ {
1195+ name : "single_bit_2" ,
1196+ input : 4 , // quote at position 2 (0b100)
1197+ // pos 0,1: 0 quotes → 0; pos 2+: 1 quote → 1
1198+ want : 0xFFFFFFFFFFFFFFFC ,
1199+ },
1200+ {
1201+ name : "two_adjacent_bits" ,
1202+ input : 0b11 , // quotes at positions 0,1
1203+ // pos 0: 1 quote → 1; pos 1+: 2 quotes → 0
1204+ want : 0x0000000000000001 ,
1205+ },
1206+ {
1207+ name : "alternating_bits_8bit" ,
1208+ input : 0xAA , // 0b10101010 = quotes at positions 1,3,5,7
1209+ // pos 0: 0→0, pos 1: 1→1, pos 2: 1→1, pos 3: 2→0,
1210+ // pos 4: 2→0, pos 5: 3→1, pos 6: 3→1, pos 7: 4→0
1211+ // Low 8 bits: 0b01100110 = 0x66
1212+ // pos 8+: 4 quotes (even) → 0
1213+ want : 0x0000000000000066 ,
1214+ },
1215+ {
1216+ name : "quote_example" ,
1217+ input : 0x4A , // 0b01001010 = quotes at positions 1, 3, 6
1218+ // pos 0: 0→0, pos 1: 1→1, pos 2: 1→1, pos 3: 2→0,
1219+ // pos 4: 2→0, pos 5: 2→0, pos 6: 3→1, pos 7+: 3→1 (odd)
1220+ // Low 8 bits: 0b11000110 = 0xC6
1221+ // Upper bits: all 1 (odd count continues)
1222+ want : 0xFFFFFFFFFFFFFFC6 ,
1223+ },
1224+ {
1225+ name : "all_ones_8bit" ,
1226+ input : 0xFF , // quotes at all positions 0-7
1227+ // pos 0: 1→1, pos 1: 2→0, pos 2: 3→1, pos 3: 4→0, ...
1228+ // Pattern: 10101010... = 0x55 for low 8 bits
1229+ // pos 8+: 8 quotes (even) → 0
1230+ want : 0x0000000000000055 ,
1231+ },
1232+ {
1233+ name : "high_bit_only" ,
1234+ input : uint64 (1 ) << 63 , // quote at position 63 only
1235+ // pos 0-62: 0 quotes → 0; pos 63: 1 quote → 1
1236+ want : 0x8000000000000000 ,
1237+ },
1238+ {
1239+ name : "bits_0_and_63" ,
1240+ input : 1 | (uint64 (1 ) << 63 ), // quotes at positions 0 and 63
1241+ // pos 0: 1→1, pos 1-62: 1→1 (odd), pos 63: 2→0
1242+ want : 0x7FFFFFFFFFFFFFFF ,
1243+ },
1244+ }
1245+
1246+ for _ , tt := range tests {
1247+ t .Run (tt .name , func (t * testing.T ) {
1248+ got := prefixXOR (tt .input )
1249+ if got != tt .want {
1250+ t .Errorf ("prefixXOR(0x%016x) = 0x%016x, want 0x%016x" ,
1251+ tt .input , got , tt .want )
1252+ }
1253+ })
1254+ }
1255+ }
1256+
1257+ // TestPrefixXORQuoteRegions tests the actual use case: converting quote positions to regions
1258+ func TestPrefixXORQuoteRegions (t * testing.T ) {
1259+ // prefixXOR result[i] = 1 means odd number of quotes at positions 0..i (inclusive)
1260+ // In CSV parsing context:
1261+ // - Position i has inQuote[i]=1 if we're at or after an opening quote but not past a closing quote
1262+ // - Quote chars themselves are considered "in quote" for masking purposes
1263+ tests := []struct {
1264+ name string
1265+ quotePos []int // positions where quotes appear
1266+ wantInQuote []int // positions where inQuote bit should be 1
1267+ wantOutQuote []int // positions where inQuote bit should be 0
1268+ initialQuoted bool // if true, we start inside a quoted region
1269+ }{
1270+ {
1271+ name : "simple_quoted_field" ,
1272+ quotePos : []int {0 , 5 }, // "hello" - quotes at 0 and 5
1273+ // pos 0: 1 quote → 1; pos 1-4: 1 quote → 1; pos 5: 2 quotes → 0
1274+ wantInQuote : []int {0 , 1 , 2 , 3 , 4 },
1275+ wantOutQuote : []int {5 , 6 , 7 , 8 },
1276+ },
1277+ {
1278+ name : "two_quoted_fields" ,
1279+ quotePos : []int {0 , 3 , 5 , 8 }, // "ab","cd" - pattern at 0,3,5,8
1280+ // pos 0-2: 1 quote → 1; pos 3-4: 2 quotes → 0; pos 5-7: 3 quotes → 1; pos 8+: 4 quotes → 0
1281+ wantInQuote : []int {0 , 1 , 2 , 5 , 6 , 7 },
1282+ wantOutQuote : []int {3 , 4 , 8 , 9 , 10 },
1283+ },
1284+ {
1285+ name : "start_inside_quote" ,
1286+ quotePos : []int {5 }, // closing quote at 5
1287+ // Without inversion: pos 0-4: 0 quotes → 0; pos 5+: 1 quote → 1
1288+ // With inversion (initialQuoted=true): pos 0-4: 1; pos 5+: 0
1289+ wantInQuote : []int {0 , 1 , 2 , 3 , 4 },
1290+ wantOutQuote : []int {5 , 6 , 7 , 8 },
1291+ initialQuoted : true ,
1292+ },
1293+ }
1294+
1295+ for _ , tt := range tests {
1296+ t .Run (tt .name , func (t * testing.T ) {
1297+ // Build quote mask
1298+ var quoteMask uint64
1299+ for _ , pos := range tt .quotePos {
1300+ quoteMask |= uint64 (1 ) << pos
1301+ }
1302+
1303+ // Compute in-quote mask using prefixXOR
1304+ inQuote := prefixXOR (quoteMask )
1305+ if tt .initialQuoted {
1306+ inQuote = ^ inQuote
1307+ }
1308+
1309+ // Verify expected in-quote positions
1310+ for _ , pos := range tt .wantInQuote {
1311+ if inQuote & (uint64 (1 )<< pos ) == 0 {
1312+ t .Errorf ("position %d should be inside quotes (bit=1), but bit is 0. inQuote=0x%016x" , pos , inQuote )
1313+ }
1314+ }
1315+
1316+ // Verify expected out-quote positions
1317+ for _ , pos := range tt .wantOutQuote {
1318+ if inQuote & (uint64 (1 )<< pos ) != 0 {
1319+ t .Errorf ("position %d should be outside quotes (bit=0), but bit is 1. inQuote=0x%016x" , pos , inQuote )
1320+ }
1321+ }
1322+ })
1323+ }
1324+ }
0 commit comments