@@ -1373,6 +1373,40 @@ func rebuildVectorIndex(ctx context.Context, factorySpecs []*tok.FactoryCreateSp
13731373 return err
13741374 }
13751375
1376+ numVectorsToCheck := 100
1377+ lenFreq := make (map [int ]int , numVectorsToCheck )
1378+ maxFreq := 0
1379+ dimension := 0
1380+ MemLayerInstance .IterateDisk (ctx , IterateDiskArgs {
1381+ Prefix : pk .DataPrefix (),
1382+ ReadTs : rb .StartTs ,
1383+ AllVersions : false ,
1384+ Reverse : false ,
1385+ CheckInclusion : func (uid uint64 ) error {
1386+ return nil
1387+ },
1388+ Function : func (l * List , pk x.ParsedKey ) error {
1389+ val , err := l .Value (rb .StartTs )
1390+ if err != nil {
1391+ return err
1392+ }
1393+ inVec := types .BytesAsFloatArray (val .Value .([]byte ))
1394+ lenFreq [len (inVec )] += 1
1395+ if lenFreq [len (inVec )] > maxFreq {
1396+ maxFreq = lenFreq [len (inVec )]
1397+ dimension = len (inVec )
1398+ }
1399+ numVectorsToCheck -= 1
1400+ if numVectorsToCheck <= 0 {
1401+ return ErrStopIteration
1402+ }
1403+ return nil
1404+ },
1405+ StartKey : x .DataKey (rb .Attr , 0 ),
1406+ })
1407+
1408+ fmt .Println ("Selecting vector dimension to be:" , dimension )
1409+
13761410 if indexer .NumSeedVectors () > 0 {
13771411 count := 0
13781412 MemLayerInstance .IterateDisk (ctx , IterateDiskArgs {
@@ -1389,6 +1423,9 @@ func rebuildVectorIndex(ctx context.Context, factorySpecs []*tok.FactoryCreateSp
13891423 return err
13901424 }
13911425 inVec := types .BytesAsFloatArray (val .Value .([]byte ))
1426+ if len (inVec ) != dimension {
1427+ return nil
1428+ }
13921429 count += 1
13931430 indexer .AddSeedVector (inVec )
13941431 if count == indexer .NumSeedVectors () {
@@ -1423,6 +1460,9 @@ func rebuildVectorIndex(ctx context.Context, factorySpecs []*tok.FactoryCreateSp
14231460 }
14241461
14251462 inVec := types .BytesAsFloatArray (val .Value .([]byte ))
1463+ if len (inVec ) != dimension {
1464+ return []* pb.DirectedEdge {}, nil
1465+ }
14261466 indexer .BuildInsert (ctx , uid , inVec )
14271467 return edges , nil
14281468 }
@@ -1449,6 +1489,12 @@ func rebuildVectorIndex(ctx context.Context, factorySpecs []*tok.FactoryCreateSp
14491489 }
14501490
14511491 inVec := types .BytesAsFloatArray (val .Value .([]byte ))
1492+ if len (inVec ) != dimension {
1493+ if pass_idx == 0 {
1494+ glog .Warningf ("Skipping vector with invalid dimension uid: %d, dimension: %d" , uid , len (inVec ))
1495+ }
1496+ return []* pb.DirectedEdge {}, nil
1497+ }
14521498 indexer .BuildInsert (ctx , uid , inVec )
14531499 return edges , nil
14541500 }
0 commit comments