1111
1212import com .carrotsearch .randomizedtesting .generators .RandomPicks ;
1313
14- import org .apache .lucene .analysis .standard .StandardAnalyzer ;
1514import org .apache .lucene .codecs .Codec ;
1615import org .apache .lucene .codecs .KnnVectorsFormat ;
1716import org .apache .lucene .document .BinaryDocValuesField ;
1817import org .apache .lucene .document .KnnByteVectorField ;
1918import org .apache .lucene .document .KnnFloatVectorField ;
20- import org .apache .lucene .index .DirectoryReader ;
21- import org .apache .lucene .index .IndexWriterConfig ;
2219import org .apache .lucene .index .IndexableField ;
2320import org .apache .lucene .index .VectorEncoding ;
2421import org .apache .lucene .index .VectorSimilarityFunction ;
2522import org .apache .lucene .search .FieldExistsQuery ;
2623import org .apache .lucene .search .Query ;
27- import org .apache .lucene .tests .index .RandomIndexWriter ;
2824import org .apache .lucene .util .BytesRef ;
2925import org .apache .lucene .util .VectorUtil ;
30- import org .elasticsearch .common .Strings ;
3126import org .elasticsearch .common .bytes .BytesReference ;
32- import org .elasticsearch .common .settings .Settings ;
3327import org .elasticsearch .common .util .BigArrays ;
3428import org .elasticsearch .common .xcontent .XContentHelper ;
35- import org .elasticsearch .index .IndexSettings ;
3629import org .elasticsearch .index .IndexVersion ;
3730import org .elasticsearch .index .IndexVersions ;
3831import org .elasticsearch .index .codec .CodecService ;
4639import org .elasticsearch .index .mapper .MapperBuilderContext ;
4740import org .elasticsearch .index .mapper .MapperParsingException ;
4841import org .elasticsearch .index .mapper .MapperService ;
49- import org .elasticsearch .index .mapper .MapperTestCase ;
5042import org .elasticsearch .index .mapper .ParsedDocument ;
5143import org .elasticsearch .index .mapper .SourceToParse ;
5244import org .elasticsearch .index .mapper .ValueFetcher ;
6153import org .elasticsearch .test .ESTestCase ;
6254import org .elasticsearch .test .index .IndexVersionUtils ;
6355import org .elasticsearch .xcontent .XContentBuilder ;
64- import org .elasticsearch .xcontent .XContentType ;
6556import org .junit .AssumptionViolatedException ;
6657
6758import java .io .IOException ;
7465import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsFormat .DEFAULT_BEAM_WIDTH ;
7566import static org .apache .lucene .codecs .lucene99 .Lucene99HnswVectorsFormat .DEFAULT_MAX_CONN ;
7667import static org .apache .lucene .tests .index .BaseKnnVectorsFormatTestCase .randomNormalizedVector ;
77- import static org .elasticsearch .index .IndexSettings .SYNTHETIC_VECTORS ;
7868import static org .elasticsearch .index .codec .vectors .IVFVectorsFormat .DYNAMIC_NPROBE ;
7969import static org .elasticsearch .index .mapper .vectors .DenseVectorFieldMapper .DEFAULT_OVERSAMPLE ;
8070import static org .elasticsearch .index .mapper .vectors .DenseVectorFieldMapper .IVF_FORMAT ;
81- import static org .elasticsearch .test .hamcrest .ElasticsearchAssertions .assertToXContentEquivalent ;
8271import static org .hamcrest .Matchers .containsString ;
8372import static org .hamcrest .Matchers .equalTo ;
8473import static org .hamcrest .Matchers .instanceOf ;
8574import static org .mockito .Mockito .mock ;
8675import static org .mockito .Mockito .when ;
8776
88- public class DenseVectorFieldMapperTests extends MapperTestCase {
77+ public class DenseVectorFieldMapperTests extends SyntheticVectorsMapperTestCase {
8978
9079 private static final IndexVersion INDEXED_BY_DEFAULT_PREVIOUS_INDEX_VERSION = IndexVersions .V_8_10_0 ;
9180 private final ElementType elementType ;
@@ -95,7 +84,7 @@ public class DenseVectorFieldMapperTests extends MapperTestCase {
9584
9685 public DenseVectorFieldMapperTests () {
9786 this .elementType = randomFrom (ElementType .BYTE , ElementType .FLOAT , ElementType .BIT );
98- this .indexed = randomBoolean ();
87+ this .indexed = usually ();
9988 this .indexOptionsSet = this .indexed && randomBoolean ();
10089 int baseDims = ElementType .BIT == elementType ? 4 * Byte .SIZE : 4 ;
10190 int randomMultiplier = ElementType .FLOAT == elementType ? randomIntBetween (1 , 64 ) : 1 ;
@@ -160,17 +149,25 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I
160149 protected Object getSampleValueForDocument () {
161150 return elementType == ElementType .FLOAT
162151 ? convertToList (randomNormalizedVector (this .dims ))
163- : List . of (( byte ) 1 , ( byte ) 1 , ( byte ) 1 , ( byte ) 1 );
152+ : convertToList ( randomByteArrayOfLength ( elementType == ElementType . BIT ? this . dims / Byte . SIZE : dims ) );
164153 }
165154
166- private static List <Float > convertToList (float [] vector ) {
155+ public static List <Float > convertToList (float [] vector ) {
167156 List <Float > list = new ArrayList <>(vector .length );
168157 for (float v : vector ) {
169158 list .add (v );
170159 }
171160 return list ;
172161 }
173162
163+ public static List <Byte > convertToList (byte [] vector ) {
164+ List <Byte > list = new ArrayList <>(vector .length );
165+ for (byte v : vector ) {
166+ list .add (v );
167+ }
168+ return list ;
169+ }
170+
174171 @ Override
175172 protected void registerParameters (ParameterChecker checker ) throws IOException {
176173 checker .registerConflictCheck (
@@ -2920,249 +2917,6 @@ public void testInvalidVectorDimensions() {
29202917 }
29212918 }
29222919
2923- public void testSyntheticVectorsMinimalValidDocument () throws IOException {
2924- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2925- for (XContentType type : XContentType .values ()) {
2926- BytesReference source = generateRandomDoc (type , true , true , false , false , false );
2927- assertSyntheticVectors (buildVectorMapping (), source , type );
2928- }
2929- }
2930-
2931- public void testSyntheticVectorsFullDocument () throws IOException {
2932- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2933- for (XContentType type : XContentType .values ()) {
2934- BytesReference source = generateRandomDoc (type , true , true , true , true , false );
2935- assertSyntheticVectors (buildVectorMapping (), source , type );
2936- }
2937- }
2938-
2939- public void testSyntheticVectorsWithUnmappedFields () throws IOException {
2940- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2941- for (XContentType type : XContentType .values ()) {
2942- BytesReference source = generateRandomDoc (type , true , true , true , true , true );
2943- assertSyntheticVectors (buildVectorMapping (), source , type );
2944- }
2945- }
2946-
2947- public void testSyntheticVectorsMissingRootFields () throws IOException {
2948- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2949- for (XContentType type : XContentType .values ()) {
2950- BytesReference source = generateRandomDoc (type , false , false , false , false , false );
2951- assertSyntheticVectors (buildVectorMapping (), source , type );
2952- }
2953- }
2954-
2955- public void testSyntheticVectorsPartialNestedContent () throws IOException {
2956- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2957- for (XContentType type : XContentType .values ()) {
2958- BytesReference source = generateRandomDoc (type , true , true , true , false , false );
2959- assertSyntheticVectors (buildVectorMapping (), source , type );
2960- }
2961- }
2962-
2963- public void testFlatPathDocument () throws IOException {
2964- assumeTrue ("feature flag must be enabled for synthetic vectors" , SYNTHETIC_VECTORS );
2965- for (XContentType type : XContentType .values ()) {
2966- BytesReference source = generateRandomDocWithFlatPath (type );
2967- assertSyntheticVectors (buildVectorMapping (), source , type );
2968- }
2969- }
2970-
2971- private static String buildVectorMapping () throws IOException {
2972- try (XContentBuilder builder = XContentBuilder .builder (XContentType .JSON .xContent ())) {
2973- builder .startObject (); // root
2974- builder .startObject ("_doc" );
2975- builder .field ("dynamic" , "false" );
2976-
2977- builder .startObject ("properties" );
2978-
2979- // field
2980- builder .startObject ("field" );
2981- builder .field ("type" , "keyword" );
2982- builder .endObject ();
2983-
2984- // emb
2985- builder .startObject ("emb" );
2986- builder .field ("type" , "dense_vector" );
2987- builder .field ("dims" , 3 );
2988- builder .field ("similarity" , "cosine" );
2989- builder .endObject ();
2990-
2991- // another_field
2992- builder .startObject ("another_field" );
2993- builder .field ("type" , "keyword" );
2994- builder .endObject ();
2995-
2996- // obj
2997- builder .startObject ("obj" );
2998- builder .startObject ("properties" );
2999-
3000- // nested
3001- builder .startObject ("nested" );
3002- builder .field ("type" , "nested" );
3003- builder .startObject ("properties" );
3004-
3005- // nested.field
3006- builder .startObject ("field" );
3007- builder .field ("type" , "keyword" );
3008- builder .endObject ();
3009-
3010- // nested.emb
3011- builder .startObject ("emb" );
3012- builder .field ("type" , "dense_vector" );
3013- builder .field ("dims" , 3 );
3014- builder .field ("similarity" , "cosine" );
3015- builder .endObject ();
3016-
3017- // double_nested
3018- builder .startObject ("double_nested" );
3019- builder .field ("type" , "nested" );
3020- builder .startObject ("properties" );
3021-
3022- // double_nested.field
3023- builder .startObject ("field" );
3024- builder .field ("type" , "keyword" );
3025- builder .endObject ();
3026-
3027- // double_nested.emb
3028- builder .startObject ("emb" );
3029- builder .field ("type" , "dense_vector" );
3030- builder .field ("dims" , 3 );
3031- builder .field ("similarity" , "cosine" );
3032- builder .endObject ();
3033-
3034- builder .endObject (); // double_nested.properties
3035- builder .endObject (); // double_nested
3036-
3037- builder .endObject (); // nested.properties
3038- builder .endObject (); // nested
3039-
3040- builder .endObject (); // obj.properties
3041- builder .endObject (); // obj
3042-
3043- builder .endObject (); // properties
3044- builder .endObject (); // _doc
3045- builder .endObject (); // root
3046-
3047- return Strings .toString (builder );
3048- }
3049- }
3050-
3051- private BytesReference generateRandomDoc (
3052- XContentType xContentType ,
3053- boolean includeRootField ,
3054- boolean includeVector ,
3055- boolean includeNested ,
3056- boolean includeDoubleNested ,
3057- boolean includeUnmapped
3058- ) throws IOException {
3059- try (var builder = XContentBuilder .builder (xContentType .xContent ())) {
3060- builder .startObject ();
3061-
3062- if (includeRootField ) {
3063- builder .field ("field" , randomAlphaOfLengthBetween (1 , 2 ));
3064- }
3065-
3066- if (includeVector ) {
3067- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3068- }
3069-
3070- if (includeUnmapped ) {
3071- builder .field ("unmapped_field" , "extra" );
3072- }
3073-
3074- builder .startObject ("obj" );
3075- if (includeNested ) {
3076- builder .startArray ("nested" );
3077-
3078- // Entry with just a field
3079- builder .startObject ();
3080- builder .field ("field" , randomAlphaOfLengthBetween (3 , 6 ));
3081- builder .endObject ();
3082-
3083- // Empty object
3084- builder .startObject ();
3085- builder .endObject ();
3086-
3087- // Entry with emb and double_nested
3088- if (includeDoubleNested ) {
3089- builder .startObject ();
3090- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3091- builder .field ("field" , "nested_val" );
3092- builder .startArray ("double_nested" );
3093- for (int i = 0 ; i < 2 ; i ++) {
3094- builder .startObject ();
3095- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3096- builder .field ("field" , "dn_field" );
3097- builder .endObject ();
3098- }
3099- builder .endArray ();
3100- builder .endObject ();
3101- }
3102-
3103- builder .endArray ();
3104- }
3105- builder .endObject ();
3106-
3107- builder .endObject ();
3108- return BytesReference .bytes (builder );
3109- }
3110- }
3111-
3112- private BytesReference generateRandomDocWithFlatPath (XContentType xContentType ) throws IOException {
3113- try (var builder = XContentBuilder .builder (xContentType .xContent ())) {
3114- builder .startObject ();
3115-
3116- // Root-level fields
3117- builder .field ("field" , randomAlphaOfLengthBetween (1 , 2 ));
3118- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3119- builder .field ("another_field" , randomAlphaOfLengthBetween (3 , 5 ));
3120-
3121- // Simulated flattened "obj.nested"
3122- builder .startObject ("obj.nested" );
3123-
3124- builder .field ("field" , randomAlphaOfLengthBetween (4 , 8 ));
3125- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3126-
3127- builder .startArray ("double_nested" );
3128- for (int i = 0 ; i < randomIntBetween (1 , 2 ); i ++) {
3129- builder .startObject ();
3130- builder .field ("field" , randomAlphaOfLengthBetween (4 , 8 ));
3131- builder .array ("emb" , new float [] { 1 , 2 , 3 });
3132- builder .endObject ();
3133- }
3134- builder .endArray ();
3135-
3136- builder .endObject (); // end obj.nested
3137-
3138- builder .endObject ();
3139- return BytesReference .bytes (builder );
3140- }
3141- }
3142-
3143- private void assertSyntheticVectors (String mapping , BytesReference source , XContentType xContentType ) throws IOException {
3144- var settings = Settings .builder ().put (IndexSettings .INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING .getKey (), true ).build ();
3145- MapperService mapperService = createMapperService (settings , mapping );
3146- var parsedDoc = mapperService .documentMapper ().parse (new SourceToParse ("0" , source , xContentType ));
3147- try (var directory = newDirectory ()) {
3148- IndexWriterConfig config = newIndexWriterConfig (random (), new StandardAnalyzer ());
3149- try (var iw = new RandomIndexWriter (random (), directory , config )) {
3150- parsedDoc .updateSeqID (0 , 1 );
3151- parsedDoc .version ().setLongValue (0 );
3152- iw .addDocuments (parsedDoc .docs ());
3153- }
3154- try (var indexReader = wrapInMockESDirectoryReader (DirectoryReader .open (directory ))) {
3155- var provider = SourceProvider .fromLookup (
3156- mapperService .mappingLookup (),
3157- null ,
3158- mapperService .getMapperMetrics ().sourceFieldMetrics ()
3159- );
3160- var searchSource = provider .getSource (indexReader .leaves ().get (0 ), parsedDoc .docs ().size () - 1 );
3161- assertToXContentEquivalent (source , searchSource .internalSourceRef (), xContentType );
3162- }
3163- }
3164- }
3165-
31662920 @ Override
31672921 protected IngestScriptSupport ingestScriptSupport () {
31682922 throw new AssumptionViolatedException ("not supported" );
0 commit comments