4848import org .openmetadata .schema .security .client .OidcClientConfig ;
4949import org .openmetadata .schema .security .client .OpenMetadataJWTClientConfig ;
5050import org .openmetadata .schema .security .scim .ScimConfiguration ;
51+ import org .openmetadata .schema .service .configuration .elasticsearch .NaturalLanguageSearchConfiguration ;
5152import org .openmetadata .schema .service .configuration .slackApp .SlackAppConfiguration ;
5253import org .openmetadata .schema .services .connections .metadata .AuthProvider ;
5354import org .openmetadata .schema .services .connections .metadata .OpenMetadataConnection ;
7475import org .openmetadata .service .migration .MigrationValidationClient ;
7576import org .openmetadata .service .resources .settings .SettingsCache ;
7677import org .openmetadata .service .search .SearchRepository ;
78+ import org .openmetadata .service .search .vector .client .EmbeddingClient ;
7779import org .openmetadata .service .secrets .SecretsManager ;
7880import org .openmetadata .service .secrets .SecretsManagerFactory ;
7981import org .openmetadata .service .secrets .masker .PasswordEntityMasker ;
@@ -544,6 +546,11 @@ public ValidationResponse validateSystem(
544546 validation .setLogStorage (logStorageValidation );
545547 }
546548
549+ if (Entity .getSearchRepository ().isVectorEmbeddingEnabled ()) {
550+ validation .setAdditionalProperty (
551+ "Semantic Search" , getEmbeddingsValidation (applicationConfig ));
552+ }
553+
547554 addExtraValidations (applicationConfig , validation );
548555
549556 return validation ;
@@ -552,6 +559,133 @@ public ValidationResponse validateSystem(
552559 public void addExtraValidations (
553560 OpenMetadataApplicationConfig applicationConfig , ValidationResponse validation ) {}
554561
562+ private StepValidation getEmbeddingsValidation (OpenMetadataApplicationConfig applicationConfig ) {
563+ StepValidation embeddingsValidation = new StepValidation ();
564+ String description = "Embeddings are used to allow Semantic Search" ;
565+ SearchRepository searchRepository = Entity .getSearchRepository ();
566+
567+ String configMessage = getEmbeddingConfigurationMessage (applicationConfig );
568+
569+ if (searchRepository .getVectorIndexService () == null ) {
570+ return embeddingsValidation
571+ .withDescription (description )
572+ .withMessage ("Embeddings are not configured properly. " + configMessage )
573+ .withPassed (false );
574+ }
575+
576+ try {
577+ searchRepository .ensureVectorIndexDimension ();
578+ } catch (Exception e ) {
579+ LOG .error ("Vector dimension mismatch detected" , e );
580+ return embeddingsValidation
581+ .withDescription (description )
582+ .withMessage ("Vector dimension mismatch: " + e .getMessage ())
583+ .withPassed (false );
584+ }
585+
586+ try {
587+ return validateEmbeddingGeneration (
588+ searchRepository .getEmbeddingClient (), embeddingsValidation , description , configMessage );
589+ } catch (Exception e ) {
590+ LOG .error ("Error during embedding generation validation" , e );
591+ return embeddingsValidation
592+ .withDescription (description )
593+ .withMessage ("Embedding generation failed: " + e .getMessage () + ". " + configMessage )
594+ .withPassed (false );
595+ }
596+ }
597+
598+ private StepValidation validateEmbeddingGeneration (
599+ EmbeddingClient embeddingClient ,
600+ StepValidation embeddingsValidation ,
601+ String description ,
602+ String configMessage ) {
603+ String testText = "OpenMetadata embedding validation test" ;
604+ float [] embedding = embeddingClient .embed (testText );
605+
606+ if (embedding == null ) {
607+ return embeddingsValidation
608+ .withDescription (description )
609+ .withMessage ("Embedding generation returned null. " + configMessage )
610+ .withPassed (false );
611+ }
612+
613+ int expectedDimension = embeddingClient .getDimension ();
614+ if (embedding .length != expectedDimension ) {
615+ return embeddingsValidation
616+ .withDescription (description )
617+ .withMessage (
618+ String .format (
619+ "Embedding dimension mismatch: expected %d, got %d. %s" ,
620+ expectedDimension , embedding .length , configMessage ))
621+ .withPassed (false );
622+ }
623+
624+ boolean allZeros = true ;
625+ for (float value : embedding ) {
626+ if (value != 0.0f ) {
627+ allZeros = false ;
628+ break ;
629+ }
630+ }
631+ if (allZeros ) {
632+ return embeddingsValidation
633+ .withDescription (description )
634+ .withMessage ("Embedding generation returned all zeros. " + configMessage )
635+ .withPassed (false );
636+ }
637+
638+ return embeddingsValidation
639+ .withDescription (description )
640+ .withMessage (String .format ("Embeddings are working correctly. %s" , configMessage ))
641+ .withPassed (true );
642+ }
643+
644+ private String getEmbeddingConfigurationMessage (OpenMetadataApplicationConfig applicationConfig ) {
645+ try {
646+ NaturalLanguageSearchConfiguration nlpConfig =
647+ applicationConfig .getElasticSearchConfiguration ().getNaturalLanguageSearch ();
648+ String provider = nlpConfig .getEmbeddingProvider ();
649+ if (nullOrEmpty (provider )) {
650+ return "Required configuration: embeddingProvider" ;
651+ }
652+
653+ return switch (provider .toLowerCase ()) {
654+ case "djl" -> String .format (
655+ "DJL configuration: embeddingModel: %s" , nlpConfig .getDjl ().getEmbeddingModel ());
656+ case "bedrock" -> String .format (
657+ "Bedrock configuration: region: %s, embeddingModelId: %s, embeddingDimension %s" ,
658+ nlpConfig .getBedrock ().getAwsConfig () != null
659+ ? nlpConfig .getBedrock ().getAwsConfig ().getRegion ()
660+ : "not configured" ,
661+ nlpConfig .getBedrock ().getEmbeddingModelId (),
662+ nlpConfig .getBedrock ().getEmbeddingDimension ());
663+ case "openai" -> {
664+ String openaiEndpoint =
665+ nullOrEmpty (nlpConfig .getOpenai ().getEndpoint ())
666+ ? "api.openai.com"
667+ : nlpConfig .getOpenai ().getEndpoint ();
668+ String deploymentInfo =
669+ nullOrEmpty (nlpConfig .getOpenai ().getDeploymentName ())
670+ ? ""
671+ : String .format (
672+ ", deploymentName: %s" , nlpConfig .getOpenai ().getDeploymentName ());
673+ yield String .format (
674+ "OpenAI configuration: endpoint: %s, embeddingModelId: %s, embeddingDimension: %s%s" ,
675+ openaiEndpoint ,
676+ nlpConfig .getOpenai ().getEmbeddingModelId (),
677+ nlpConfig .getOpenai ().getEmbeddingDimension (),
678+ deploymentInfo );
679+ }
680+ default -> String .format (
681+ "Unknown provider '%s'. Supported providers: djl, bedrock, openai" , provider );
682+ };
683+ } catch (Exception e ) {
684+ LOG .error ("Error getting embedding configuration" , e );
685+ return "Unable to determine embedding configuration" ;
686+ }
687+ }
688+
555689 private StepValidation getDatabaseValidation (OpenMetadataApplicationConfig applicationConfig ) {
556690 try {
557691 dao .testConnection ();
0 commit comments