From 1e56a21ba26a063e41e03ca2cc5bac2eff2cadb0 Mon Sep 17 00:00:00 2001 From: mikolaj Date: Wed, 14 May 2025 17:04:05 +0200 Subject: [PATCH 1/5] IBX-9846: Added search using embeddings --- .../Configuration/Parser/Embeddings.php | 68 ++++++++ src/bundle/Core/IbexaCoreBundle.php | 1 + .../Resources/config/default_settings.yml | 3 + .../Core/Resources/config/embeddings.yml | 24 +++ src/bundle/Core/Resources/config/services.yml | 1 + .../Values/Content/EmbeddingQuery.php | 116 +++++++++++++ .../Values/Content/EmbeddingQueryBuilder.php | 77 +++++++++ .../Repository/Values/Content/Query.php | 7 +- .../Values/Content/Query/Embedding.php | 31 ++++ .../Content/QueryValidatorInterface.php | 12 ++ .../EmbeddingConfigurationInterface.php | 41 +++++ .../Embedding/EmbeddingProviderInterface.php | 17 ++ .../EmbeddingProviderRegistryInterface.php | 21 +++ .../EmbeddingProviderResolverInterface.php | 14 ++ .../Search/FieldType/EmbeddingField.php | 25 +++ .../FieldType/EmbeddingFieldFactory.php | 30 ++++ .../settings/search_engines/common.yml | 3 + .../search_engines/field_value_mappers.yml | 4 + .../FieldValueMapper/EmbeddingMapper.php | 27 +++ .../Embedding/EmbeddingConfiguration.php | 81 +++++++++ .../Embedding/EmbeddingProviderRegistry.php | 45 +++++ .../Embedding/EmbeddingProviderResolver.php | 43 +++++ .../Configuration/Parser/EmbeddingsTest.php | 163 ++++++++++++++++++ .../Content/EmbeddingQueryBuilderTest.php | 79 +++++++++ .../FieldType/EmbeddingFieldFactoryTest.php | 53 ++++++ .../Embedding/EmbeddingConfigurationTest.php | 140 +++++++++++++++ .../EmbeddingProviderRegistryTest.php | 76 ++++++++ .../EmbeddingProviderResolverTest.php | 82 +++++++++ 28 files changed, 1283 insertions(+), 1 deletion(-) create mode 100644 src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php create mode 100644 src/bundle/Core/Resources/config/embeddings.yml create mode 100644 src/contracts/Repository/Values/Content/EmbeddingQuery.php create mode 100644 src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php create mode 100644 src/contracts/Repository/Values/Content/Query/Embedding.php create mode 100644 src/contracts/Repository/Values/Content/QueryValidatorInterface.php create mode 100644 src/contracts/Search/Embedding/EmbeddingConfigurationInterface.php create mode 100644 src/contracts/Search/Embedding/EmbeddingProviderInterface.php create mode 100644 src/contracts/Search/Embedding/EmbeddingProviderRegistryInterface.php create mode 100644 src/contracts/Search/Embedding/EmbeddingProviderResolverInterface.php create mode 100644 src/contracts/Search/FieldType/EmbeddingField.php create mode 100644 src/contracts/Search/FieldType/EmbeddingFieldFactory.php create mode 100644 src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php create mode 100644 src/lib/Search/Embedding/EmbeddingConfiguration.php create mode 100644 src/lib/Search/Embedding/EmbeddingProviderRegistry.php create mode 100644 src/lib/Search/Embedding/EmbeddingProviderResolver.php create mode 100644 tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php create mode 100644 tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php create mode 100644 tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php create mode 100644 tests/lib/Search/Embedding/EmbeddingConfigurationTest.php create mode 100644 tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php create mode 100644 tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php diff --git a/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php new file mode 100644 index 0000000000..df56afcbde --- /dev/null +++ b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php @@ -0,0 +1,68 @@ +arrayNode('embedding_models') + ->normalizeKeys(false) + ->info('Defines available embedding models') + ->arrayPrototype() + ->children() + ->scalarNode('name')->isRequired()->end() + ->integerNode('dimensions')->isRequired()->end() + ->scalarNode('field_suffix')->isRequired()->end() + ->scalarNode('embedding_provider')->isRequired()->end() + ->end() + ->end() + ->end() + ->scalarNode('default_embedding_model') + ->info('Default embedding model identifier') + ->defaultValue('text-embedding-ada-002') + ->end(); + } + + /** + * @param array $config + */ + public function preMap(array $config, ContextualizerInterface $contextualizer): void + { + $contextualizer->mapConfigArray('embedding_models', $config); + $contextualizer->mapSetting('default_embedding_model', $config); + } + + /** + * @param array $scopeSettings + */ + public function mapConfig(array &$scopeSettings, $currentScope, ContextualizerInterface $contextualizer): void + { + // Nothing to do here. + } +} diff --git a/src/bundle/Core/IbexaCoreBundle.php b/src/bundle/Core/IbexaCoreBundle.php index be9b0ab962..c063430758 100644 --- a/src/bundle/Core/IbexaCoreBundle.php +++ b/src/bundle/Core/IbexaCoreBundle.php @@ -123,6 +123,7 @@ public function getContainerExtension() new ConfigParser\UrlChecker(), new ConfigParser\TwigVariablesParser(), new ConfigParser\UserContentTypeIdentifier(), + new ConfigParser\Embeddings(), ], [ new RepositoryConfigParser\Storage(), diff --git a/src/bundle/Core/Resources/config/default_settings.yml b/src/bundle/Core/Resources/config/default_settings.yml index 18e683ada3..6dc43995e5 100644 --- a/src/bundle/Core/Resources/config/default_settings.yml +++ b/src/bundle/Core/Resources/config/default_settings.yml @@ -273,3 +273,6 @@ parameters: writeFlags: ~ linkHandling: ~ permissions: [ ] + + ibexa.site_access.config.default.embedding_models: [] + ibexa.site_access.config.default.default_embedding_model: 'text-embedding-ada-002' diff --git a/src/bundle/Core/Resources/config/embeddings.yml b/src/bundle/Core/Resources/config/embeddings.yml new file mode 100644 index 0000000000..569a1c5da4 --- /dev/null +++ b/src/bundle/Core/Resources/config/embeddings.yml @@ -0,0 +1,24 @@ +services: + _defaults: + autowire: true + autoconfigure: true + public: false + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderRegistryInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry + + Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry: + arguments: + $embeddingProviders: !tagged_iterator { tag: 'ibexa.embedding_provider', index_by: 'provider_name' } + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderResolverInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingProviderResolver + + Ibexa\Core\Search\Embedding\EmbeddingProviderResolver: ~ + + Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface: + alias: Ibexa\Core\Search\Embedding\EmbeddingConfiguration + + Ibexa\Core\Search\Embedding\EmbeddingConfiguration: ~ + + Ibexa\Contracts\Core\Search\FieldType\EmbeddingFieldFactory: ~ diff --git a/src/bundle/Core/Resources/config/services.yml b/src/bundle/Core/Resources/config/services.yml index 8babb6f451..170fe884f2 100644 --- a/src/bundle/Core/Resources/config/services.yml +++ b/src/bundle/Core/Resources/config/services.yml @@ -1,5 +1,6 @@ imports: - { resource: commands.yml } + - { resource: embeddings.yml } parameters: ibexa.site_access.default.name: default diff --git a/src/contracts/Repository/Values/Content/EmbeddingQuery.php b/src/contracts/Repository/Values/Content/EmbeddingQuery.php new file mode 100644 index 0000000000..b0e4a957bb --- /dev/null +++ b/src/contracts/Repository/Values/Content/EmbeddingQuery.php @@ -0,0 +1,116 @@ +embedding; + } + + public function setEmbedding(?Embedding $embedding): void + { + $this->embedding = $embedding; + } + + public function getFilter(): ?Criterion + { + return $this->filter; + } + + public function setFilter(Criterion $filter): void + { + $this->filter = $filter; + } + + /** + * @return \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] + */ + public function getAggregations(): array + { + return $this->aggregations; + } + + /** + * @param \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] $aggregations + */ + public function setAggregations(array $aggregations): void + { + $this->aggregations = $aggregations; + } + + public function getOffset(): int + { + return $this->offset; + } + + public function setOffset(int $offset): void + { + $this->offset = $offset; + } + + public function getLimit(): int + { + return $this->limit; + } + + public function setLimit(int $limit): void + { + $this->limit = $limit; + } + + public function setPerformCount(bool $performCount): void + { + $this->performCount = $performCount; + } + + public function getPerformCount(): bool + { + return $this->performCount; + } + + public function isValid(): bool + { + $invalid = []; + + if ($this->query !== null) { + $invalid[] = 'query'; + } + if (!empty($this->sortClauses)) { + $invalid[] = 'sortClauses'; + } + if (!empty($this->facetBuilders)) { + $invalid[] = 'facetBuilders'; + } + if ($this->spellcheck !== null) { + $invalid[] = 'spellcheck'; + } + + if (count($invalid) > 0) { + throw new InvalidArgumentException( + sprintf( + 'EmbeddingQuery may not set [%s].', + implode(', ', $invalid) + ) + ); + } + + return true; + } +} diff --git a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php new file mode 100644 index 0000000000..e932d4b5ae --- /dev/null +++ b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php @@ -0,0 +1,77 @@ +query = new EmbeddingQuery(); + } + + public static function create(): self + { + return new self(); + } + + public function withEmbedding(Embedding $embed): self + { + $this->query->setEmbedding($embed); + + return $this; + } + + public function setLimit(int $limit): self + { + $this->query->setLimit($limit); + + return $this; + } + + public function setOffset(int $offset): self + { + $this->query->setOffset($offset); + + return $this; + } + + public function setFilter(Criterion $filter): self + { + $this->query->setFilter($filter); + + return $this; + } + + /** + * @param array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> $aggregations + */ + public function setAggregations(array $aggregations): self + { + $this->query->setAggregations($aggregations); + + return $this; + } + + public function setPerformCount(bool $performCount): self + { + $this->query->setPerformCount($performCount); + + return $this; + } + + public function build(): EmbeddingQuery + { + return $this->query; + } +} diff --git a/src/contracts/Repository/Values/Content/Query.php b/src/contracts/Repository/Values/Content/Query.php index ebcb9aebc7..c32f98826a 100644 --- a/src/contracts/Repository/Values/Content/Query.php +++ b/src/contracts/Repository/Values/Content/Query.php @@ -14,7 +14,7 @@ /** * This class is used to perform a Content query. */ -class Query extends ValueObject +class Query extends ValueObject implements QueryValidatorInterface { public const SORT_ASC = 'ascending'; public const SORT_DESC = 'descending'; @@ -102,6 +102,11 @@ class Query extends ValueObject * @var bool */ public $performCount = true; + + public function isValid(): bool + { + return true; + } } class_alias(Query::class, 'eZ\Publish\API\Repository\Values\Content\Query'); diff --git a/src/contracts/Repository/Values/Content/Query/Embedding.php b/src/contracts/Repository/Values/Content/Query/Embedding.php new file mode 100644 index 0000000000..21a4c00a5d --- /dev/null +++ b/src/contracts/Repository/Values/Content/Query/Embedding.php @@ -0,0 +1,31 @@ +value = $value; + } + + /** @return float[] */ + public function getValue(): array + { + return $this->value; + } +} diff --git a/src/contracts/Repository/Values/Content/QueryValidatorInterface.php b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php new file mode 100644 index 0000000000..4a3ae23f4f --- /dev/null +++ b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php @@ -0,0 +1,12 @@ + + */ + public function getEmbeddingModels(): array; + + /** + * @return string[] + */ + public function getEmbeddingModelIdentifiers(): array; + + /** + * @return array{name: string, dimensions: int, field_suffix: string, embedding_provider: string} + */ + public function getEmbeddingModel(string $identifier): array; + + public function getDefaultEmbeddingModelIdentifier(): string; + + /** + * @return array{name: string, dimensions: int, field_suffix: string, 'embedding_provider': string} + */ + public function getDefaultEmbeddingModel(): array; + + public function getDefaultEmbeddingProvider(): string; + + public function getDefaultEmbeddingModelFieldSuffix(): string; +} diff --git a/src/contracts/Search/Embedding/EmbeddingProviderInterface.php b/src/contracts/Search/Embedding/EmbeddingProviderInterface.php new file mode 100644 index 0000000000..dbaf439a20 --- /dev/null +++ b/src/contracts/Search/Embedding/EmbeddingProviderInterface.php @@ -0,0 +1,17 @@ + $type]); + } + + /** + * @param string $type Has to be handled by configured search engine (ez_dense_vector_ada002). + */ + public static function create(string $type): self + { + return new self($type); + } +} diff --git a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php new file mode 100644 index 0000000000..4bf0f2dc7f --- /dev/null +++ b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php @@ -0,0 +1,30 @@ +config = $config; + } + + public function create(?string $type = null): EmbeddingField + { + if ($type !== null) { + return EmbeddingField::create($type); + } + + $suffix = $this->config->getDefaultEmbeddingModelFieldSuffix(); + + return EmbeddingField::create('ez_dense_vector_' . $suffix); + } +} diff --git a/src/lib/Resources/settings/search_engines/common.yml b/src/lib/Resources/settings/search_engines/common.yml index 9e31bcb722..e8da03c327 100644 --- a/src/lib/Resources/settings/search_engines/common.yml +++ b/src/lib/Resources/settings/search_engines/common.yml @@ -22,6 +22,9 @@ parameters: ez_geolocation: 'gl' ez_document: 'doc' ez_fulltext: 'fulltext' + ez_dense_vector_ada002: 'ada002_dv' + ez_dense_vector_3small: '3small_dv' + ez_dense_vector_3large: '3large_dv' services: # Note: services tagged with 'ibexa.field_type.indexable' diff --git a/src/lib/Resources/settings/search_engines/field_value_mappers.yml b/src/lib/Resources/settings/search_engines/field_value_mappers.yml index 0d5295ba6d..eb80c572db 100644 --- a/src/lib/Resources/settings/search_engines/field_value_mappers.yml +++ b/src/lib/Resources/settings/search_engines/field_value_mappers.yml @@ -73,3 +73,7 @@ services: Ibexa\Core\Search\Common\FieldValueMapper\MultipleRemoteIdentifierMapper: tags: - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\MultipleRemoteIdentifierField } + + Ibexa\Core\Search\Common\FieldValueMapper\EmbeddingMapper: + tags: + - { name: ibexa.search.common.field_value.mapper, maps: Ibexa\Contracts\Core\Search\FieldType\EmbeddingField } diff --git a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php new file mode 100644 index 0000000000..e8d858076b --- /dev/null +++ b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php @@ -0,0 +1,27 @@ +getType() instanceof EmbeddingField; + } + + public function map(Field $field) + { + return $field->getValue(); + } +} diff --git a/src/lib/Search/Embedding/EmbeddingConfiguration.php b/src/lib/Search/Embedding/EmbeddingConfiguration.php new file mode 100644 index 0000000000..206ef0575e --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingConfiguration.php @@ -0,0 +1,81 @@ +configResolver = $configResolver; + } + + /** + * @return array + */ + public function getEmbeddingModels(): array + { + return (array)$this->configResolver->getParameter('embedding_models'); + } + + /** + * @return string[] + */ + public function getEmbeddingModelIdentifiers(): array + { + return array_keys($this->getEmbeddingModels()); + } + + /** + * @return array{name: string, dimensions: int, field_suffix: string, embedding_provider: string} + */ + public function getEmbeddingModel(string $identifier): array + { + $models = $this->getEmbeddingModels(); + + if (!isset($models[$identifier])) { + throw new InvalidArgumentException( + sprintf('Embedding model "%s" is not configured.', $identifier) + ); + } + + return $models[$identifier]; + } + + public function getDefaultEmbeddingModelIdentifier(): string + { + return (string)$this->configResolver->getParameter('default_embedding_model'); + } + + /** + * @return array{name: string, dimensions: int, field_suffix: string, 'embedding_provider': string} + */ + public function getDefaultEmbeddingModel(): array + { + return $this->getEmbeddingModel( + $this->getDefaultEmbeddingModelIdentifier() + ); + } + + public function getDefaultEmbeddingProvider(): string + { + return (string)$this->getDefaultEmbeddingModel()['embedding_provider']; + } + + public function getDefaultEmbeddingModelFieldSuffix(): string + { + return (string)$this->getDefaultEmbeddingModel()['field_suffix']; + } +} diff --git a/src/lib/Search/Embedding/EmbeddingProviderRegistry.php b/src/lib/Search/Embedding/EmbeddingProviderRegistry.php new file mode 100644 index 0000000000..8dd1bf0317 --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingProviderRegistry.php @@ -0,0 +1,45 @@ + */ + private PoolInterface $pool; + + /** + * @param iterable<\Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderInterface> $embeddingProviders + */ + public function __construct(iterable $embeddingProviders = []) + { + $this->pool = new Pool(EmbeddingProviderInterface::class, $embeddingProviders); + $this->pool->setExceptionArgumentName('embedding_provider'); + $this->pool->setExceptionMessageTemplate('Could not find %s for \'%s\' embedding provider.'); + } + + public function getEmbeddingProviders(): iterable + { + return $this->pool->getEntries(); + } + + public function hasEmbeddingProvider(string $identifier): bool + { + return $this->pool->has($identifier); + } + + public function getEmbeddingProvider(string $identifier): EmbeddingProviderInterface + { + return $this->pool->get($identifier); + } +} diff --git a/src/lib/Search/Embedding/EmbeddingProviderResolver.php b/src/lib/Search/Embedding/EmbeddingProviderResolver.php new file mode 100644 index 0000000000..175e34d5fc --- /dev/null +++ b/src/lib/Search/Embedding/EmbeddingProviderResolver.php @@ -0,0 +1,43 @@ +embeddingConfiguration = $embeddingConfiguration; + $this->registry = $registry; + } + + public function resolve(): EmbeddingProviderInterface + { + $defaultEmbeddingProvider = $this->embeddingConfiguration->getDefaultEmbeddingProvider(); + + if (!$this->registry->hasEmbeddingProvider($defaultEmbeddingProvider)) { + throw new RuntimeException( + sprintf('No embedding provider registered for identifier "%s".', $defaultEmbeddingProvider) + ); + } + + return $this->registry->getEmbeddingProvider($defaultEmbeddingProvider); + } +} diff --git a/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php new file mode 100644 index 0000000000..31ebad14d7 --- /dev/null +++ b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php @@ -0,0 +1,163 @@ + + */ + protected function getMinimalConfiguration(): array + { + $input = file_get_contents(__DIR__ . '/../../Fixtures/ezpublish_minimal.yml'); + + if ($input === false) { + self::fail('Failed to load ezpublish_minimal.yml'); + } + + return Yaml::parse($input); + } + + public function testDefaultEmbeddingsSettings(): void + { + $this->load(); + + $this->assertConfigResolverParameterValue('embedding_models', [], 'ibexa_demo_site'); + $this->assertConfigResolverParameterValue('default_embedding_model', 'text-embedding-ada-002', 'ibexa_demo_site'); + } + + /** + * @param array $config + * @param array $expected + * + * @dataProvider embeddingsSettingsProvider + */ + public function testEmbeddingsSettings(array $config, array $expected): void + { + $this->load( + [ + 'system' => [ + 'ibexa_demo_site' => $config, + ], + ] + ); + + foreach ($expected as $key => $val) { + $this->assertConfigResolverParameterValue($key, $val, 'ibexa_demo_site'); + } + } + + /** + * @return array, + * default_embedding_model?: string + * }, + * array{ + * embedding_models: array, + * default_embedding_model: string + * } + * }> + */ + public function embeddingsSettingsProvider(): array + { + return [ + [ + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-small' => [ + 'name' => 'text-embedding-3-small', + 'dimensions' => 1536, + 'field_suffix' => '3small', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-large' => [ + 'name' => 'text-embedding-3-large', + 'dimensions' => 3072, + 'field_suffix' => '3large', + 'embedding_provider' => 'ibexa_openai', + ], + ], + ], + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-small' => [ + 'name' => 'text-embedding-3-small', + 'dimensions' => 1536, + 'field_suffix' => '3small', + 'embedding_provider' => 'ibexa_openai', + ], + 'text-embedding-3-large' => [ + 'name' => 'text-embedding-3-large', + 'dimensions' => 3072, + 'field_suffix' => '3large', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-ada-002', + ], + ], + [ + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-foo', + ], + [ + 'embedding_models' => [ + 'text-embedding-ada-002' => [ + 'name' => 'text-embedding-ada-002', + 'dimensions' => 1536, + 'field_suffix' => 'ada', + 'embedding_provider' => 'ibexa_openai', + ], + ], + 'default_embedding_model' => 'text-embedding-foo', + ], + ], + ]; + } +} diff --git a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php new file mode 100644 index 0000000000..b24a204206 --- /dev/null +++ b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php @@ -0,0 +1,79 @@ +createMock(Embedding::class); + $aggregations = [$this->createMock(Aggregation::class), $this->createMock(Aggregation::class)]; + + $builder = EmbeddingQueryBuilder::create() + ->withEmbedding($embedding) + ->setLimit(10) + ->setOffset(5) + ->setPerformCount(true) + ->setAggregations($aggregations); + + $query = $builder->build(); + + $this->assertSame( + $embedding, + $query->getEmbedding(), + 'Embedding should be set by builder' + ); + + $this->assertEquals(10, $query->getLimit(), 'Limit should be set by builder'); + $this->assertEquals(5, $query->getOffset(), 'Offset should be set by builder'); + $this->assertTrue($query->getPerformCount(), 'PerformCount flag should be true'); + + $aggregations = $query->getAggregations(); + $this->assertIsArray($aggregations, 'Aggregations must be array'); + $this->assertCount(2, $aggregations, 'Two aggregations added'); + } + + public function testIsValidReturnsTrueForCleanQuery(): void + { + $query = EmbeddingQueryBuilder::create() + ->withEmbedding($this->createMock(Embedding::class)) + ->build(); + + $this->assertTrue($query->isValid()); + } + + public function testSettingSortClausesThenIsValidThrows(): void + { + $query = EmbeddingQueryBuilder::create() + ->withEmbedding($this->createMock(Embedding::class)) + ->build(); + + // bypass setter via array-append magic + $query->sortClauses[] = new ContentName(BaseQuery::SORT_ASC); + $query->query = $this->createMock(Criterion::class); + $query->facetBuilders = [$this->createMock(FacetBuilder::class)]; + $query->spellcheck = new Spellcheck('foo'); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('EmbeddingQuery may not set [query, sortClauses, facetBuilders, spellcheck].'); + + $query->isValid(); + } +} diff --git a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php new file mode 100644 index 0000000000..85bf87cd03 --- /dev/null +++ b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php @@ -0,0 +1,53 @@ +createMock(EmbeddingConfigurationInterface::class); + $config + ->expects($this->once()) + ->method('getDefaultEmbeddingModelFieldSuffix') + ->willReturn($suffix); + + $factory = new EmbeddingFieldFactory($config); + + $field = $factory->create(); + + $this->assertSame( + 'ez_dense_vector_model_123', + $field->getType(), + 'Factory should prepend "ez_dense_vector_" to the suffix from theconfig' + ); + } + + public function testCreateWithCustomType(): void + { + $config = $this->createMock(EmbeddingConfigurationInterface::class); + $config + ->expects($this->never()) + ->method('getDefaultEmbeddingModelFieldSuffix'); + + $factory = new EmbeddingFieldFactory($config); + $customType = 'custom_model'; + + $field = $factory->create($customType); + + $this->assertSame( + $customType, + $field->getType(), + 'Factory should use the explicit type when provided' + ); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php new file mode 100644 index 0000000000..599b3c0ae6 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php @@ -0,0 +1,140 @@ + ['name' => 'text-embedding-3-small', 'dimensions' => 1536, 'field_suffix' => '3small', 'embedding_provider' => 'ibexa_openai'], + 'text-embedding-3-large' => ['name' => 'text-embedding-3-large', 'dimensions' => 3072, 'field_suffix' => '3large', 'embedding_provider' => 'ibexa_openai'], + 'text-embedding-ada-002' => ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + ]; + + /** @var \Ibexa\Contracts\Core\SiteAccess\ConfigResolverInterface&\PHPUnit\Framework\MockObject\MockObject */ + private ConfigResolverInterface $configResolver; + + private EmbeddingConfiguration $config; + + protected function setUp(): void + { + $this->configResolver = $this->createMock(ConfigResolverInterface::class); + $this->config = new EmbeddingConfiguration( + $this->configResolver + ); + } + + public function testGetDefaultEmbeddingModel(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame( + ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + $this->config->getDefaultEmbeddingModel() + ); + } + + public function testGetEmbeddingModelIdentifiers(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame( + ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], + $this->config->getEmbeddingModelIdentifiers() + ); + } + + public function testGetEmbeddingModels(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + $this->assertSame(self::MODELS, $this->config->getEmbeddingModels()); + $this->assertSame( + ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], + $this->config->getEmbeddingModelIdentifiers() + ); + } + + public function testGetEmbeddingModel(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + $this->assertSame( + ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], + $this->config->getEmbeddingModel('text-embedding-ada-002') + ); + } + + public function testGetEmbeddingModelWillThrowException(): void + { + $this->configResolver + ->method('getParameter') + ->with('embedding_models') + ->willReturn(self::MODELS); + + self::expectException(InvalidArgumentException::class); + self::expectExceptionMessage('Embedding model "non-existing-model" is not configured.'); + + $this->config->getEmbeddingModel('non-existing-model'); + } + + public function testGetDefaultEmbeddingModelIdentifier(): void + { + $this->configResolver + ->method('getParameter') + ->with('default_embedding_model') + ->willReturn('text-embedding-ada-002'); + + $this->assertSame('text-embedding-ada-002', $this->config->getDefaultEmbeddingModelIdentifier()); + } + + public function testGetDefaultEmbeddingProvider(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame('ibexa_openai', $this->config->getDefaultEmbeddingProvider()); + } + + public function getDefaultEmbeddingModelFieldSuffix(): void + { + $this->configResolver + ->method('getParameter') + ->willReturnMap([ + ['default_embedding_model', null, null, 'text-embedding-ada-002'], + ['embedding_models', null, null, self::MODELS], + ]); + + $this->assertSame('ada002', $this->config->getDefaultEmbeddingModelFieldSuffix()); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php b/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php new file mode 100644 index 0000000000..3f116e9c02 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingProviderRegistryTest.php @@ -0,0 +1,76 @@ + $this->createMock(EmbeddingProviderInterface::class), + ]); + + self::assertTrue($registry->hasEmbeddingProvider('existing')); + self::assertFalse($registry->hasEmbeddingProvider('non-existing')); + } + + public function testGetEmbeddingProvider(): void + { + $expectedEmbeddingProvider = $this->createMock(EmbeddingProviderInterface::class); + + $registry = new EmbeddingProviderRegistry([ + 'example' => $expectedEmbeddingProvider, + ]); + + self::assertSame($expectedEmbeddingProvider, $registry->getEmbeddingProvider('example')); + } + + public function testGetEmbeddingProviderThrowsInvalidArgumentException(): void + { + $message = "Argument 'embedding_provider' is invalid: Could not find " + . "Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderInterface for 'non-existing' embedding provider."; + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage($message); + + $registry = new EmbeddingProviderRegistry([/* Empty registry */]); + $registry->getEmbeddingProvider('non-existing'); + } + + public function testGetEmbeddingProviders(): void + { + $embeddingProviderA = $this->createMock(EmbeddingProviderInterface::class); + $embeddingProviderB = $this->createMock(EmbeddingProviderInterface::class); + + $registry = new EmbeddingProviderRegistry([ + 'existingA' => $embeddingProviderA, + 'existingB' => $embeddingProviderB, + ]); + + $embeddingProviders = $registry->getEmbeddingProviders(); + + $this->assertIsArray( + $embeddingProviders, + 'getProviders() should return an array of embedding providers' + ); + + self::assertSame( + [ + 'existingA' => $embeddingProviderA, + 'existingB' => $embeddingProviderB, + ], + $embeddingProviders + ); + } +} diff --git a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php new file mode 100644 index 0000000000..fb7ac1cf42 --- /dev/null +++ b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php @@ -0,0 +1,82 @@ +configuration = $this->createMock(EmbeddingConfigurationInterface::class); + $this->registry = $this->createMock(EmbeddingProviderRegistryInterface::class); + $this->resolver = new EmbeddingProviderResolver( + $this->configuration, + $this->registry + ); + } + + public function testResolveReturnsProviderWhenAvailable(): void + { + $embeddingProviderIdentifier = 'ibexa_openai'; + $mockProvider = $this->createMock(EmbeddingProviderInterface::class); + + $this->configuration + ->method('getDefaultEmbeddingProvider') + ->willReturn($embeddingProviderIdentifier); + + $this->registry + ->method('hasEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn(true); + + $this->registry + ->method('getEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn($mockProvider); + + $resolved = $this->resolver->resolve(); + + $this->assertSame($mockProvider, $resolved); + } + + public function testResolveThrowsWhenProviderMissing(): void + { + $embeddingProviderIdentifier = 'foo'; + + $this->configuration + ->method('getDefaultEmbeddingProvider') + ->willReturn($embeddingProviderIdentifier); + + $this->registry + ->method('hasEmbeddingProvider') + ->with($embeddingProviderIdentifier) + ->willReturn(false); + + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage( + sprintf('No embedding provider registered for identifier "%s".', $embeddingProviderIdentifier) + ); + + $this->resolver->resolve(); + } +} From e8e2968c3455c4e6a97fd69fd6567767ea75d619 Mon Sep 17 00:00:00 2001 From: mikolaj Date: Mon, 26 May 2025 11:18:18 +0200 Subject: [PATCH 2/5] IBX-9846: Added dedicated exception --- .../EmbeddingProviderResolverInterface.php | 3 +++ .../EmbeddingResolverNotFoundException.php | 20 +++++++++++++++++++ .../Embedding/EmbeddingProviderResolver.php | 6 +++--- .../EmbeddingProviderResolverTest.php | 4 ++-- 4 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php diff --git a/src/contracts/Search/Embedding/EmbeddingProviderResolverInterface.php b/src/contracts/Search/Embedding/EmbeddingProviderResolverInterface.php index f962d6fb5c..d71dae4f30 100644 --- a/src/contracts/Search/Embedding/EmbeddingProviderResolverInterface.php +++ b/src/contracts/Search/Embedding/EmbeddingProviderResolverInterface.php @@ -10,5 +10,8 @@ interface EmbeddingProviderResolverInterface { + /** + * @throws \Ibexa\Contracts\Core\Search\Embedding\EmbeddingResolverNotFoundException + */ public function resolve(): EmbeddingProviderInterface; } diff --git a/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php b/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php new file mode 100644 index 0000000000..5e1207152c --- /dev/null +++ b/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php @@ -0,0 +1,20 @@ +embeddingConfiguration->getDefaultEmbeddingProvider(); if (!$this->registry->hasEmbeddingProvider($defaultEmbeddingProvider)) { - throw new RuntimeException( - sprintf('No embedding provider registered for identifier "%s".', $defaultEmbeddingProvider) + throw new EmbeddingResolverNotFoundException( + $defaultEmbeddingProvider ); } diff --git a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php index fb7ac1cf42..7ba3471411 100644 --- a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php +++ b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php @@ -11,9 +11,9 @@ use Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface; use Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderInterface; use Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderRegistryInterface; +use Ibexa\Contracts\Core\Search\Embedding\EmbeddingResolverNotFoundException; use Ibexa\Core\Search\Embedding\EmbeddingProviderResolver; use PHPUnit\Framework\TestCase; -use RuntimeException; final class EmbeddingProviderResolverTest extends TestCase { @@ -72,7 +72,7 @@ public function testResolveThrowsWhenProviderMissing(): void ->with($embeddingProviderIdentifier) ->willReturn(false); - $this->expectException(RuntimeException::class); + $this->expectException(EmbeddingResolverNotFoundException::class); $this->expectExceptionMessage( sprintf('No embedding provider registered for identifier "%s".', $embeddingProviderIdentifier) ); From 266bc712f87392a79acb36358fb2a25ee6ec30fa Mon Sep 17 00:00:00 2001 From: mikolaj Date: Thu, 26 Jun 2025 11:06:35 +0200 Subject: [PATCH 3/5] Fixes field name --- src/contracts/Search/FieldType/EmbeddingField.php | 2 +- src/contracts/Search/FieldType/EmbeddingFieldFactory.php | 2 +- src/lib/Resources/settings/search_engines/common.yml | 6 +++--- .../Core/Search/FieldType/EmbeddingFieldFactoryTest.php | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/contracts/Search/FieldType/EmbeddingField.php b/src/contracts/Search/FieldType/EmbeddingField.php index 366de99ea0..6e4973b4ab 100644 --- a/src/contracts/Search/FieldType/EmbeddingField.php +++ b/src/contracts/Search/FieldType/EmbeddingField.php @@ -16,7 +16,7 @@ private function __construct(string $type) } /** - * @param string $type Has to be handled by configured search engine (ez_dense_vector_ada002). + * @param string $type Has to be handled by configured search engine (ibexa_dense_vector_ada002). */ public static function create(string $type): self { diff --git a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php index 4bf0f2dc7f..a4a4373d57 100644 --- a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php +++ b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php @@ -25,6 +25,6 @@ public function create(?string $type = null): EmbeddingField $suffix = $this->config->getDefaultEmbeddingModelFieldSuffix(); - return EmbeddingField::create('ez_dense_vector_' . $suffix); + return EmbeddingField::create('ibexa_dense_vector_' . $suffix); } } diff --git a/src/lib/Resources/settings/search_engines/common.yml b/src/lib/Resources/settings/search_engines/common.yml index e8da03c327..64e6ac650c 100644 --- a/src/lib/Resources/settings/search_engines/common.yml +++ b/src/lib/Resources/settings/search_engines/common.yml @@ -22,9 +22,9 @@ parameters: ez_geolocation: 'gl' ez_document: 'doc' ez_fulltext: 'fulltext' - ez_dense_vector_ada002: 'ada002_dv' - ez_dense_vector_3small: '3small_dv' - ez_dense_vector_3large: '3large_dv' + ibexa_dense_vector_ada002: 'ada002_dv' + ibexa_dense_vector_3small: '3small_dv' + ibexa_dense_vector_3large: '3large_dv' services: # Note: services tagged with 'ibexa.field_type.indexable' diff --git a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php index 85bf87cd03..b6e31ee01d 100644 --- a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php +++ b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php @@ -26,9 +26,9 @@ public function testCreateUsesConfigSuffix(): void $field = $factory->create(); $this->assertSame( - 'ez_dense_vector_model_123', + 'ibexa_dense_vector_model_123', $field->getType(), - 'Factory should prepend "ez_dense_vector_" to the suffix from theconfig' + 'Factory should prepend "ibexa_dense_vector_" to the suffix from the config' ); } From 863b8659038a82ddb192c37a4bf865b8f768dbbe Mon Sep 17 00:00:00 2001 From: mikolaj Date: Thu, 3 Jul 2025 10:10:58 +0200 Subject: [PATCH 4/5] CR fixes --- .../DependencyInjection/Configuration/Parser/Embeddings.php | 4 +++- .../Repository/Values/Content/EmbeddingQueryBuilder.php | 2 +- src/contracts/Search/FieldType/EmbeddingField.php | 4 +++- src/contracts/Search/FieldType/EmbeddingFieldFactory.php | 4 +++- src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php | 4 +++- .../Configuration/Parser/EmbeddingsTest.php | 4 +++- .../Repository/Values/Content/EmbeddingQueryBuilderTest.php | 2 +- .../Core/Search/FieldType/EmbeddingFieldFactoryTest.php | 4 +++- 8 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php index df56afcbde..b701620361 100644 --- a/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php +++ b/src/bundle/Core/DependencyInjection/Configuration/Parser/Embeddings.php @@ -4,6 +4,8 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Bundle\Core\DependencyInjection\Configuration\Parser; use Ibexa\Bundle\Core\DependencyInjection\Configuration\AbstractParser; @@ -26,7 +28,7 @@ * default_embedding_model: text-embedding-ada-002 * ``` */ -class Embeddings extends AbstractParser +final class Embeddings extends AbstractParser { public function addSemanticConfig(NodeBuilder $nodeBuilder): void { diff --git a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php index e932d4b5ae..b69e96b220 100644 --- a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php +++ b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php @@ -11,7 +11,7 @@ use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion; use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding; -class EmbeddingQueryBuilder +final class EmbeddingQueryBuilder { private EmbeddingQuery $query; diff --git a/src/contracts/Search/FieldType/EmbeddingField.php b/src/contracts/Search/FieldType/EmbeddingField.php index 6e4973b4ab..8fe78ad678 100644 --- a/src/contracts/Search/FieldType/EmbeddingField.php +++ b/src/contracts/Search/FieldType/EmbeddingField.php @@ -4,11 +4,13 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Contracts\Core\Search\FieldType; use Ibexa\Contracts\Core\Search\FieldType; -class EmbeddingField extends FieldType +final class EmbeddingField extends FieldType { private function __construct(string $type) { diff --git a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php index a4a4373d57..517a8aa1ed 100644 --- a/src/contracts/Search/FieldType/EmbeddingFieldFactory.php +++ b/src/contracts/Search/FieldType/EmbeddingFieldFactory.php @@ -4,11 +4,13 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Contracts\Core\Search\FieldType; use Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface; -class EmbeddingFieldFactory +final class EmbeddingFieldFactory { private EmbeddingConfigurationInterface $config; diff --git a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php index e8d858076b..0b182792c6 100644 --- a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php +++ b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php @@ -4,6 +4,8 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Core\Search\Common\FieldValueMapper; use Ibexa\Contracts\Core\Search\Field; @@ -13,7 +15,7 @@ /** * @internal for internal use by Search engine field value mapper */ -class EmbeddingMapper extends FieldValueMapper +final class EmbeddingMapper extends FieldValueMapper { public function canMap(Field $field): bool { diff --git a/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php index 31ebad14d7..d758838147 100644 --- a/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php +++ b/tests/bundle/Core/DependencyInjection/Configuration/Parser/EmbeddingsTest.php @@ -4,13 +4,15 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Tests\Bundle\Core\DependencyInjection\Configuration\Parser; use Ibexa\Bundle\Core\DependencyInjection\Configuration\Parser\Embeddings as EmbeddingsConfigParser; use Ibexa\Bundle\Core\DependencyInjection\IbexaCoreExtension; use Symfony\Component\Yaml\Yaml; -class EmbeddingsTest extends AbstractParserTestCase +final class EmbeddingsTest extends AbstractParserTestCase { protected function getContainerExtensions(): array { diff --git a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php index b24a204206..98c9e8cbca 100644 --- a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php +++ b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php @@ -19,7 +19,7 @@ use InvalidArgumentException; use PHPUnit\Framework\TestCase; -class EmbeddingQueryBuilderTest extends TestCase +final class EmbeddingQueryBuilderTest extends TestCase { public function testBuilderSetsAllowedProperties(): void { diff --git a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php index b6e31ee01d..6c9ffad555 100644 --- a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php +++ b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php @@ -4,13 +4,15 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Tests\Integration\Core\Search\FieldType; use Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface; use Ibexa\Contracts\Core\Search\FieldType\EmbeddingFieldFactory; use PHPUnit\Framework\TestCase; -class EmbeddingFieldFactoryTest extends TestCase +final class EmbeddingFieldFactoryTest extends TestCase { public function testCreateUsesConfigSuffix(): void { From 7cc107cfa12a4931f564fef9e44b6461801a3be1 Mon Sep 17 00:00:00 2001 From: mikolaj Date: Wed, 15 Oct 2025 15:14:14 +0200 Subject: [PATCH 5/5] Fixed issues after code review --- .../{embeddings.yml => embeddings.yaml} | 0 src/bundle/Core/Resources/config/services.yml | 2 +- .../Values/Content/EmbeddingQuery.php | 9 ++-- .../Values/Content/EmbeddingQueryBuilder.php | 52 +++++++++++++++---- .../Values/Content/Query/Embedding.php | 4 +- .../Content/QueryValidatorInterface.php | 2 + .../EmbeddingResolverNotFoundException.php | 2 + .../FieldValueMapper/EmbeddingMapper.php | 2 +- .../Embedding/EmbeddingConfiguration.php | 2 +- .../Content/EmbeddingQueryBuilderTest.php | 29 +++++++---- .../FieldType/EmbeddingFieldFactoryTest.php | 6 +-- .../Embedding/EmbeddingConfigurationTest.php | 20 +++---- .../EmbeddingProviderResolverTest.php | 2 +- 13 files changed, 90 insertions(+), 42 deletions(-) rename src/bundle/Core/Resources/config/{embeddings.yml => embeddings.yaml} (100%) diff --git a/src/bundle/Core/Resources/config/embeddings.yml b/src/bundle/Core/Resources/config/embeddings.yaml similarity index 100% rename from src/bundle/Core/Resources/config/embeddings.yml rename to src/bundle/Core/Resources/config/embeddings.yaml diff --git a/src/bundle/Core/Resources/config/services.yml b/src/bundle/Core/Resources/config/services.yml index 170fe884f2..166b1940cc 100644 --- a/src/bundle/Core/Resources/config/services.yml +++ b/src/bundle/Core/Resources/config/services.yml @@ -1,6 +1,6 @@ imports: - { resource: commands.yml } - - { resource: embeddings.yml } + - { resource: embeddings.yaml } parameters: ibexa.site_access.default.name: default diff --git a/src/contracts/Repository/Values/Content/EmbeddingQuery.php b/src/contracts/Repository/Values/Content/EmbeddingQuery.php index b0e4a957bb..75329c2d7f 100644 --- a/src/contracts/Repository/Values/Content/EmbeddingQuery.php +++ b/src/contracts/Repository/Values/Content/EmbeddingQuery.php @@ -12,10 +12,7 @@ use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding; use InvalidArgumentException; -/** - * This class is used to perform an embedding query. - */ -class EmbeddingQuery extends Query +final class EmbeddingQuery extends Query { private ?Embedding $embedding = null; @@ -29,7 +26,7 @@ public function setEmbedding(?Embedding $embedding): void $this->embedding = $embedding; } - public function getFilter(): ?Criterion + public function getFilter(): Criterion { return $this->filter; } @@ -105,7 +102,7 @@ public function isValid(): bool if (count($invalid) > 0) { throw new InvalidArgumentException( sprintf( - 'EmbeddingQuery may not set [%s].', + 'EmbeddingQuery did not set [%s].', implode(', ', $invalid) ) ); diff --git a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php index b69e96b220..ba1f913dc1 100644 --- a/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php +++ b/src/contracts/Repository/Values/Content/EmbeddingQueryBuilder.php @@ -13,11 +13,21 @@ final class EmbeddingQueryBuilder { - private EmbeddingQuery $query; + private ?Embedding $embedding = null; + + private ?int $limit = null; + + private ?int $offset = null; + + private ?Criterion $filter = null; + + /** @var array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> */ + private array $aggregations = []; + + private bool $performCount = false; private function __construct() { - $this->query = new EmbeddingQuery(); } public static function create(): self @@ -27,28 +37,28 @@ public static function create(): self public function withEmbedding(Embedding $embed): self { - $this->query->setEmbedding($embed); + $this->embedding = $embed; return $this; } public function setLimit(int $limit): self { - $this->query->setLimit($limit); + $this->limit = $limit; return $this; } public function setOffset(int $offset): self { - $this->query->setOffset($offset); + $this->offset = $offset; return $this; } public function setFilter(Criterion $filter): self { - $this->query->setFilter($filter); + $this->filter = $filter; return $this; } @@ -58,20 +68,44 @@ public function setFilter(Criterion $filter): self */ public function setAggregations(array $aggregations): self { - $this->query->setAggregations($aggregations); + $this->aggregations = $aggregations; return $this; } public function setPerformCount(bool $performCount): self { - $this->query->setPerformCount($performCount); + $this->performCount = $performCount; return $this; } public function build(): EmbeddingQuery { - return $this->query; + $query = new EmbeddingQuery(); + + if ($this->embedding !== null) { + $query->setEmbedding($this->embedding); + } + + if ($this->limit !== null) { + $query->setLimit($this->limit); + } + + if ($this->offset !== null) { + $query->setOffset($this->offset); + } + + if ($this->filter !== null) { + $query->setFilter($this->filter); + } + + if (!empty($this->aggregations)) { + $query->setAggregations($this->aggregations); + } + + $query->setPerformCount($this->performCount); + + return $query; } } diff --git a/src/contracts/Repository/Values/Content/Query/Embedding.php b/src/contracts/Repository/Values/Content/Query/Embedding.php index 21a4c00a5d..3a8300688c 100644 --- a/src/contracts/Repository/Values/Content/Query/Embedding.php +++ b/src/contracts/Repository/Values/Content/Query/Embedding.php @@ -23,7 +23,9 @@ public function __construct(array $value) $this->value = $value; } - /** @return float[] */ + /** + * @return float[] + */ public function getValue(): array { return $this->value; diff --git a/src/contracts/Repository/Values/Content/QueryValidatorInterface.php b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php index 4a3ae23f4f..47594f842d 100644 --- a/src/contracts/Repository/Values/Content/QueryValidatorInterface.php +++ b/src/contracts/Repository/Values/Content/QueryValidatorInterface.php @@ -4,6 +4,8 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Contracts\Core\Repository\Values\Content; interface QueryValidatorInterface diff --git a/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php b/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php index 5e1207152c..14cd78bf8b 100644 --- a/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php +++ b/src/contracts/Search/Embedding/EmbeddingResolverNotFoundException.php @@ -4,6 +4,8 @@ * @copyright Copyright (C) Ibexa AS. All rights reserved. * @license For full copyright and license information view LICENSE file distributed with this source code. */ +declare(strict_types=1); + namespace Ibexa\Contracts\Core\Search\Embedding; use RuntimeException; diff --git a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php index 0b182792c6..0ca5036433 100644 --- a/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php +++ b/src/lib/Search/Common/FieldValueMapper/EmbeddingMapper.php @@ -13,7 +13,7 @@ use Ibexa\Core\Search\Common\FieldValueMapper; /** - * @internal for internal use by Search engine field value mapper + * @internal for internal use by search engine field value mapper */ final class EmbeddingMapper extends FieldValueMapper { diff --git a/src/lib/Search/Embedding/EmbeddingConfiguration.php b/src/lib/Search/Embedding/EmbeddingConfiguration.php index 206ef0575e..2d996d3e38 100644 --- a/src/lib/Search/Embedding/EmbeddingConfiguration.php +++ b/src/lib/Search/Embedding/EmbeddingConfiguration.php @@ -60,7 +60,7 @@ public function getDefaultEmbeddingModelIdentifier(): string } /** - * @return array{name: string, dimensions: int, field_suffix: string, 'embedding_provider': string} + * @return array{name: string, dimensions: int, field_suffix: string, embedding_provider: string} */ public function getDefaultEmbeddingModel(): array { diff --git a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php index 98c9e8cbca..19faf6a165 100644 --- a/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php +++ b/tests/integration/Core/Repository/Values/Content/EmbeddingQueryBuilderTest.php @@ -35,19 +35,19 @@ public function testBuilderSetsAllowedProperties(): void $query = $builder->build(); - $this->assertSame( + self::assertSame( $embedding, $query->getEmbedding(), 'Embedding should be set by builder' ); - $this->assertEquals(10, $query->getLimit(), 'Limit should be set by builder'); - $this->assertEquals(5, $query->getOffset(), 'Offset should be set by builder'); - $this->assertTrue($query->getPerformCount(), 'PerformCount flag should be true'); + self::assertEquals(10, $query->getLimit(), 'Limit should be set by builder'); + self::assertEquals(5, $query->getOffset(), 'Offset should be set by builder'); + self::assertTrue($query->getPerformCount(), 'PerformCount flag should be true'); $aggregations = $query->getAggregations(); - $this->assertIsArray($aggregations, 'Aggregations must be array'); - $this->assertCount(2, $aggregations, 'Two aggregations added'); + self::assertIsArray($aggregations, 'Aggregations must be array'); + self::assertCount(2, $aggregations, 'Two aggregations added'); } public function testIsValidReturnsTrueForCleanQuery(): void @@ -56,7 +56,7 @@ public function testIsValidReturnsTrueForCleanQuery(): void ->withEmbedding($this->createMock(Embedding::class)) ->build(); - $this->assertTrue($query->isValid()); + self::assertTrue($query->isValid()); } public function testSettingSortClausesThenIsValidThrows(): void @@ -65,15 +65,26 @@ public function testSettingSortClausesThenIsValidThrows(): void ->withEmbedding($this->createMock(Embedding::class)) ->build(); - // bypass setter via array-append magic $query->sortClauses[] = new ContentName(BaseQuery::SORT_ASC); $query->query = $this->createMock(Criterion::class); $query->facetBuilders = [$this->createMock(FacetBuilder::class)]; $query->spellcheck = new Spellcheck('foo'); $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage('EmbeddingQuery may not set [query, sortClauses, facetBuilders, spellcheck].'); + $this->expectExceptionMessage('EmbeddingQuery did not set [query, sortClauses, facetBuilders, spellcheck].'); $query->isValid(); } + + public function testBuildReturnsNewInstance(): void + { + $builder = EmbeddingQueryBuilder::create(); + + $originalQuery = $builder->build(); + $builder->setPerformCount(true); + $secondQuery = $builder->build(); + + self::assertNotSame($originalQuery, $secondQuery); + self::assertNotEquals($originalQuery->getPerformCount(), $secondQuery->getPerformCount()); + } } diff --git a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php index 6c9ffad555..6c49c846c1 100644 --- a/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php +++ b/tests/integration/Core/Search/FieldType/EmbeddingFieldFactoryTest.php @@ -19,7 +19,7 @@ public function testCreateUsesConfigSuffix(): void $suffix = 'model_123'; $config = $this->createMock(EmbeddingConfigurationInterface::class); $config - ->expects($this->once()) + ->expects(self::once()) ->method('getDefaultEmbeddingModelFieldSuffix') ->willReturn($suffix); @@ -27,7 +27,7 @@ public function testCreateUsesConfigSuffix(): void $field = $factory->create(); - $this->assertSame( + self::assertSame( 'ibexa_dense_vector_model_123', $field->getType(), 'Factory should prepend "ibexa_dense_vector_" to the suffix from the config' @@ -46,7 +46,7 @@ public function testCreateWithCustomType(): void $field = $factory->create($customType); - $this->assertSame( + self::assertSame( $customType, $field->getType(), 'Factory should use the explicit type when provided' diff --git a/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php index 599b3c0ae6..325ff0f3b9 100644 --- a/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php +++ b/tests/lib/Search/Embedding/EmbeddingConfigurationTest.php @@ -43,7 +43,7 @@ public function testGetDefaultEmbeddingModel(): void ['embedding_models', null, null, self::MODELS], ]); - $this->assertSame( + self::assertSame( ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], $this->config->getDefaultEmbeddingModel() ); @@ -58,7 +58,7 @@ public function testGetEmbeddingModelIdentifiers(): void ['embedding_models', null, null, self::MODELS], ]); - $this->assertSame( + self::assertSame( ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], $this->config->getEmbeddingModelIdentifiers() ); @@ -71,8 +71,8 @@ public function testGetEmbeddingModels(): void ->with('embedding_models') ->willReturn(self::MODELS); - $this->assertSame(self::MODELS, $this->config->getEmbeddingModels()); - $this->assertSame( + self::assertSame(self::MODELS, $this->config->getEmbeddingModels()); + self::assertSame( ['text-embedding-3-small', 'text-embedding-3-large', 'text-embedding-ada-002'], $this->config->getEmbeddingModelIdentifiers() ); @@ -85,7 +85,7 @@ public function testGetEmbeddingModel(): void ->with('embedding_models') ->willReturn(self::MODELS); - $this->assertSame( + self::assertSame( ['name' => 'text-embedding-ada-002', 'dimensions' => 1536, 'field_suffix' => 'ada002', 'embedding_provider' => 'ibexa_openai'], $this->config->getEmbeddingModel('text-embedding-ada-002') ); @@ -98,8 +98,8 @@ public function testGetEmbeddingModelWillThrowException(): void ->with('embedding_models') ->willReturn(self::MODELS); - self::expectException(InvalidArgumentException::class); - self::expectExceptionMessage('Embedding model "non-existing-model" is not configured.'); + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Embedding model "non-existing-model" is not configured.'); $this->config->getEmbeddingModel('non-existing-model'); } @@ -111,7 +111,7 @@ public function testGetDefaultEmbeddingModelIdentifier(): void ->with('default_embedding_model') ->willReturn('text-embedding-ada-002'); - $this->assertSame('text-embedding-ada-002', $this->config->getDefaultEmbeddingModelIdentifier()); + self::assertSame('text-embedding-ada-002', $this->config->getDefaultEmbeddingModelIdentifier()); } public function testGetDefaultEmbeddingProvider(): void @@ -123,7 +123,7 @@ public function testGetDefaultEmbeddingProvider(): void ['embedding_models', null, null, self::MODELS], ]); - $this->assertSame('ibexa_openai', $this->config->getDefaultEmbeddingProvider()); + self::assertSame('ibexa_openai', $this->config->getDefaultEmbeddingProvider()); } public function getDefaultEmbeddingModelFieldSuffix(): void @@ -135,6 +135,6 @@ public function getDefaultEmbeddingModelFieldSuffix(): void ['embedding_models', null, null, self::MODELS], ]); - $this->assertSame('ada002', $this->config->getDefaultEmbeddingModelFieldSuffix()); + self::assertSame('ada002', $this->config->getDefaultEmbeddingModelFieldSuffix()); } } diff --git a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php index 7ba3471411..61e2cd3810 100644 --- a/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php +++ b/tests/lib/Search/Embedding/EmbeddingProviderResolverTest.php @@ -56,7 +56,7 @@ public function testResolveReturnsProviderWhenAvailable(): void $resolved = $this->resolver->resolve(); - $this->assertSame($mockProvider, $resolved); + self::assertSame($mockProvider, $resolved); } public function testResolveThrowsWhenProviderMissing(): void