Skip to content

Commit d0bd96d

Browse files
committed
Update schema store
1 parent d035ba2 commit d0bd96d

File tree

5 files changed

+67
-23
lines changed

5 files changed

+67
-23
lines changed

deploy_ai_search/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ The associated scripts in this portion of the repository contains pre-built scri
1515

1616
## Steps for Text2SQL Index Deployment
1717

18-
### Entity Schema Index
18+
### Schema Store Index
1919

2020
1. Update `.env` file with the associated values. Not all values are required; which ones you need depends on whether you are using System / User Assigned Identities or key-based authentication.
21-
2. Adjust `text_2_sql.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
21+
2. Adjust `text_2_sql_schema_store.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source.
2222
3. Run `deploy.py` with the following args:
2323

24-
- `index_type text_2_sql`. This selects the `Text2SQLAISearch` sub class.
24+
- `index_type text_2_sql_schema_store`. This selects the `Text2SqlSchemaStoreAISearch` subclass.
2525
- `rebuild`. Whether to delete and rebuild the index.
2626
- `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version, before overwriting the main version.
2727
- `single_data_dictionary`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.

deploy_ai_search/deploy.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Licensed under the MIT License.
33
import argparse
44
from rag_documents import RagDocumentsAISearch
5-
from text_2_sql import Text2SqlAISearch
5+
from text_2_sql_schema_store import Text2SqlSchemaStoreAISearch
66
from text_2_sql_query_cache import Text2SqlQueryCacheAISearch
77
import logging
88

@@ -20,8 +20,8 @@ def deploy_config(arguments: argparse.Namespace):
2020
rebuild=arguments.rebuild,
2121
enable_page_by_chunking=arguments.enable_page_chunking,
2222
)
23-
elif arguments.index_type == "text_2_sql":
24-
index_config = Text2SqlAISearch(
23+
elif arguments.index_type == "text_2_sql_schema_store":
24+
index_config = Text2SqlSchemaStoreAISearch(
2525
suffix=arguments.suffix,
2626
rebuild=arguments.rebuild,
2727
single_data_dictionary=arguments.single_data_dictionary,

deploy_ai_search/environment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class IndexerType(Enum):
1212
"""The type of the indexer"""
1313

1414
RAG_DOCUMENTS = "rag-documents"
15-
TEXT_2_SQL = "text-2-sql"
15+
TEXT_2_SQL_SCHEMA_STORE = "text-2-sql-schema-store"
1616
TEXT_2_SQL_QUERY_CACHE = "text-2-sql-query-cache"
1717

1818

deploy_ai_search/rag_documents.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def get_indexer(self) -> SearchIndexer:
281281
indexer_parameters = IndexingParameters(
282282
batch_size=batch_size,
283283
configuration=IndexingParametersConfiguration(
284-
data_to_extract=BlobIndexerDataToExtract.ALL_METADATA,
284+
data_to_extract=BlobIndexerDataToExtract.STORAGE_METADATA,
285285
query_timeout=None,
286286
execution_environment=execution_environment,
287287
fail_on_unprocessable_document=False,

deploy_ai_search/text_2_sql.py renamed to deploy_ai_search/text_2_sql_schema_store.py

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
)
2727

2828

29-
class Text2SqlAISearch(AISearch):
29+
class Text2SqlSchemaStoreAISearch(AISearch):
3030
"""This class is used to deploy the sql index."""
3131

3232
def __init__(
@@ -41,7 +41,7 @@ def __init__(
4141
suffix (str, optional): The suffix for the indexer. Defaults to None. If a suffix is provided, it is assumed to be a test indexer.
4242
rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
4343
"""
44-
self.indexer_type = IndexerType.TEXT_2_SQL
44+
self.indexer_type = IndexerType.TEXT_2_SQL_SCHEMA_STORE
4545
super().__init__(suffix, rebuild)
4646

4747
if single_data_dictionary:
@@ -62,34 +62,35 @@ def get_index_fields(self) -> list[SearchableField]:
6262
key=True,
6363
analyzer_name="keyword",
6464
),
65+
SearchableField(
66+
name="EntityName", type=SearchFieldDataType.String, filterable=True
67+
),
6568
SearchableField(
6669
name="Entity",
6770
type=SearchFieldDataType.String,
6871
analyzer_name="keyword",
6972
),
7073
SearchableField(
71-
name="EntityName", type=SearchFieldDataType.String, filterable=True
72-
),
73-
SearchableField(
74-
name="Description",
74+
name="Definition",
7575
type=SearchFieldDataType.String,
7676
sortable=False,
7777
filterable=False,
7878
facetable=False,
7979
),
8080
SearchField(
81-
name="DescriptionEmbedding",
81+
name="DefinitionEmbedding",
8282
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
8383
vector_search_dimensions=self.environment.open_ai_embedding_dimensions,
8484
vector_search_profile_name=self.vector_search_profile_name,
85+
hidden=True,
8586
),
8687
ComplexField(
8788
name="Columns",
8889
collection=True,
8990
fields=[
90-
SearchableField(name="Name", type=SearchFieldDataType.String),
91+
SearchableField(name="ColumnName", type=SearchFieldDataType.String),
9192
SearchableField(name="Definition", type=SearchFieldDataType.String),
92-
SearchableField(name="Type", type=SearchFieldDataType.String),
93+
SearchableField(name="DataType", type=SearchFieldDataType.String),
9394
SearchableField(
9495
name="AllowedValues",
9596
type=SearchFieldDataType.String,
@@ -102,6 +103,11 @@ def get_index_fields(self) -> list[SearchableField]:
102103
collection=True,
103104
searchable=False,
104105
),
106+
SearchableField(
107+
name="JoinableEntities",
108+
type=SearchFieldDataType.String,
109+
collection=True,
110+
),
105111
],
106112
),
107113
SearchableField(
@@ -111,6 +117,39 @@ def get_index_fields(self) -> list[SearchableField]:
111117
hidden=True,
112118
# This is needed to enable semantic searching against the column names as complex field types are not used.
113119
),
120+
SearchableField(
121+
name="ColumnDefinitions",
122+
type=SearchFieldDataType.String,
123+
collection=True,
124+
hidden=True,
125+
# This is needed to enable semantic searching against the column definitions as complex field types are not used.
126+
),
127+
ComplexField(
128+
name="ImmediateRelationships",
129+
collection=True,
130+
fields=[
131+
SearchableField(
132+
name="ImmediateRelationshipName",
133+
type=SearchFieldDataType.String,
134+
),
135+
SearchableField(
136+
name="ImmediateRelationshipEntity",
137+
type=SearchFieldDataType.String,
138+
),
139+
ComplexField(
140+
name="ForeignKeys",
141+
collection=True,
142+
fields=[
143+
SearchableField(
144+
name="SourceColumnName", type=SearchFieldDataType.String
145+
),
146+
SearchableField(
147+
name="TargetColumnName", type=SearchFieldDataType.String
148+
),
149+
],
150+
),
151+
],
152+
),
114153
SimpleField(
115154
name="DateLastModified",
116155
type=SearchFieldDataType.DateTimeOffset,
@@ -131,7 +170,8 @@ def get_semantic_search(self) -> SemanticSearch:
131170
prioritized_fields=SemanticPrioritizedFields(
132171
title_field=SemanticField(field_name="EntityName"),
133172
content_fields=[
134-
SemanticField(field_name="Description"),
173+
SemanticField(field_name="Definition"),
174+
SemanticField(field_name="ColumnDefinitions"),
135175
],
136176
keywords_fields=[
137177
SemanticField(field_name="ColumnNames"),
@@ -151,7 +191,7 @@ def get_skills(self) -> list:
151191
list: The skillsets used in the indexer"""
152192

153193
embedding_skill = self.get_vector_skill(
154-
"/document", "/document/Description", target_name="DescriptionEmbedding"
194+
"/document", "/document/Definition", target_name="DefinitionEmbedding"
155195
)
156196

157197
skills = [embedding_skill]
@@ -222,12 +262,12 @@ def get_indexer(self) -> SearchIndexer:
222262
target_field_name="EntityName",
223263
),
224264
FieldMapping(
225-
source_field_name="/document/Description",
226-
target_field_name="Description",
265+
source_field_name="/document/Definition",
266+
target_field_name="Definition",
227267
),
228268
FieldMapping(
229-
source_field_name="/document/DescriptionEmbedding",
230-
target_field_name="DescriptionEmbedding",
269+
source_field_name="/document/DefinitionEmbedding",
270+
target_field_name="DefinitionEmbedding",
231271
),
232272
FieldMapping(
233273
source_field_name="/document/Columns",
@@ -237,6 +277,10 @@ def get_indexer(self) -> SearchIndexer:
237277
source_field_name="/document/Columns/*/Name",
238278
target_field_name="ColumnNames",
239279
),
280+
FieldMapping(
281+
source_field_name="/document/Columns/*/Definition",
282+
target_field_name="ColumnDefinitions",
283+
),
240284
FieldMapping(
241285
source_field_name="/document/DateLastModified",
242286
target_field_name="DateLastModified",

0 commit comments

Comments
 (0)