Skip to content

Commit a73c113

Browse files
author
Matthew Harris
committed
Adding new fields to search index
1 parent 465e49c commit a73c113

File tree

6 files changed

+52
-6
lines changed

6 files changed

+52
-6
lines changed

azuredeploy-test.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@
171171
"type": "Microsoft.CognitiveServices/accounts",
172172
"apiVersion": "2024-06-01-preview",
173173
"name": "[variables('docIntelServiceName')]",
174-
"location": "[resourceGroup().location]",
174+
"location": "eastus",
175175
"sku": {
176176
"name": "S0"
177177
},

src/Azure.AISearch.FunctionApp.DotNet/ChunkEmbedPush.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous,
9898
SourceDocumentFilePath = record.Data.FilePath,
9999
// mjh
100100
SourceDocumentDataSource = record.Data.DataSource,
101+
SourceDocumentSourceUrl = record.Data.SourceUrl,
102+
SourceDocumentAuthor = record.Data.Author,
103+
SourceDocumentType = record.Data.Type,
104+
SourceDocumentPublishDate = record.Data.PublishDate
105+
101106
};
102107
documentChunks.Add(documentChunk);
103108
index++;

src/Azure.AISearch.FunctionApp.DotNet/Models/DocumentChunk.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,9 @@ public class DocumentChunk
1010
public string? SourceDocumentFilePath { get; set; }
1111
// mjh
1212
public string? SourceDocumentDataSource { get; set; }
13+
public string? SourceDocumentSourceUrl { get; set; }
14+
public string? SourceDocumentAuthor { get; set; }
15+
public string? SourceDocumentType { get; set; }
16+
public DateTimeOffset? SourceDocumentPublishDate { get; set; }
17+
1318
}

src/Azure.AISearch.WebApp/Models/DocumentChunk.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,9 @@ public class DocumentChunk
1111
public string? SourceDocumentFilePath { get; set; }
1212
// mjh
1313
public string? SourceDocumentDataSource { get; set; }
14+
public string? SourceDocumentSourceUrl { get; set; }
15+
public string? SourceDocumentAuthor { get; set; }
16+
public string? SourceDocumentType { get; set; }
17+
public DateTimeOffset? SourceDocumentPublishDate { get; set; }
18+
1419
}

src/Azure.AISearch.WebApp/Services/AzureCognitiveSearchConfigurationService.cs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,11 @@ private SearchIndexerSkillset GetDocumentsSearchIndexerSkillset(AppSettingsOverr
270270
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentFilePath)) { Source = $"/document/metadata_storage_path" },
271271
// mjh
272272
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentDataSource)) { Source = $"/document/data_source" },
273+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentSourceUrl)) { Source = $"/document/source_url" },
274+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentAuthor)) { Source = $"/document/author" },
275+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentType)) { Source = $"/document/doc_type" },
276+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentPublishDate)) { Source = $"/document/publish_date" }
277+
273278
}
274279
}
275280
}
@@ -338,14 +343,23 @@ private SearchIndexerSkillset GetDocumentsSearchIndexerSkillset(AppSettingsOverr
338343
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentId)) { Source = $"/document/{nameof(Document.Id)}" },
339344
// Map the document file path.
340345
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentFilePath)) { Source = $"/document/{nameof(Document.FilePath)}" },
341-
// mjh Map the data source
342-
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentDataSource)) { Source = $"/document/{nameof(Document.DataSource)}" },
343346
// Map the document title.
344347
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentTitle)) { Source = $"/document/{nameof(Document.Title)}" },
345348
// Map the chunked content.
346349
new InputFieldMappingEntry(nameof(DocumentChunk.Content)) { Source = $"/document/{nameof(Document.Content)}/chunks/*/content" },
347350
// Map the embedding vector.
348351
new InputFieldMappingEntry(nameof(DocumentChunk.ContentVector)) { Source = $"/document/{nameof(Document.Content)}/chunks/*/embedding_metadata/embedding" },
352+
// mjh Map the data source
353+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentDataSource)) { Source = $"/document/{nameof(Document.DataSource)}" },
354+
// mjh Map the source url
355+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentSourceUrl)) { Source = $"/document/{nameof(Document.SourceUrl)}" },
356+
// mjh Map the author
357+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentAuthor)) { Source = $"/document/{nameof(Document.Author)}" },
358+
// mjh Map the type
359+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentType)) { Source = $"/document/{nameof(Document.Type)}" },
360+
// mjh Map the publish date
361+
new InputFieldMappingEntry(nameof(DocumentChunk.SourceDocumentPublishDate)) { Source = $"/document/{nameof(Document.PublishDate)}" }
362+
349363
}
350364
}
351365
}
@@ -417,9 +431,13 @@ private SearchIndex GetChunksSearchIndex(string chunkIndexName, AppSettingsOverr
417431
new SearchField(nameof(DocumentChunk.ContentVector), SearchFieldDataType.Collection(SearchFieldDataType.Single)) { IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = true, VectorSearchDimensions = this.settings.OpenAIEmbeddingVectorDimensions, VectorSearchProfile = Constants.ConfigurationNames.VectorSearchProfileNameDefault },
418432
new SearchField(nameof(DocumentChunk.SourceDocumentId), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = false, IsSearchable = false },
419433
new SearchField(nameof(DocumentChunk.SourceDocumentTitle), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = false, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.EnMicrosoft },
420-
new SearchField(nameof(DocumentChunk.SourceDocumentFilePath), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = false, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.StandardLucene },
434+
new SearchField(nameof(DocumentChunk.SourceDocumentFilePath), SearchFieldDataType.String) { IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = false },
421435
// mjh
422-
new SearchField(nameof(DocumentChunk.SourceDocumentDataSource), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = true, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.StandardLucene }
436+
new SearchField(nameof(DocumentChunk.SourceDocumentDataSource), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = true, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.StandardLucene },
437+
new SearchField(nameof(DocumentChunk.SourceDocumentSourceUrl), SearchFieldDataType.String) { IsFilterable = false, IsSortable = false, IsFacetable = false, IsSearchable = false },
438+
new SearchField(nameof(DocumentChunk.SourceDocumentAuthor), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = true, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.EnMicrosoft },
439+
new SearchField(nameof(DocumentChunk.SourceDocumentType), SearchFieldDataType.String) { IsFilterable = true, IsSortable = true, IsFacetable = true, IsSearchable = true, AnalyzerName = LexicalAnalyzerName.EnMicrosoft },
440+
new SearchField(nameof(DocumentChunk.SourceDocumentPublishDate), SearchFieldDataType.DateTimeOffset) { IsFilterable = true, IsSortable = true, IsFacetable = true, IsSearchable = false },
423441
},
424442
SemanticSettings = new SemanticSettings
425443
{

src/Azure.AISearch.WebApp/Services/AzureCognitiveSearchService.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,13 @@ private void SetSearchOptionsForChunksIndex(SearchOptions searchOptions, QueryTy
138138
// sense and will return an error.
139139
searchOptions.HighlightFields.Add(nameof(DocumentChunk.Content));
140140
}
141+
// mjh
142+
searchOptions.Select.Add(nameof(DocumentChunk.SourceDocumentDataSource));
143+
searchOptions.Select.Add(nameof(DocumentChunk.SourceDocumentSourceUrl));
144+
searchOptions.Select.Add(nameof(DocumentChunk.SourceDocumentAuthor));
145+
searchOptions.Select.Add(nameof(DocumentChunk.SourceDocumentType));
146+
searchOptions.Select.Add(nameof(DocumentChunk.SourceDocumentPublishDate));
147+
141148
}
142149

143150
private SearchResult GetSearchResultForChunksIndex(SearchResult<SearchDocument> result, QueryType? queryType)
@@ -154,7 +161,13 @@ private SearchResult GetSearchResultForChunksIndex(SearchResult<SearchDocument>
154161
// to at least show the context of the response.
155162
searchResult.Captions.Add(result.Document.GetString(nameof(DocumentChunk.Content)));
156163
}
157-
164+
// mjh
165+
searchResult.DataSource = result.Document.GetString(nameof(DocumentChunk.SourceDocumentDataSource));
166+
searchResult.SourceUrl = result.Document.GetString(nameof(DocumentChunk.SourceDocumentSourceUrl));
167+
searchResult.Author = result.Document.GetString(nameof(DocumentChunk.SourceDocumentAuthor));
168+
searchResult.Type = result.Document.GetString(nameof(DocumentChunk.SourceDocumentType));
169+
searchResult.PublishDate = result.Document.GetDateTimeOffset(nameof(DocumentChunk.SourceDocumentPublishDate));
170+
158171
return searchResult;
159172
}
160173

0 commit comments

Comments
 (0)