@@ -188,11 +188,12 @@ def get_skills(self) -> list:
188188 if self .enable_page_by_chunking :
189189 embedding_skill = self .get_vector_skill (
190190 "/document/page_wise_layout/*" ,
191- "/document/page_wise_layout/*/cleaned_text " ,
191+ "/document/page_wise_layout/*/final_cleaned_text " ,
192192 )
193193 else :
194194 embedding_skill = self .get_vector_skill (
195- "/document/chunk_mark_ups/*" , "/document/chunk_mark_ups/*/cleaned_text"
195+ "/document/chunk_mark_ups/*" ,
196+ "/document/chunk_mark_ups/*/final_cleaned_text" ,
196197 )
197198
198199 if self .enable_page_by_chunking :
@@ -223,7 +224,7 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
223224 source_context = "/document/page_wise_layout/*"
224225 mappings = [
225226 InputFieldMappingEntry (
226- name = "Chunk" , source = "/document/page_wise_layout/*/mark_up "
227+ name = "Chunk" , source = "/document/page_wise_layout/*/final_mark_up "
227228 ),
228229 InputFieldMappingEntry (
229230 name = "ChunkEmbedding" ,
@@ -233,24 +234,25 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
233234 InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
234235 InputFieldMappingEntry (
235236 name = "Sections" ,
236- source = "/document/page_wise_layout/*/sections " ,
237+ source = "/document/page_wise_layout/*/final_sections " ,
237238 ),
238239 InputFieldMappingEntry (
239240 name = "ChunkFigures" ,
240- source = "/document/page_wise_layout/*/chunk_figures /*" ,
241+ source = "/document/page_wise_layout/*/final_chunk_figures /*" ,
241242 ),
242243 InputFieldMappingEntry (
243244 name = "DateLastModified" , source = "/document/DateLastModified"
244245 ),
245246 InputFieldMappingEntry (
246- name = "PageNumber" , source = "/document/page_wise_layout/*/page_number"
247+ name = "PageNumber" ,
248+ source = "/document/page_wise_layout/*/final_page_number" ,
247249 ),
248250 ]
249251 else :
250252 source_context = "/document/chunk_mark_ups/*"
251253 mappings = [
252254 InputFieldMappingEntry (
253- name = "Chunk" , source = "/document/chunk_mark_ups/*/mark_up "
255+ name = "Chunk" , source = "/document/chunk_mark_ups/*/final_mark_up "
254256 ),
255257 InputFieldMappingEntry (
256258 name = "ChunkEmbedding" ,
@@ -259,17 +261,18 @@ def get_index_projections(self) -> SearchIndexerIndexProjection:
259261 InputFieldMappingEntry (name = "Title" , source = "/document/Title" ),
260262 InputFieldMappingEntry (name = "SourceUri" , source = "/document/SourceUri" ),
261263 InputFieldMappingEntry (
262- name = "Sections" , source = "/document/chunk_mark_ups/*/sections "
264+ name = "Sections" , source = "/document/chunk_mark_ups/*/final_sections "
263265 ),
264266 InputFieldMappingEntry (
265267 name = "ChunkFigures" ,
266- source = "/document/chunk_mark_ups/*/chunk_figures /*" ,
268+ source = "/document/chunk_mark_ups/*/final_chunk_figures /*" ,
267269 ),
268270 InputFieldMappingEntry (
269271 name = "DateLastModified" , source = "/document/DateLastModified"
270272 ),
271273 InputFieldMappingEntry (
272- name = "PageNumber" , source = "/document/chunk_mark_ups/*/page_number"
274+ name = "PageNumber" ,
275+ source = "/document/chunk_mark_ups/*/final_page_number" ,
273276 ),
274277 ]
275278
0 commit comments