@@ -322,8 +322,10 @@ def get_text_split_skill(
322322
323323 return semantic_text_chunker_skill
324324
325- def get_adi_skill (self , chunk_by_page = False ) -> WebApiSkill :
326- """Get the custom skill for adi.
325+ def get_layout_analysis_skill (
326+ self , chunk_by_page = False , extract_figures = True
327+ ) -> WebApiSkill :
328+ """Get the custom skill for layout analysis.
327329
328330 Args:
329331 -----
@@ -343,25 +345,24 @@ def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill:
343345
344346 if chunk_by_page :
345347 output = [
346- OutputFieldMappingEntry (name = "extracted_content " , target_name = "chunks " )
348+ OutputFieldMappingEntry (name = "layout " , target_name = "page_wise_layout " )
347349 ]
348350 else :
349- output = [
350- OutputFieldMappingEntry (
351- name = "extracted_content" , target_name = "extracted_content"
352- )
353- ]
351+ output = [OutputFieldMappingEntry (name = "layout" , target_name = "layout" )]
354352
355- adi_skill = WebApiSkill (
356- name = "ADI Skill" ,
353+ layout_analysis_skill = WebApiSkill (
354+ name = "Layout Analysis Skill" ,
357355 description = "Skill to generate ADI" ,
358356 context = "/document" ,
359- uri = self .environment .get_custom_skill_function_url ("adi " ),
357+ uri = self .environment .get_custom_skill_function_url ("layout_analysis " ),
360358 timeout = "PT230S" ,
361359 batch_size = batch_size ,
362360 degree_of_parallelism = degree_of_parallelism ,
363361 http_method = "POST" ,
364- http_headers = {"chunk_by_page" : chunk_by_page },
362+ http_headers = {
363+ "chunk_by_page" : chunk_by_page ,
364+ "extract_figures" : extract_figures ,
365+ },
365366 inputs = [
366367 InputFieldMappingEntry (
367368 name = "source" , source = "/document/metadata_storage_path"
@@ -371,100 +372,150 @@ def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill:
371372 )
372373
373374 if self .environment .identity_type != IdentityType .KEY :
374- adi_skill .auth_identity = (
375+ layout_analysis_skill .auth_identity = (
375376 self .environment .function_app_app_registration_resource_id
376377 )
377378
378379 if self .environment .identity_type == IdentityType .USER_ASSIGNED :
379- adi_skill .auth_identity = self .environment .ai_search_user_assigned_identity
380+ layout_analysis_skill .auth_identity = (
381+ self .environment .ai_search_user_assigned_identity
382+ )
380383
381- return adi_skill
384+ return layout_analysis_skill
382385
383- def get_vector_skill (
384- self , context , source , target_name = "vector"
385- ) -> AzureOpenAIEmbeddingSkill :
386- """Get the vector skill for the indexer.
def get_figure_analysis_skill(self, figure_source) -> WebApiSkill:
    """Get the custom skill for figure analysis.

    Args:
    -----
        figure_source (str): Enrichment path of the figure(s) to analyse. It is
            used both as the skill context and as the source of the skill's
            `figure` input.

    Returns:
    --------
        WebApiSkill: The custom skill for figure analysis"""

    # Depending on your GPT Token limit, you may need to adjust the batch size
    # and degree of parallelism. Only the parallelism differs between test and
    # non-test runs; the batch size is 1 in both.
    batch_size = 1
    degree_of_parallelism = 4 if self.test else 8

    output = [
        OutputFieldMappingEntry(name="updated_figure", target_name="updated_figure")
    ]

    figure_analysis_skill = WebApiSkill(
        name="Figure Analysis Skill",
        description="Skill to generate figure analysis",
        context=figure_source,
        uri=self.environment.get_custom_skill_function_url("figure_analysis"),
        timeout="PT230S",
        batch_size=batch_size,
        degree_of_parallelism=degree_of_parallelism,
        http_method="POST",
        inputs=[InputFieldMappingEntry(name="figure", source=figure_source)],
        outputs=output,
    )

    # Any non-key identity type first gets the function app's app registration
    # resource id as the auth identity ...
    if self.environment.identity_type != IdentityType.KEY:
        figure_analysis_skill.auth_identity = (
            self.environment.function_app_app_registration_resource_id
        )

    # ... and a user-assigned identity type then replaces it with the AI Search
    # user-assigned identity. The order of these two checks matters.
    if self.environment.identity_type == IdentityType.USER_ASSIGNED:
        figure_analysis_skill.auth_identity = (
            self.environment.ai_search_user_assigned_identity
        )

    return figure_analysis_skill
418433
419- def get_key_phrase_extraction_skill (self , context , source ) -> WebApiSkill :
420- """Get the key phrase extraction skill .
def get_layout_and_figure_merger_skill(self, figure_source) -> WebApiSkill:
    """Get the custom skill for layout and figure merger.

    Args:
    -----
        figure_source (str): Enrichment path that is used both as the skill
            context and as the source of the skill's `figure` input.

    Returns:
    --------
        WebApiSkill: The custom skill for merging layout and figure analysis"""

    # Depending on your GPT Token limit, you may need to adjust the batch size
    # and degree of parallelism. Only the parallelism differs between test and
    # non-test runs; the batch size is 1 in both.
    batch_size = 1
    degree_of_parallelism = 4 if self.test else 8

    # NOTE(review): the input/output field names ("figure" / "updated_figure")
    # and the context are identical to the figure analysis skill. Confirm they
    # match what the layout_and_figure_merger function expects - TODO confirm.
    output = [
        OutputFieldMappingEntry(name="updated_figure", target_name="updated_figure")
    ]

    layout_and_figure_merger_skill = WebApiSkill(
        name="Layout and Figure Merger Skill",
        description="Skill to merge layout and figure analysis",
        context=figure_source,
        uri=self.environment.get_custom_skill_function_url(
            "layout_and_figure_merger"
        ),
        timeout="PT230S",
        batch_size=batch_size,
        degree_of_parallelism=degree_of_parallelism,
        http_method="POST",
        inputs=[InputFieldMappingEntry(name="figure", source=figure_source)],
        outputs=output,
    )

    # Any non-key identity type first gets the function app's app registration
    # resource id as the auth identity ...
    if self.environment.identity_type != IdentityType.KEY:
        layout_and_figure_merger_skill.auth_identity = (
            self.environment.function_app_app_registration_resource_id
        )

    # ... and a user-assigned identity type then replaces it with the AI Search
    # user-assigned identity. The order of these two checks matters.
    if self.environment.identity_type == IdentityType.USER_ASSIGNED:
        layout_and_figure_merger_skill.auth_identity = (
            self.environment.ai_search_user_assigned_identity
        )

    return layout_and_figure_merger_skill
483+
def get_vector_skill(
    self, context, source, target_name="vector"
) -> AzureOpenAIEmbeddingSkill:
    """Build the Azure OpenAI embedding skill for the indexer.

    Args:
    -----
        context: Passed through as the context of the skill.
        source: Source of the skill's `text` input.
        target_name (optional): Target name of the skill's `embedding` output.
            Defaults to "vector".

    Returns:
    --------
        AzureOpenAIEmbeddingSkill: The vector skill for the indexer"""

    text_inputs = [InputFieldMappingEntry(name="text", source=source)]
    embedding_outputs = [
        OutputFieldMappingEntry(name="embedding", target_name=target_name)
    ]

    env = self.environment
    skill = AzureOpenAIEmbeddingSkill(
        name="Vector Skill",
        description="Skill to generate embeddings",
        context=context,
        deployment_name=env.open_ai_embedding_deployment,
        model_name=env.open_ai_embedding_model,
        resource_url=env.open_ai_endpoint,
        inputs=text_inputs,
        outputs=embedding_outputs,
        dimensions=env.open_ai_embedding_dimensions,
    )

    # Key-based setups attach the OpenAI API key; user-assigned setups attach
    # the AI Search user-assigned identity. Any other identity type leaves both
    # attributes untouched.
    if env.identity_type == IdentityType.KEY:
        skill.api_key = env.open_ai_api_key
    elif env.identity_type == IdentityType.USER_ASSIGNED:
        skill.auth_identity = env.ai_search_user_assigned_identity

    return skill
468519
469520 def get_vector_search (self ) -> VectorSearch :
470521 """Get the vector search configuration for compass.
0 commit comments