@@ -57,25 +57,25 @@ class OpsLearningSummaryTask:
5757 MIN_DIF_EXCERPTS = 3
5858
5959 primary_prompt = (
60- "Please aggregate and summarize the provided data into UP TO THREE structured paragraphs. "
61- "The output MUST strictly adhere to the format below: "
62- "- *Title*: Each finding should begin with the main finding TITLE in bold. "
60+ "Please aggregate and summarize the provided data into UP TO THREE structured paragraphs.\n "
61+ "The output MUST strictly adhere to the format below:\n "
62+ "- *Title*: Each finding should begin with the main finding TITLE in bold.\n "
6363 "Should be a high level summary of the finding below. "
64- "The length of the title MUST be between 20 and 30 characters."
65- "- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary. "
64+ "The length of the title MUST be between 20 and 30 characters.\n "
65+ "- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary.\n "
6666 "- Content: Aggregate findings so that they are supported by evidence from more than one report. "
6767 "Always integrate evidence from multiple reports or items into the paragraph, and "
68- "include the year and country of the evidence."
68+ "include the year and country of the evidence.\n "
6969 "- *Confidence Level*: Based on the number of excerpts connected to the finding, "
7070 "assign a score from 1 to 5 where 1 is the lowest and 5 is the highest, e.g. 4/5"
71- "At the end of the summary, please highlight any contradictory country reports. "
72- "Important:"
73- "-- DO NOT mention the excerpts id in the content of the summary."
74- "-- DO NOT mention the confidence level in the content of the summary."
75- "-- DO NOT use data from any source other than the one provided."
76- "Output Format:"
77- "Provide your answer in valid JSON form. Reply with only the answer in valid JSON form and include no other commentary. "
78- "Example: "
71+ "At the end of the summary, please highlight any contradictory country reports.\n "
72+ "Important:\n \n "
73+ "-- DO NOT mention the excerpts id in the content of the summary.\n "
74+ "-- DO NOT mention the confidence level in the content of the summary.\n "
75+ "-- DO NOT use data from any source other than the one provided.\n \n "
76+ "Output Format:\n "
77+ "Provide your answer in valid JSON form. Reply with only the answer in valid JSON form and include no other commentary.\n "
78+ "Example:\n "
7979 '{"0": {"title": "Flexible and Adaptive Response Planning", "excerpts id":"123, 45" '
8080 '"content": "Responses in Honduras, Peru, Ecuador, and Panama highlight the importance of adaptable strategies. '
8181 "The shift from youth-focused MHPSS to inclusive care in Peru in 2021, the pivot from sanitation infrastructure "
@@ -88,22 +88,22 @@ class OpsLearningSummaryTask:
8888 )
8989
9090 secondary_prompt = (
91- "Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). "
92- "The output SHOULD ALWAYS follow the format below: "
93- "- *Type*: Whether the paragraph is related to a 'sector' or a 'component' "
94- "- *Subtype*: Provides the name of the sector or of the component to which the paragraph refers."
95- "- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary."
91+ "Please aggregate and summarize this data into structured paragraphs (as few as possible, as many as necessary). \n "
92+ "The output SHOULD ALWAYS follow the format below:\n "
93+ "- *Type*: Whether the paragraph is related to a 'sector' or a 'component'\n "
94+ "- *Subtype*: Provides the name of the sector or of the component to which the paragraph refers.\n "
95+ "- *Excerpts ID*: Identify the ids of the excerpts you took into account for creating the summary.\n "
9696 "*Content*: A short summary aggregating findings related to the Subtype, "
9797 "so that they are supported by evidence coming from more than one report, "
9898 "and there is ONLY ONE entry per subtype. Always integrate in the paragraph evidence that supports "
9999 "it from the data available from multiples reports or items, include year and country of the evidence. "
100- "The length of each paragraph MUST be between 20 and 30 words."
101- " Important:"
102- "- ONLY create one summary per subtype"
103- "- DO NOT mention the ids of the excerpts in the content of the summary."
104- "- DO NOT use data from any source other than the one provided. "
105- "Output Format:"
106- "Provide your answer in valid JSON form. Reply with ONLY the answer in JSON form and include NO OTHER COMMENTARY."
100+ "The length of each paragraph MUST be between 20 and 30 words.\n "
101+ " Important:\n \n "
102+ "- ONLY create one summary per subtype\n "
103+ "- DO NOT mention the ids of the excerpts in the content of the summary.\n "
104+ "- DO NOT use data from any source other than the one provided.\n \n "
105+ "Output Format:\n "
106+ "Provide your answer in valid JSON form. Reply with ONLY the answer in JSON form and include NO OTHER COMMENTARY.\n "
107107 '{"0": {"type": "sector", "subtype": "shelter", "excerpts id":"43, 1375, 14543", "content": "lorem ipsum"}, '
108108 '"1": {"type": "component", "subtype": "Information Management", "excerpts id":"23, 235", "content": "lorem ipsum"}, '
109109 '"2": {"type": "sector", "subtype": "WASH", "excerpts id":"30, 40", "content": "lorem ipsum"}}'
@@ -120,21 +120,21 @@ class OpsLearningSummaryTask:
120120 )
121121
122122 primary_instruction_prompt = (
123- "You should:"
124- "1. Describe, Summarize and Compare: Identify and detail the who, what, where and when"
125- "2. Explain and Connect: Analyze why events happened and how they are related"
126- "3. Identify gaps: Assess what data is available, what is missing and potential biases"
127- "4. Identify key messages: Determine important stories and signals hidden in the data"
128- "5. Select top three: Select up to three findings to report"
123+ "You should:\n "
124+ "1. Describe, Summarize and Compare: Identify and detail the who, what, where and when "
125+ "2. Explain and Connect: Analyze why events happened and how they are related "
126+ "3. Identify gaps: Assess what data is available, what is missing and potential biases "
127+ "4. Identify key messages: Determine important stories and signals hidden in the data "
128+ "5. Select top three: Select up to three findings to report "
129129 )
130130
131131 secondary_instruction_prompt = (
132- "You should for each section in the data (TYPE & SUBTYPE combination):"
133- "1. Describe, Summarize and Compare: Identify and detail the who, what, where and when"
134- "2. Explain and Connect: Analyze why events happened and how they are related"
135- "3. Identify gaps: Assess what data is available, what is missing and potential biases"
136- "4. Identify key messages: Determine if there are important stories and signals hidden in the data"
137- "5. Conclude and make your case"
132+ "You should for each section in the data (TYPE & SUBTYPE combination):\n "
133+ "1. Describe, Summarize and Compare: Identify and detail the who, what, where and when "
134+ "2. Explain and Connect: Analyze why events happened and how they are related "
135+ "3. Identify gaps: Assess what data is available, what is missing and potential biases "
136+ "4. Identify key messages: Determine if there are important stories and signals hidden in the data "
137+ "5. Conclude and make your case "
138138 )
139139
140140 @staticmethod
@@ -552,7 +552,7 @@ def _build_intro_section(cls):
552552 return (
553553 "I will provide you with a set of instructions, data, and formatting requests in three sections."
554554 + " I will pass you the INSTRUCTIONS section, are you ready?"
555- + "\n \n \n \n "
555+ + "\n \n "
556556 )
557557
558558 @classmethod
@@ -585,9 +585,9 @@ def _build_instruction_section(cls, request_filter: dict, df: pd.DataFrame, inst
585585 component_str = '", "' .join (components )
586586 instructions .append (f'and "{ component_str } " aspects' )
587587
588- instructions .append ("in Emergency Response." )
588+ instructions .append ("in Emergency Response. " )
589589 instructions .append ("\n \n " + instruction )
590- instructions .append ("\n \n I will pass you the DATA section, are you ready?\n \n \n " )
590+ instructions .append ("\n \n I will pass you the DATA section, are you ready?\n \n " )
591591 return "\n " .join (instructions )
592592
593593 @classmethod
@@ -814,13 +814,30 @@ def _modify_summary(summary: dict) -> dict:
814814 Checks if the "Confidence level" is present in the primary response and skipping for the secondary summary
815815 """
816816 for key , value in summary .items ():
817- confidence_level = "confidence level"
818- if key == "contradictory reports" or confidence_level in value :
817+ if key == "contradictory reports" :
819818 continue
820- if confidence_level in value ["content" ].lower ():
821- parts = re .split (rf"(?i)\b{ confidence_level } \b" , value ["content" ])
819+
820+ content = value .get ("content" , "" )
821+ excerpt_ids = value .get ("excerpts id" , "" )
822+ excerpt_id_list = (
823+ list (set (excerpt_ids ))
824+ if isinstance (excerpt_ids , list )
825+ else list (set (int (id .strip ()) for id in excerpt_ids .split ("," ) if excerpt_ids and excerpt_ids != "" ))
826+ )
827+
828+ # Check if any excerpt id is present in the content and regenerate the summary if found
829+ if any (re .search (rf"\b{ id } \b" , content ) for id in excerpt_id_list ):
830+ return cls .generate_summary (prompt , type )
831+
832+ value ["content" ] = content
833+ value ["excerpts id" ] = excerpt_id_list
834+
835+ # Extract and remove if `confidence level` exists in the content
836+ confidence_level = "confidence level"
837+ if confidence_level not in value and confidence_level in content .lower ():
838+ parts = re .split (rf"(?i)\b{ confidence_level } \b" , content , maxsplit = 1 )
822839 value ["content" ] = parts [0 ].strip () + "."
823- value ["confidence level" ] = parts [1 ][ 1 :] .strip ()
840+ value ["confidence level" ] = parts [1 ].strip ()
824841
825842 return summary
826843
@@ -906,11 +923,7 @@ def secondary_response_save_to_db(
906923 type = value ["type" ].strip ()
907924 subtype = value ["subtype" ].strip ()
908925 content = value ["content" ].strip ()
909- excerpt_ids = value ["excerpts id" ]
910- if isinstance (excerpt_ids , list ):
911- excerpt_id_list = list (set (excerpt_ids if excerpt_ids else []))
912- else :
913- excerpt_id_list = list (set (int (id .strip ()) for id in excerpt_ids .split ("," ) if excerpt_ids and excerpt_ids != "" ))
926+ excerpt_id_list = value ["excerpts id" ]
914927
915928 if type == "component" and len (excerpt_id_list ) > 0 :
916929 cls .add_used_ops_learnings_component (
0 commit comments