Merge pull request #1704 from oracle-devrel/aliottoman-patch-2-smart-invoice

al3xne · web-flow · commit 555d941e335f · 2025-04-17T17:38:48.000+03:00
Update SmartInvoiceExtraction.py
diff --git a/ai/generative-ai-service/smart-invoice-extraction/SmartInvoiceExtraction.py b/ai/generative-ai-service/smart-invoice-extraction/SmartInvoiceExtraction.py
@@ -70,116 +70,153 @@ def extractor(image_list):
 # Main Streamlit app function
 def invoiceAnalysisPlus():
     st.title("Invoice Data Extraction")
-
+    
     with st.sidebar:
         st.title("Parameters")
-        # Replace with your own compartment ID
-        compID = "<YOUR_COMPARTMENT_OCID_HERE>"
+        # User prompt input
         user_prompt = st.text_input("Input the elements you are looking to extract here")
         st.caption("Our AI assistant has extracted the following key elements from the invoice. Please select the elements you wish to extract.")
 
+        
     uploaded_file = st.file_uploader("Upload your invoices here:", type=["pdf"])
-
+    
     if uploaded_file is not None:
         with st.spinner("Processing..."):
-            # Convert PDF to image list
             if uploaded_file.type == "application/pdf":
                 images = convert_from_bytes(uploaded_file.read(), fmt="jpeg")
             else:
                 images = [convert_from_bytes(uploaded_file.read(), fmt="jpeg")[0]]
-
-            # Save as byte streams
-            image_list = save_images(images)
-
-        # Load both image-based and text-based LLMs
+            
+            image_list = save_images(images)  # Convert to byte arrays
+        
         llm = ChatOCIGenAI(
-            model_id="meta.llama-3.2-90b-vision-instruct",  # Replace with your model ID
-            compartment_id=compID,
+            model_id="meta.llama-3.2-90b-vision-instruct",
+            compartment_id="", #TO-DO: Add your compartment ID here
             model_kwargs={"max_tokens": 2000, "temperature": 0}
         )
         llm_for_prompts = ChatOCIGenAI(
-            model_id="cohere.command-r-plus-08-2024",  # Replace with your model ID
-            compartment_id=compID,
+            model_id="cohere.command-r-plus-08-2024",
+            compartment_id="",#TO-DO: Add your compartment ID here
             model_kwargs={"max_tokens": 2000, "temperature": 0}
         )
-
-        # Select box UI for user to pick elements and their data types
-        data_types = ["Text", "Number", "Percentage", "Date"]
+        
+        # Options for data types
+        data_types = [ "Text", "Number", "Percentage", "Date"]
+        
+        # Lists to store names and their types
         elements = []
-
         if "availables" not in st.session_state:
             st.session_state.availables = extractor(image_list)
-
-        for i in range(3):  # Max 3 fields
-            col1, col2 = st.columns([2, 1])
-            with col1:
+        for i in range(3):  # Adjust 'n' for the maximum number of selections
+            col1, col2 = st.columns([2, 1])  # Adjust width ratio if needed
+            
+            with col1:                
+                # Preserve user selection across reruns
                 name = st.selectbox(f"Select an element {i+1}", st.session_state.availables, key=f"name_{i}", index=i)
             with col2:
                 data_type = st.selectbox(f"Type {i+1}", data_types, key=f"type_{i}")
                 elements.append((name, data_type))
 
-        # Generate appropriate prompt based on selected or input fields
-        if elements:
+        if elements is not None:
             system_message_cohere = SystemMessage(
-                content=f"""
-                Based on the following set of elements {elements}, with their respective types, extract their values and respond only in valid JSON format (no explanation):
-                {', '.join([f'- {e[0]}' for e in elements])}
-                For example:
-                {{
-                    {elements[0][0]}: "296969",
-                    {elements[1][0]}: "296969",
-                    {elements[2][0]}: "296969"
-                }}
-                """
-            )
+            content=f"""
+            Based on the following set of elements {elements}, with their respective types ({elements[0][1]}, {elements[1][1]}, {elements[2][1]}), Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
+            - {elements[0][0]}
+            - {elements[1][0]}
+            - {elements[2][0]}
+            Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
+            For example:
+            {{
+                {elements[0][0]}: "296969",
+                {elements[1][0]}: "296969",
+                {elements[2][0]}: "296969",
+            }}
+            """)
             ai_response_cohere = system_message_cohere
         else:
+            # Cohere section for generating the prompt
             system_message_cohere = SystemMessage(
-                content=f"""
-                Generate a system prompt to extract fields based on user-defined elements: {user_prompt}.
-                Output should be JSON only. No other text.
-                """
-            )
+            content=f"""
+            Based on the following system prompt, create a new prompt accordingly based on the elements specified in the user prompt here ({user_prompt}). 
+
+            This is the system prompt template:
+            "
+            Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
+            - **Debit / Credit Note No.**
+            - **Policy Period** 
+            - **Insured** 
+            - **Vessel Name** 
+            - **Details** 
+            - **Currency** 
+            - **Gross Premium 100%**
+            - **OIMSL Share** 
+            - **Total Deductions**
+            - **Net Premium** 
+            - **Premium Schedule**
+            - **Installment Amount**
+
+            Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
+
+            For example:
+            
+                "Debit / Credit Note No.": "296969",
+                "Policy Period": "Feb 20, 2024 to Jul 15, 2025",
+                "Insured": "Stealth Maritime Corp. S.A.",
+                "Vessel Name": "SUPRA DUKE - HULL & MACHINERY", (Make sure this is the entire vessel name only)
+                "Details": "SUPRA DUKE - Original Premium",
+                "Currency": "USD",
+                "Gross Premium 100%": 56973.63,
+                "OIMSL Share": 4557.89,
+                "Total Deductions": 979.92,
+                "Net Premium": 3577.97,
+                "Premium Schedule": ["Apr 20, 2024", "Jun 14, 2024", "Sep 13, 2024", "Dec 14, 2024", "Mar 16, 2025", "Jun 14, 2025"],
+                "Installment Amount": [372.87, 641.02, 641.02, 641.02, 641.02, 641.02]
+            
+            )" ensure your response is a system prompt format with an example of what the ouput should look like. Also ensure to mention in your gernerated prompt that no other content whatsover should appear except the JSON
+            """)
             ai_response_cohere = llm_for_prompts.invoke(input=[system_message_cohere])
+        print(ai_response_cohere)
 
-        # Extracted data list
         extracted_data = []
-
+        
         with st.spinner("Analyzing invoice..."):
             for idx, img_byte_arr in enumerate(image_list):
                 try:
+                    # Convert the image to base64 directly from memory
                     encoded_frame = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
-
-                    if elements:
+                    if elements is not None:
                         system_message = ai_response_cohere
                     else:
-                        system_message = SystemMessage(content=ai_response_cohere.content)
-
+                        system_message = SystemMessage(
+                            content=ai_response_cohere.content)
                     human_message = HumanMessage(
                         content=[
                             {"type": "text", "text": "This is my invoice"},
                             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_frame}"}},
                         ]
                     )
-
                     ai_response = llm.invoke(input=[human_message, system_message])
-                    json_start = ai_response.content.find('{')
-                    json_end = ai_response.content.find('}', json_start)
-                    json_data = ai_response.content[json_start:json_end + 1]
-
-                    response_dict = json.loads(json_data)
+                    print(ai_response.content)
+                    index = ai_response.content.find('{')
+                    index2 = ai_response.content.find('}')
+                    x = ai_response.content[index:]
+                    x2 = x[:index2+1]
+                    print(x2)
+                    response_dict = json.loads(x2)
+                    
+                    # Add metadata for tracking
                     response_dict["File Name"] = uploaded_file.name
-                    response_dict["Page Number"] = idx + 1
+                    response_dict["Page Number"] = idx + 1  
+
                     extracted_data.append(response_dict)
 
                 except Exception as e:
                     st.error(f"Error processing page {idx+1}: {str(e)}")
-
-        # Display and save results
+                    
         if extracted_data:
             save_to_csv(extracted_data)
             st.dataframe(pd.DataFrame(extracted_data))
 
-# Run the app
+# Run the invoice data extraction app
 if __name__ == "__main__":
-    invoiceAnalysisPlus()
+    invoiceAnalysisPlus()