
Commit 45663b0

updating python
1 parent a457737 commit 45663b0

File tree: 3 files changed (+133 additions, -90 deletions)

.gitignore

Lines changed: 2 additions & 1 deletion
@@ -75,4 +75,5 @@ build/
 .idea
 bin/
 dist/
-application-local.yaml
+application-local.yaml
+service/python/config.json
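Note: the updated server.py reads its settings from service/python/config.json, which this commit adds to .gitignore. A minimal sketch of what that file might look like, inferred only from the keys the script reads (compartment_id, config_profile, service_endpoint, model_type, and <model_type>_model_id); every value below is a placeholder, not a real OCID or model name:

{
    "compartment_id": "ocid1.compartment.oc1..<your_compartment_ocid>",
    "config_profile": "DEFAULT",
    "service_endpoint": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    "model_type": "cohere",
    "cohere_model_id": "<cohere_model_ocid_or_name>",
    "llama_model_id": "<llama_model_ocid_or_name>"
}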

service/python/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ cached-property==1.5.2
 certifi==2024.7.4
 cffi==1.16.0
 circuitbreaker==1.4.0
-cryptography==43.0.1
+cryptography==42.0.6
 oci==2.126.2
 pycparser==2.21
 pyOpenSSL==24.1.0

service/python/server.py

Lines changed: 130 additions & 88 deletions
@@ -1,98 +1,101 @@
+import oci
 import asyncio
 import websockets
 import json
-import oci
 from throttler import throttle
 from pypdf import PdfReader
 from io import BytesIO
 from typing import Any, Dict, List
 import re
 from types import SimpleNamespace
 
-# TODO: Please update config profile name and use the compartmentId that has policies grant permissions for using Generative AI Service
-compartment_id = "<compartment_ocid>"
-CONFIG_PROFILE = "DEFAULT"
-config = oci.config.from_file('~/.oci/config', CONFIG_PROFILE)
-
-# Service endpoint
-endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
-generative_ai_inference_client = (
-    oci.generative_ai_inference.GenerativeAiInferenceClient(
-        config=config,
-        service_endpoint=endpoint,
-        retry_strategy=oci.retry.NoneRetryStrategy(),
-        timeout=(10, 240),
-    )
-)
-
-@throttle(rate_limit=15, period=65.0)
-async def generate_ai_response(prompts):
-    prompt = ""
-    llm_inference_request = (
-        oci.generative_ai_inference.models.CohereLlmInferenceRequest()
-    )
-    llm_inference_request.prompt = prompts
-    llm_inference_request.max_tokens = 1000
-    llm_inference_request.temperature = 0.75
-    llm_inference_request.top_p = 0.7
-    llm_inference_request.frequency_penalty = 1.0
+with open('config.json') as f:
+    config = json.load(f)
 
-    generate_text_detail = oci.generative_ai_inference.models.GenerateTextDetails()
-    generate_text_detail.serving_mode = oci.generative_ai_inference.models.DedicatedServingMode(endpoint_id="ocid1.generativeaiendpoint.oc1.us-chicago-1.amaaaaaaeras5xiavrsefrftfupp42lnniddgjnxuwbv5jypl64i7ktan65a")
+# Load configuration parameters
+compartment_id = config['compartment_id']
+CONFIG_PROFILE = config['config_profile']
+endpoint = config['service_endpoint']
+model_type = config['model_type']
+model_id = config[f'{model_type}_model_id']
 
-    generate_text_detail.compartment_id = compartment_id
-    generate_text_detail.inference_request = llm_inference_request
-
-    if "<compartment_ocid>" in compartment_id:
-        print("ERROR:Please update your compartment id in target python file")
-        quit()
+config = oci.config.from_file('~/.oci/config', CONFIG_PROFILE)
 
-    generate_text_response = generative_ai_inference_client.generate_text(generate_text_detail)
-    # Print result
-    print("**************************Generate Texts Result**************************")
-    print(vars(generate_text_response))
+generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
+    config=config,
+    service_endpoint=endpoint,
+    retry_strategy=oci.retry.NoneRetryStrategy(),
+    timeout=(10, 240)
+)
 
-    return generate_text_response
+chat_detail = oci.generative_ai_inference.models.ChatDetails()
 
+# Define a function to generate an AI response
 @throttle(rate_limit=15, period=65.0)
-async def generate_ai_summary(summary_txt, prompt):
-    # You can also load the summary text from a file, or as a parameter in main
-    #with open('files/summarize_data.txt', 'r') as file:
-    #    text_to_summarize = file.read()
-
-    summarize_text_detail = oci.generative_ai_inference.models.SummarizeTextDetails()
-    summarize_text_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id="cohere.command")
-    summarize_text_detail.compartment_id = compartment_id
-    #summarize_text_detail.input = text_to_summarize
-    summarize_text_detail.input = summary_txt
-    summarize_text_detail.additional_command = prompt
-    summarize_text_detail.extractiveness = "AUTO" # HIGH, LOW
-    summarize_text_detail.format = "AUTO" # brackets, paragraph
-    summarize_text_detail.length = "LONG" # high, AUTO
-    summarize_text_detail.temperature = .25 # [0,1]
-
+async def generate_ai_response(prompts):
+    # Determine the request type based on the model type
+    if model_type == 'cohere':
+        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
+        chat_request.max_tokens = 2000
+        chat_request.temperature = 0.25
+        chat_request.frequency_penalty = 0
+        chat_request.top_p = 0.75
+        chat_request.top_k = 0
+    elif model_type == 'llama':
+        chat_request = oci.generative_ai_inference.models.GenericChatRequest()
+        chat_request.api_format = oci.generative_ai_inference.models.BaseChatRequest.API_FORMAT_GENERIC
+        chat_request.max_tokens = 2000
+        chat_request.temperature = 1
+        chat_request.frequency_penalty = 0
+        chat_request.presence_penalty = 0
+        chat_request.top_p = 0.75
+        chat_request.top_k = -1
+    else:
+        raise ValueError("Unsupported model type")
+
+    # Process the prompts
+    if isinstance(prompts, str):
+        if model_type == 'cohere':
+            chat_request.message = prompts
+        else:
+            content = oci.generative_ai_inference.models.TextContent()
+            content.text = prompts
+            message = oci.generative_ai_inference.models.Message()
+            message.role = "USER"
+            message.content = [content]
+            chat_request.messages = [message]
+    elif isinstance(prompts, list):
+        chat_request.messages = prompts
+    else:
+        raise ValueError("Invalid input type for generate_ai_response")
+
+    # Set up the chat detail object
+    chat_detail.chat_request = chat_request
+    on_demand_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id)
+    chat_detail.serving_mode = on_demand_mode
+    chat_detail.compartment_id = compartment_id
+
+    # Send the request and get the response
+    chat_response = generative_ai_inference_client.chat(chat_detail)
+
+    # Validate the compartment ID
     if "<compartment_ocid>" in compartment_id:
-        print("ERROR:Please update your compartment id in target python file")
+        print("ERROR: Please update your compartment id in target python file")
         quit()
 
-    summarize_text_response = generative_ai_inference_client.summarize_text(summarize_text_detail)
-
-    # Print result
-    #print("**************************Summarize Texts Result**************************")
-    #print(summarize_text_response.data)
+    # Print the chat result
+    print("**************************Chat Result**************************")
+    print(vars(chat_response))
 
-    return summarize_text_response.data
+    return chat_response
 
 async def parse_pdf(file: BytesIO) -> List[str]:
     pdf = PdfReader(file)
     output = []
     for page in pdf.pages:
         text = page.extract_text()
-        # Merge hyphenated words
         text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
-        # Fix newlines in the middle of sentences
         text = re.sub(r"(?<!\n\s)\n(?!\s\n)", " ", text.strip())
-        # Remove multiple newlines
         text = re.sub(r"\n\s*\n", "\n\n", text)
         output.append(text)
     return output
@@ -102,34 +105,73 @@ async def handle_websocket(websocket, path):
         while True:
             data = await websocket.recv()
             if isinstance(data, str):
-                # if we are dealing with text, make it JSON
-                objData = json.loads(data,object_hook=lambda d: SimpleNamespace(**d))
+                objData = json.loads(data, object_hook=lambda d: SimpleNamespace(**d))
                 if objData.msgType == "question":
                     prompt = objData.data
-                if objData.msgType == "question":
-                    response = await generate_ai_response(prompt)
-                    answer = response.data.inference_response.generated_texts[0].text
-                    buidJSON = {"msgType":"answer","data":answer}
-                    await websocket.send(json.dumps(buidJSON))
-            # if it's not text, we have a binary and we will treat it as a PDF
-            if not isinstance(data,str):
-                # split the ArrayBuffer into metadata and the actual PDF file
+                    response = await generate_ai_response(prompt)
+
+                    if model_type == 'llama':
+                        answer = response.data.chat_response.choices[0].message.content[0].text
+                    elif model_type == 'cohere':
+                        answer = response.data.chat_response.text
+                    else:
+                        answer = ""
+
+                    buidJSON = {"msgType": "answer", "data": answer}
+                    await websocket.send(json.dumps(buidJSON))
+                elif objData.msgType == "summary":
+                    pdfFileObj = BytesIO(objData.data)
+                    output = await parse_pdf(pdfFileObj)
+                    chunk_size = 512
+                    chunks = [' '.join(output[i:i + chunk_size]) for i in range(0, len(output), chunk_size)]
+
+                    print(f"Processing {len(chunks)} chunks...")
+
+                    summaries = []
+                    for index, chunk in enumerate(chunks):
+                        print(f"Processing chunk {index+1}/{len(chunks)}...")
+                        response = await generate_ai_response(f"Summarize: {chunk}")
+                        if model_type == 'llama':
+                            summary = response.data.chat_response.choices[0].message.content[0].text
+                        elif model_type == 'cohere':
+                            summary = response.data.chat_response.text
+                        else:
+                            summary = ""
+                        summaries.append(summary)
+
+                    final_summary = ' '.join(summaries)
+                    buidJSON = {"msgType": "summary", "data": final_summary}
+                    await websocket.send(json.dumps(buidJSON))
+            else:
                 objData = data.split(b'\r\n\r\n')
-                # decode the metadata and parse the JSON data. Creating Dict properties from the JSON
-                metadata = json.loads(objData[0].decode('utf-8'),object_hook=lambda d: SimpleNamespace(**d))
+                metadata = json.loads(objData[0].decode('utf-8'), object_hook=lambda d: SimpleNamespace(**d))
                 pdfFileObj = BytesIO(objData[1])
                 output = await parse_pdf(pdfFileObj)
-                response = await generate_ai_summary(''.join(output),metadata.msgPrompt)
-                summary = response.summary
-                buidJSON = {"msgType":"summary","data": summary}
+                chunk_size = 512
+                chunks = [' '.join(output[i:i + chunk_size]) for i in range(0, len(output), chunk_size)]
+
+                print(f"Processing {len(chunks)} chunks...")
+
+                summaries = []
+                for index, chunk in enumerate(chunks):
+                    print(f"Processing chunk {index+1}/{len(chunks)}...")
+                    response = await generate_ai_response(f"Summarize: {chunk}")
+                    if model_type == 'llama':
+                        summary = response.data.chat_response.choices[0].message.content[0].text
+                    elif model_type == 'cohere':
+                        summary = response.data.chat_response.text
+                    else:
+                        summary = ""
+                    summaries.append(summary)
+
+                final_summary = ' '.join(summaries)
+                buidJSON = {"msgType": "summary", "data": final_summary}
                 await websocket.send(json.dumps(buidJSON))
     except websockets.exceptions.ConnectionClosedOK as e:
         print(f"Connection closed: {e}")
-
-
+
 async def start_server():
-    await websockets.serve(handle_websocket, "localhost", 1986, max_size=200000000)
-
+    async with websockets.serve(handle_websocket, "localhost", 1986, max_size=200000000):
+        await asyncio.Future()  # run forever
 
-asyncio.get_event_loop().run_until_complete(start_server())
-asyncio.get_event_loop().run_forever()
+asyncio.run(start_server())
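As a quick smoke test of the reworked websocket protocol, a minimal client sketch in Python (the localhost:1986 address, the 200000000-byte max_size, and the {"msgType": ..., "data": ...} message shape come from handle_websocket above; the sample question text is purely illustrative):

import asyncio
import json
import websockets

async def ask(question: str) -> str:
    # Connect to the running server.py and exercise the "question" -> "answer" round trip.
    async with websockets.connect("ws://localhost:1986", max_size=200000000) as ws:
        await ws.send(json.dumps({"msgType": "question", "data": question}))
        reply = json.loads(await ws.recv())  # expected shape: {"msgType": "answer", "data": "..."}
        return reply["data"]

if __name__ == "__main__":
    print(asyncio.run(ask("Summarize what OCI Generative AI offers in one sentence.")))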
