Skip to content

Commit 81f31ea

Browse files
committed
summrization ansh
1 parent 687bfd3 commit 81f31ea

File tree

5 files changed

+51
-20
lines changed

5 files changed

+51
-20
lines changed

ai-and-app-modernisation/ai-services/generative-ai-service/summarize-genai/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ Text summarization, a core NLP task, unlocks the ability to distill lengthy cont
44

55
In this article, we'll delve into the creation of a powerful document summarization solution leveraging Oracle Generative AI. By integrating Oracle Gen AI's advanced capabilities with cutting-edge technologies such as LangChain, this codebase empowers users to effortlessly summarize extensive documents, harnessing the power of the Oracle Generative AI Service.
66

7+
<img src="./files/docSummarize.png">
8+
</img>
79

810
# When to use this asset?
911

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Prerequisites
2+
You need the latest versions of LangChain and the OCI software development kit (SDK). To install and upgrade these two Python packages, use the following command:
3+
4+
pip install -U langchain oci
5+
pip install -r requirements.txt
6+
7+
# Running the application
8+
9+
You need to have your compartment ID ready; the application asks you to enter it in the sidebar.
10+
11+
Run the following command to launch the application:
12+
13+
streamlit run ocidocumentSummarizeUpload.py
14+
15+
# More Info Links
16+
17+
How to run the application: https://www.youtube.com/watch?v=6A3KGyKy91Q&t=21s
18+
19+
Different methods of summarization: https://medium.com/@anshuman4luv/revolutionizing-document-summarization-innovative-methods-with-langchain-and-large-language-models-f12272c7e8cd
20+
21+
22+
# License
23+
24+
Copyright (c) 2024 Oracle and/or its affiliates.
25+
26+
Licensed under the Universal Permissive License (UPL), Version 1.0.
27+
28+
See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.
107 KB
Loading

ai-and-app-modernisation/ai-services/generative-ai-service/summarize-genai/files/ocidocumentSummarizeUpload.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1+
#Author: Anshuman Panda
12
import streamlit as st
23
import os
34
from langchain.document_loaders import PyPDFLoader
45
from langchain.prompts import PromptTemplate
56
from langchain.text_splitter import RecursiveCharacterTextSplitter
67
from langchain.chains.summarize import load_summarize_chain
7-
from langchain.chains import LLMChain
88
from langchain_community.llms import OCIGenAI
9-
# from genai_langchain_integration.langchain_oci import OCIGenAI
109
from pypdf import PdfReader
1110
from io import BytesIO
1211
from typing import Any, Dict, List
@@ -46,7 +45,7 @@ def text_to_docs(text: str,chunk_size,chunk_overlap) -> List[Document]:
4645
for i, doc in enumerate(page_docs):
4746
doc.metadata["page"] = i + 1
4847

49-
# Split pages into chunks
48+
# Ansh Split pages into chunks
5049
doc_chunks = []
5150

5251
for doc in page_docs:
@@ -60,7 +59,7 @@ def text_to_docs(text: str,chunk_size,chunk_overlap) -> List[Document]:
6059
doc = Document(
6160
page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i}
6261
)
63-
# Add sources a metadata
62+
# Ansh Add sources as metadata
6463
doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}"
6564
doc_chunks.append(doc)
6665
return doc_chunks
@@ -69,13 +68,13 @@ def text_to_docs(text: str,chunk_size,chunk_overlap) -> List[Document]:
6968
def custom_summary(docs, llm, custom_prompt, chain_type, num_summaries):
7069
print("I am inside custom summary")
7170
custom_prompt = custom_prompt + """:\n {text}"""
72-
print("custom Prompt is ------>")
71+
print("Ansh custom Prompt is ------>")
7372
print(custom_prompt)
7473
COMBINE_PROMPT = PromptTemplate(template=custom_prompt, input_variables = ["text"])
75-
print("combine Prompt is ------>")
74+
print("Ansh combine Prompt is ------>")
7675
print(COMBINE_PROMPT)
7776
MAP_PROMPT = PromptTemplate(template="Summarize:\n{text}", input_variables=["text"])
78-
print("MAP_PROMPT Prompt is ------>")
77+
print("Ansh MAP_PROMPT Prompt is ------>")
7978
print(MAP_PROMPT)
8079
if chain_type == "map_reduce":
8180
chain = load_summarize_chain(llm,chain_type=chain_type,
@@ -106,18 +105,21 @@ def main():
106105
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
107106
st.title("Document Summarization App")
108107

109-
llm = st.sidebar.selectbox("LLM",["OracleGenAI","Other (other source in the future)"])
108+
llm_name = st.sidebar.selectbox("LLM",["cohere.command","meta.llama-2-70b-chat"])
109+
110110
chain_type = st.sidebar.selectbox("Chain Type", ["map_reduce", "stuff", "refine"])
111-
chunk_size = st.sidebar.slider("Chunk Size", min_value=20, max_value = 10000,
112-
step=10, value=4000)
111+
chunk_size = st.sidebar.slider("Chunk Size", min_value=20, max_value = 5000,
112+
step=10, value=2000)
113113
chunk_overlap = st.sidebar.slider("Chunk Overlap", min_value=5, max_value = 5000,
114114
step=10, value=200)
115-
user_prompt = st.text_input("Enter the document summary prompt", value= "Compose a concise and brief summary of this text. ")
115+
user_prompt = st.text_input("Enter the document summary prompt", value= "Compose a brief summary of this text. ")
116116
temperature = st.sidebar.number_input("Set the GenAI Temperature",
117117
min_value = 0.0,
118118
max_value=1.0,
119119
step=0.1,
120120
value=0.5)
121+
max_token = st.sidebar.slider("Max Output size", min_value=200, max_value = 1000,step=10, value=200)
122+
compartment_id = st.sidebar.text_input("Enter the compartment id", value= "")
121123

122124
opt = "Upload-own-file"
123125
pages = None
@@ -144,17 +146,11 @@ def main():
144146
pages
145147

146148

147-
148-
# llm = OCIGenAI(
149-
# model_id="cohere.command",
150-
# service_endpoint="https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com",
151-
# compartment_id = "ocid1.tenancy.oc1..aaaaaaaa5hwtrus75rauufcfvtnjnz3mc4xm2bzibbigva2bw4ne7ezkvzha",
152-
# temperature=temperature
153-
# )
154149
llm = OCIGenAI(
155-
model_id="cohere.command",
150+
model_id=llm_name,
156151
service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
157-
compartment_id = "ocid1.compartment.oc1..aaaaaaaa7ggqkd4ptkeb7ugk6ipsl3gqjofhkr6yacluwj4fitf2ufrdm65q",
152+
compartment_id = compartment_id,
153+
model_kwargs={"temperature": temperature, "max_tokens": max_token}
158154
)
159155

160156
if st.button("Summarize"):
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
streamlit
2+
langchain
3+
unstructured
4+
langchain_community
5+
pypdf

0 commit comments

Comments
 (0)