
Commit 38c946b

Add How to better configure your cache document (#450)
Signed-off-by: SimFG <bang.fu@zilliz.com>
1 parent c663379 commit 38c946b

File tree

8 files changed: +571 -21 lines changed


README.md

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ cache.set_openai_key()
 questions = [
     "what's github",
     "can you explain what GitHub is",
-    "can you tell me more about GitHub"
+    "can you tell me more about GitHub",
     "what is the purpose of GitHub"
 ]
 

docs/bootcamp/langchain/question_answering.ipynb

Lines changed: 3 additions & 3 deletions
@@ -113,7 +113,7 @@
 "metadata": {},
 "source": [
 "## Prepare Data\n",
-"First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
+"First we [prepare the data](https://raw.githubusercontent.com/hwchase17/langchain/master/docs/extras/modules/state_of_the_union.txt). For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). You can learn more detail about Milvus in Langchain refer to [it](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/milvus.html?highlight=milvus)."
 ]
 },
 {
@@ -386,7 +386,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": "Python 3",
 "language": "python",
 "name": "python3"
 },
@@ -400,7 +400,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.12"
+"version": "3.8.8"
 },
 "vscode": {
 "interpreter": {

docs/configure_it.md

Lines changed: 530 additions & 0 deletions
Large diffs are not rendered by default.
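
The 530-line document is not rendered here, but its topic is cache configuration. As a minimal sketch grounded only in APIs visible elsewhere in this commit (init_similar_cache is imported in the test file below), one knob the doc's title points at is the similarity threshold; the 0.8 value is illustrative, not taken from configure_it.md.

from gptcache import Config
from gptcache.adapter.api import init_similar_cache

# Illustrative only: tune how close a stored question must be to a new
# one before the cache counts it as a hit. The 0.8 threshold is an
# assumption, not a recommendation from configure_it.md.
init_similar_cache(data_dir="api_cache", config=Config(similarity_threshold=0.8))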

docs/toc.bak

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 
 usage.md
 feature.md
+configure_it.md
 release_note.md
 
 .. toctree::

gptcache/adapter/adapter.py

Lines changed: 4 additions & 0 deletions
@@ -188,6 +188,7 @@ def post_process():
     kwargs["cache_context"] = context
     kwargs["cache_skip"] = cache_skip
     kwargs["cache_factor"] = cache_factor
+    kwargs["search_only_flag"] = search_only_flag
     llm_data = adapt(
         llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs
     )
@@ -199,6 +200,9 @@ def post_process():
         llm_handler, func_name="llm_request", report_func=chat_cache.report.llm
     )(*args, **kwargs)
 
+    if not llm_data:
+        return None
+
     if cache_enable:
         try:
             def update_cache_func(handled_llm_data, question=None):
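
Together these two hunks enable a search-only probe: the flag is threaded through to adapt, and a cache miss now yields None instead of falling through to the model. A minimal usage sketch, assuming search_only_flag is accepted as a plain keyword argument like the cache_skip and cache_factor kwargs set alongside it:

from gptcache.adapter import openai

# Assumption: search_only_flag is forwarded through **kwargs as in the
# hunk above. On a cache miss the adapter returns None and no LLM
# request is issued.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what's github"}],
    search_only_flag=True,
)
if response is None:
    print("cache miss: the LLM call was skipped")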

gptcache/manager/vector_data/manager.py

Lines changed: 3 additions & 3 deletions
@@ -201,9 +201,9 @@ def get(name, **kwargs):
         from gptcache.manager.vector_data.redis_vectorstore import RedisVectorStore
         host = kwargs.get("host", "localhost")
         port = kwargs.get("port", "6379")
-        user = kwargs.get("user")
-        password = kwargs.get("password")
-        namespace = kwargs.get("namespace")
+        user = kwargs.get("user", "")
+        password = kwargs.get("password", "")
+        namespace = kwargs.get("namespace", "")
         dimension = kwargs.get("dimension", DIMENSION)
         collection_name = kwargs.get("collection_name", COLLECTION_NAME)
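
The empty-string defaults mean an unauthenticated local Redis no longer receives None for its credential fields. A sketch using GPTCache's VectorBase factory, which dispatches to this get(name, **kwargs) function; the localhost setup and dimension value are illustrative:

from gptcache.manager import VectorBase

# user, password and namespace now default to "" rather than None, so a
# local unauthenticated Redis needs no credential kwargs; host and port
# fall back to localhost:6379 as read above.
vector_base = VectorBase("redis", dimension=128)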

gptcache/processor/context/summarization_context.py

Lines changed: 27 additions & 8 deletions
@@ -9,23 +9,33 @@
 
 import transformers  # pylint: disable=C0413
 
+
 def summarize_to_length(summarizer, text, target_len, max_len=1024):
     tokenizer = summarizer.tokenizer
+
     def token_length(text):
         return len(tokenizer.encode(text))
+
     segment_len = max_len - 100
     summary_result = text
     while token_length(text) > target_len:
         tokens = tokenizer.encode(text)
-        segments = [tokens[i:i+segment_len] for i in range(0, len(tokens), segment_len-1)]
+        segments = [
+            tokens[i : i + segment_len] for i in range(0, len(tokens), segment_len - 1)
+        ]
         summary_result = ""
         for segment in segments:
-            len_seg = int(len(segment)/4)
-            summary = summarizer(tokenizer.decode(segment), min_length=max(len_seg-10, 1), max_length=len_seg)
+            len_seg = int(len(segment) / 4)
+            summary = summarizer(
+                tokenizer.decode(segment),
+                min_length=max(len_seg - 10, 1),
+                max_length=len_seg,
+            )
             summary_result += summary[0]["summary_text"]
         text = summary_result
     return summary_result
 
+
 class SummarizationContextProcess(ContextProcess):
     """A context processor for summarizing large amounts of text data using a summarizer model.
 
@@ -45,8 +55,10 @@ class SummarizationContextProcess(ContextProcess):
         context_process = SummarizationContextProcess()
         cache.init(pre_embedding_func=context_process.pre_process)
     """
-    def __init__(self, model_name="facebook/bart-large-cnn",
-                 tokenizer=None, target_length=512):
+
+    def __init__(
+        self, model_name="facebook/bart-large-cnn", tokenizer=None, target_length=512
+    ):
         summarizer = transformers.pipeline(task="summarization", model=model_name)
         self.summarizer = summarizer
         self.target_length = target_length
@@ -64,7 +76,9 @@ def summarize_to_sentence(self, sentences, target_size=1000):
         target_sentences = []
         for sent, target_len in zip(sentences, target_lengths):
             if len(self.tokenizer.tokenize(sent)) > target_len:
-                response = summarize_to_length(self.summarizer, sent, target_len, self.tokenizer.model_max_length)
+                response = summarize_to_length(
+                    self.summarizer, sent, target_len, self.tokenizer.model_max_length
+                )
                 target_sentence = response
             else:
                 target_sentence = sent
@@ -84,9 +98,14 @@ def process_all_content(self) -> (Any, Any):
         def serialize_content(content):
             ret = ""
             for message in content:
-                ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(message["role"], message["content"])
+                ret += "[#RS]{}[#RE][#CS]{}[#CE]".format(
+                    message["role"], message["content"]
+                )
             return ret
-        result = self.summarize_to_sentence([message["content"] for message in self.content], self.target_length)
+
+        result = self.summarize_to_sentence(
+            [message["content"] for message in self.content], self.target_length
+        )
         save_content = serialize_content(self.content)
         embedding_content = result
         return save_content, embedding_content
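
The class docstring above already shows the intended wiring; spelled out as a runnable sketch (the arguments repeat the defaults from __init__, and the first call downloads the BART summarizer):

from gptcache import cache
from gptcache.processor.context.summarization_context import SummarizationContextProcess

# Compress long multi-turn context to roughly target_length tokens
# before it is embedded, using the defaults from __init__ above.
context_process = SummarizationContextProcess(
    model_name="facebook/bart-large-cnn",
    target_length=512,
)
cache.init(pre_embedding_func=context_process.pre_process)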

tests/unit_tests/adapter/test_langchain_models.py

Lines changed: 2 additions & 6 deletions
@@ -7,7 +7,7 @@
 from gptcache.adapter import openai
 from gptcache.adapter.api import init_similar_cache, get
 from gptcache.adapter.langchain_models import LangChainLLMs, LangChainChat, _cache_msg_data_convert
-from gptcache.processor.pre import get_prompt, last_content_without_template
+from gptcache.processor.pre import get_prompt, last_content_without_template, get_messages_last_content
 from gptcache.utils import import_pydantic, import_langchain
 from gptcache.utils.response import get_message_from_openai_answer
 
@@ -60,10 +60,6 @@ def test_langchain_llms():
     assert expect_answer == answer
 
 
-def get_msg_func(data, **_):
-    return data.get("messages")[-1].content
-
-
 def test_langchain_chats():
     question = [HumanMessage(content="test_langchain_chats")]
     question2 = [HumanMessage(content="test_langchain_chats2")]
@@ -76,7 +72,7 @@ def test_langchain_chats():
 
     llm_cache = Cache()
     llm_cache.init(
-        pre_embedding_func=get_msg_func,
+        pre_embedding_func=get_messages_last_content,
     )
 
     os.environ["OPENAI_API_KEY"] = "API"
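
The deleted get_msg_func helper duplicated logic the library ships as get_messages_last_content, so the test now uses the shared implementation. The same swap works outside the tests; a minimal sketch:

from gptcache import Cache
from gptcache.processor.pre import get_messages_last_content

# Key the cache on the content of the last chat message, replacing the
# hand-rolled get_msg_func helper removed above.
llm_cache = Cache()
llm_cache.init(pre_embedding_func=get_messages_last_content)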
