88
99
# Default few-shot prompt used when the caller does not supply one.
# The `[DOCUMENTS]` and `[KEYWORDS]` tags are replaced at runtime with the
# topic's representative documents and its top keywords respectively.
DEFAULT_PROMPT = """
This is a list of texts where each collection of texts describe a topic. After each collection of texts, the name of the topic they represent is mentioned as a short-highly-descriptive title
---
Topic:
Sample texts from this topic:
- Traditional diets in most cultures were primarily plant-based with a little meat on top, but with the rise of industrial style meat production and factory farming, meat has become a staple food.
- Meat, but especially beef, is the worst food in terms of emissions.
- Eating meat doesn't make you a bad person, not eating meat doesn't make you a good one.

Keywords: meat beef eat eating emissions steak food health processed chicken
Topic name: Environmental impacts of eating meat
---
Topic:
Sample texts from this topic:
- I have ordered the product weeks ago but it still has not arrived!
- The website mentions that it only takes a couple of days to deliver but I still have not received mine.
- I got a message stating that I received the monitor but that is not true!
- It took a month longer to deliver than was advised...

Keywords: deliver weeks product shipping long delivery received arrived arrive week
Topic name: Shipping and delivery issues
---
Topic:
Sample texts from this topic:
[DOCUMENTS]
Keywords: [KEYWORDS]
Topic name:"""

# Default system prompt for the chat-completion call.
DEFAULT_SYSTEM_PROMPT = "You are an assistant that extracts high-level topics from texts."
1839
1940
2041class LlamaCPP (BaseRepresentation ):
@@ -28,6 +49,8 @@ class LlamaCPP(BaseRepresentation):
2849 NOTE: Use `"[KEYWORDS]"` and `"[DOCUMENTS]"` in the prompt
2950 to decide where the keywords and documents need to be
3051 inserted.
52+ system_prompt: The system prompt to be used in the model. If no system prompt is given,
53+ `self.default_system_prompt_` is used instead.
3154 pipeline_kwargs: Kwargs that you can pass to the `llama_cpp.Llama`
3255 when it is called such as `max_tokens` to be generated.
3356 nr_docs: The number of documents to pass to OpenAI if a prompt
@@ -93,14 +116,15 @@ def __init__(
93116 self ,
94117 model : Union [str , Llama ],
95118 prompt : str = None ,
119+ system_prompt : str = None ,
96120 pipeline_kwargs : Mapping [str , Any ] = {},
97121 nr_docs : int = 4 ,
98122 diversity : float = None ,
99123 doc_length : int = None ,
100124 tokenizer : Union [str , Callable ] = None ,
101125 ):
102126 if isinstance (model , str ):
103- self .model = Llama (model_path = model , n_gpu_layers = - 1 , stop = "Q: " )
127+ self .model = Llama (model_path = model , n_gpu_layers = - 1 , stop = "\n " , chat_format = "ChatML " )
104128 elif isinstance (model , Llama ):
105129 self .model = model
106130 else :
@@ -110,7 +134,9 @@ def __init__(
110134 "local LLM or a ` llama_cpp.Llama` object."
111135 )
112136 self .prompt = prompt if prompt is not None else DEFAULT_PROMPT
137+ self .system_prompt = system_prompt if system_prompt is not None else DEFAULT_SYSTEM_PROMPT
113138 self .default_prompt_ = DEFAULT_PROMPT
139+ self .default_system_prompt_ = DEFAULT_SYSTEM_PROMPT
114140 self .pipeline_kwargs = pipeline_kwargs
115141 self .nr_docs = nr_docs
116142 self .diversity = diversity
@@ -151,33 +177,39 @@ def extract_topics(
151177 self .prompts_ .append (prompt )
152178
153179 # Extract result from generator and use that as label
154- topic_description = self .model (prompt , ** self .pipeline_kwargs )["choices" ]
155- topic_description = [( description [ "text" ]. replace ( prompt , "" ), 1 ) for description in topic_description ]
156-
157- if len ( topic_description ) < 10 :
158- topic_description += [( "" , 0 ) for _ in range ( 10 - len ( topic_description ))]
159-
160- updated_topics [topic ] = topic_description
180+ # topic_description = self.model(prompt, **self.pipeline_kwargs)["choices"]
181+ topic_description = self . model . create_chat_completion (
182+ messages = [{ "role" : "system" , "content" : self . system_prompt }, { "role" : "user" , "content" : prompt }],
183+ ** self . pipeline_kwargs ,
184+ )
185+ label = topic_description [ "choices" ][ 0 ][ "message" ][ "content" ]. strip ()
186+ updated_topics [topic ] = [( label , 1 )] + [( "" , 0 ) for _ in range ( 9 )]
161187
162188 return updated_topics
163189
164190 def _create_prompt (self , docs , topic , topics ):
165- keywords = ", " . join ( list (zip (* topics [topic ]))[0 ])
191+ keywords = list (zip (* topics [topic ]))[0 ]
166192
167- # Use the default prompt and replace keywords
193+ # Use the Default Chat Prompt
168194 if self .prompt == DEFAULT_PROMPT :
169- prompt = self .prompt .replace ("[KEYWORDS]" , keywords )
195+ prompt = self .prompt .replace ("[KEYWORDS]" , ", " .join (keywords ))
196+ prompt = self ._replace_documents (prompt , docs )
170197
171- # Use a prompt that leverages either keywords or documents in
172- # a custom location
198+ # Use a custom prompt that leverages keywords, documents or both using
199+ # custom tags, namely [KEYWORDS] and [DOCUMENTS] respectively
173200 else :
174201 prompt = self .prompt
175202 if "[KEYWORDS]" in prompt :
176- prompt = prompt .replace ("[KEYWORDS]" , keywords )
203+ prompt = prompt .replace ("[KEYWORDS]" , ", " . join ( keywords ) )
177204 if "[DOCUMENTS]" in prompt :
178- to_replace = ""
179- for doc in docs :
180- to_replace += f"- { doc } \n "
181- prompt = prompt .replace ("[DOCUMENTS]" , to_replace )
205+ prompt = self ._replace_documents (prompt , docs )
206+
207+ return prompt
182208
209+ @staticmethod
210+ def _replace_documents (prompt , docs ):
211+ to_replace = ""
212+ for doc in docs :
213+ to_replace += f"- { doc } \n "
214+ prompt = prompt .replace ("[DOCUMENTS]" , to_replace )
183215 return prompt
0 commit comments