|
36 | 36 | from typing import Any |
37 | 37 |
|
38 | 38 | GRAPH_FIELD_SEP = "<SEP>" |
39 | | - |
40 | | -PROMPTS: dict[str, Any] = {} |
41 | | - |
42 | | -PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>" |
43 | | -PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##" |
44 | | -PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>" |
45 | | - |
46 | | -PROMPTS["DEFAULT_ENTITY_TYPES"] = [ |
| 39 | +DEFAULT_TUPLE_DELIMITER = "<|>" |
| 40 | +DEFAULT_RECORD_DELIMITER = "##" |
| 41 | +DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>" |
| 42 | +DEFAULT_ENTITY_TYPES = [ |
47 | 43 | "organization", |
48 | 44 | "person", |
49 | 45 | "geo", |
|
54 | 50 | "category", |
55 | 51 | ] |
56 | 52 |
|
| 53 | +PROMPTS: dict[str, Any] = {} |
| 54 | + |
| 55 | +# Keys: language, entity_types, tuple_delimiter, record_delimiter, completion_delimiter, examples, input_text |
57 | 56 | PROMPTS["entity_extraction"] = """---Goal--- |
58 | 57 | Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities. |
59 | 58 | Use {language} as output language. |
60 | 59 |
|
61 | 60 | ---Steps--- |
62 | 61 | 1. Identify all entities. For each identified entity, extract the following information: |
63 | | -- entity_name: Full Name of the entity, must use **same language** as input text, it's important. If English, capitalized the name. |
| 62 | +- entity_name: Full Name of the entity, must use **same language** as Real Data Text, it's important. If English, capitalize the name. |
64 | 63 | - entity_type: One of the following types: [{entity_types}] |
65 | 64 | - entity_description: Comprehensive description of the entity's attributes and activities |
66 | 65 | Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>) |
|
95 | 94 | ###################### |
96 | 95 | Output:""" |
97 | 96 |
|
| 97 | +# Keys: tuple_delimiter, record_delimiter, completion_delimiter (rendered into entity_extraction via {examples}) |
98 | 98 | PROMPTS["entity_extraction_examples"] = [ |
99 | 99 | """Example 1: |
100 | 100 |
|
|
211 | 211 | #############################""", |
212 | 212 | ] |
213 | 213 |
|
| 214 | +# Keys: language, entity_name, description_list |
214 | 215 | PROMPTS[ |
215 | 216 | "summarize_entity_descriptions" |
216 | 217 | ] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below. |
|
228 | 229 | Output: |
229 | 230 | """ |
230 | 231 |
|
| 232 | +# Keys: language, entity_types, tuple_delimiter, record_delimiter, completion_delimiter |
231 | 233 | PROMPTS["entity_continue_extraction"] = """ |
232 | 234 | MANY entities and relationships were missed in the last extraction. |
233 | 235 |
|
234 | 236 | ---Remember Steps--- |
235 | 237 |
|
236 | 238 | 1. Identify all entities. For each identified entity, extract the following information: |
237 | | -- entity_name: Name of the entity, use same language as input text. If English, capitalized the name. |
| 239 | +- entity_name: Name of the entity, use same language as Real Data Text. If English, capitalize the name. |
238 | 240 | - entity_type: One of the following types: [{entity_types}] |
239 | 241 | - entity_description: Comprehensive description of the entity's attributes and activities |
240 | 242 | Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>) |
|
260 | 262 | Add them below using the same format:\n |
261 | 263 | """.strip() |
262 | 264 |
|
| 265 | +# Keys: (none) |
263 | 266 | PROMPTS["entity_if_loop_extraction"] = """ |
264 | 267 | ---Goal--- |
265 | 268 |
|
|
270 | 273 | Answer ONLY by `YES` OR `NO` if there are still entities that need to be added. |
271 | 274 | """.strip() |
272 | 275 |
|
| 276 | +# Keys: (none) |
273 | 277 | PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question.[no-context]" |
274 | 278 |
|
275 | | -PROMPTS["rag_response"] = """---Role--- |
276 | | -
|
277 | | -You are a helpful assistant responding to user query about Knowledge Graph and Document Chunks provided in JSON format below. |
278 | | -
|
279 | | -
|
280 | | ----Goal--- |
281 | | -
|
282 | | -Generate a concise response based on Knowledge Base and follow Response Rules, considering both the conversation history and the current query. Summarize all information in the provided Knowledge Base, and incorporating general knowledge relevant to the Knowledge Base. Do not include information not provided by Knowledge Base. |
283 | | -
|
284 | | -When handling relationships with timestamps: |
285 | | -1. Each relationship has a "created_at" timestamp indicating when we acquired this knowledge |
286 | | -2. When encountering conflicting relationships, consider both the semantic content and the timestamp |
287 | | -3. Don't automatically prefer the most recently created relationships - use judgment based on the context |
288 | | -4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps |
289 | | -
|
290 | | ----Conversation History--- |
291 | | -{history} |
292 | | -
|
293 | | ----Knowledge Graph and Document Chunks--- |
294 | | -{context_data} |
295 | | -
|
296 | | ----Response Rules--- |
297 | | -
|
298 | | -- Target format and length: {response_type} |
299 | | -- Use markdown formatting with appropriate section headings |
300 | | -- Please respond in the same language as the user's question. |
301 | | -- Ensure the response maintains continuity with the conversation history. |
302 | | -- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Document Chunks (DC), and include the file path if available, in the following format: [KG/DC] file_path |
303 | | -- If you don't know the answer, just say so. |
304 | | -- Do not make anything up. Do not include information not provided by the Knowledge Base. |
305 | | -- Addtional user prompt: {user_prompt} |
306 | | -
|
307 | | -Response:""" |
308 | | - |
| 279 | +# Keys: examples, history, query |
309 | 280 | PROMPTS["keywords_extraction"] = """---Role--- |
310 | 281 |
|
311 | 282 | You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query and conversation history. |
|
335 | 306 |
|
336 | 307 | Current Query: {query} |
337 | 308 | ###################### |
338 | | -The `Output` should be human text, not unicode characters. Keep the same language as `Query`. |
| 309 | +The `Output` should be human text, not unicode characters. Keep the same language as `Current Query`. |
339 | 310 | Output: |
340 | 311 |
|
341 | 312 | """ |
342 | 313 |
|
| 314 | +# Keys: (none, static examples rendered into keywords_extraction via {examples}) |
343 | 315 | PROMPTS["keywords_extraction_examples"] = [ |
344 | 316 | """Example 1: |
345 | 317 |
|
|
373 | 345 | #############################""", |
374 | 346 | ] |
375 | 347 |
|
376 | | -PROMPTS["naive_rag_response"] = """---Role--- |
377 | | -
|
378 | | -You are a helpful assistant responding to user query about Document Chunks provided provided in JSON format below. |
379 | | -
|
380 | | ----Goal--- |
381 | | -
|
382 | | -Generate a concise response based on Document Chunks and follow Response Rules, considering both the conversation history and the current query. Summarize all information in the provided Document Chunks, and incorporating general knowledge relevant to the Document Chunks. Do not include information not provided by Document Chunks. |
383 | | -
|
384 | | -When handling content with timestamps: |
385 | | -1. Each piece of content has a "created_at" timestamp indicating when we acquired this knowledge |
386 | | -2. When encountering conflicting information, consider both the content and the timestamp |
387 | | -3. Don't automatically prefer the most recent content - use judgment based on the context |
388 | | -4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps |
389 | | -
|
390 | | ----Conversation History--- |
391 | | -{history} |
392 | | -
|
393 | | ----Document Chunks(DC)--- |
394 | | -{content_data} |
395 | | -
|
396 | | ----Response Rules--- |
397 | | -
|
398 | | -- Target format and length: {response_type} |
399 | | -- Use markdown formatting with appropriate section headings |
400 | | -- Please respond in the same language as the user's question. |
401 | | -- Ensure the response maintains continuity with the conversation history. |
402 | | -- List up to 5 most important reference sources at the end under "References" section. Clearly indicating each source from Document Chunks(DC), and include the file path if available, in the following format: [DC] file_path |
403 | | -- If you don't know the answer, just say so. |
404 | | -- Do not include information not provided by the Document Chunks. |
405 | | -- Addtional user prompt: {user_prompt} |
406 | | -
|
407 | | -Response:""" |
408 | | - |
| 348 | +# Keys: tuple_delimiter, record_delimiter, completion_delimiter, graph_field_sep, entities_list |
409 | 349 | PROMPTS["batch_merge_analysis"] = """---Goal--- |
410 | 350 | Given a list of entities from a knowledge graph, identify groups of entities that should be merged because they refer to the EXACT SAME real-world object/individual/specific instance. |
411 | 351 |
|
|
0 commit comments