From fc70819d7bdbc8d393b1230dd70d7805d0b2b38f Mon Sep 17 00:00:00 2001 From: GitJournal Date: Wed, 10 Jan 2024 23:41:18 +0800 Subject: [PATCH 01/25] update --- .fdignore | 4 + docs/.gitignore | 5 + docs/codeview.html | 594 ++++++++ docs/data/0.json | 546 ++++++++ docs/data/1.json | 551 ++++++++ docs/data/2.json | 548 ++++++++ docs/data/3.json | 542 ++++++++ docs/data/4.json | 543 ++++++++ docs/data/5.json | 545 ++++++++ docs/data/6.json | 24 + .../0cea9af3-6d7e-464b-9c98-67d890a84704.json | 10 + .../210f2677-be0b-44ed-911c-75384dfccc5e.json | 360 +++++ .../211f5959-9ce5-4512-bcce-0c8afe4961a9.json | 60 + .../33c839ec-9562-4993-8007-5179237aa1f5.json | 50 + .../3a316a1b-8eab-437e-a0af-d0bb26acbfbb.json | 20 + .../3b0530b9-09df-4bf9-92e1-a25262e77d4c.json | 20 + .../3e9ea6a0-67fc-4b96-a38e-eb83e5cb25bd.json | 15 + .../42b586af-f715-430a-8eaa-489513178ae3.json | 10 + .../442814a1-6938-498f-a984-d0c2c323e6a4.json | 40 + .../4ad0e7eb-9359-46ee-91a5-5765b4dbec15.json | 30 + .../4f89764f-01c4-48c9-aaae-47f193ac7c6c.json | 35 + .../51b07dd0-8338-4393-8a08-9a958d2115ea.json | 10 + .../57b25d90-f16a-47ed-8acc-1205c076e73e.json | 165 +++ .../6535c412-26cb-40f0-b4a2-9b9fa1702577.json | 20 + .../66a35b11-6817-4e60-ad59-f13cbbeb8722.json | 20 + .../76401061-be28-4fb3-a263-4b6fca10497d.json | 15 + .../83cd9c2d-9a53-466b-91e1-c27f04849955.json | 20 + .../8ef4e234-d54a-4d43-a867-e9d2e137c744.json | 35 + .../9628d64c-cdf3-43c9-bfe3-6aa1c1fe9db2.json | 35 + .../9e090150-5d69-43c5-8187-800d2da3b4a1.json | 35 + .../9ef559b4-a179-4352-a8d1-a4f60266badc.json | 75 ++ .../a23616f6-ad8f-4cc1-8642-d64d19c48cde.json | 20 + .../a9ed2c0d-bced-4bd8-8e98-f89309111026.json | 25 + .../b1ebbf1c-ed23-4a46-a071-011a2df7c24c.json | 30 + .../b3d07cc5-ef2d-4fc4-aba2-4324001369f1.json | 10 + .../ba444083-0879-4ed5-bd02-c751b6e84203.json | 20 + .../bc29c96d-2474-4e87-90c7-ba3a25757fee.json | 15 + .../bceed44f-7aac-4b5f-a653-e8689e5d160c.json | 10 + .../c275354a-cb53-400e-9eeb-4b68c825d129.json | 15 + 
.../c5e61b9e-7b0c-4187-845f-ad264dfef3ba.json | 20 + .../d4d79625-27f5-4744-89d1-7db44ae551cb.json | 140 ++ .../d5de7461-a64a-4f34-93a3-2d622b4cae73.json | 10 + .../d632b16c-9b55-42f4-b40a-ba725f9e6b7d.json | 35 + .../d67c0757-c845-401d-986c-330d77c79d0f.json | 25 + .../dfc74651-7417-43a9-b64d-74c1b2eb667f.json | 15 + .../dff07df9-81f0-4854-b618-a4bbd8d283bd.json | 10 + .../f03e6678-1954-4890-bdb6-f984291581f8.json | 35 + .../f7b1b6c4-e340-42de-8dd2-3744278951ef.json | 15 + docs/github-markdown.css | 1197 +++++++++++++++++ docs/index.html | 838 ++++++++++++ docs/metadata.json | 195 +++ docs/src/README.md | 150 +++ docs/src/examples/README.md | 7 + docs/src/examples/doc_merge/README.md | 38 + docs/src/examples/doc_merge/doc_merge.py | 767 +++++++++++ docs/src/examples/doc_merge/plot.py | 170 +++ .../examples/doc_merge/pure_documents.json | 52 + docs/src/examples/keyword_counting/README.md | 45 + .../keyword_counting/dataset_gen_countries.py | 535 ++++++++ docs/src/examples/keyword_counting/plot.py | 167 +++ docs/src/examples/set_intersection/README.md | 52 + .../dataset_gen_intersection.py | 92 ++ docs/src/examples/set_intersection/plot.py | 184 +++ docs/src/examples/set_intersection/utils.py | 99 ++ docs/src/examples/sorting/README.md | 46 + docs/src/examples/sorting/plot.py | 186 +++ docs/src/examples/sorting/utils.py | 78 ++ .../graph_of_thoughts/controller/README.md | 28 + .../graph_of_thoughts/controller/__init__.py | 1 + .../controller/controller.py | 152 +++ .../language_models/README.md | 95 ++ .../language_models/__init__.py | 3 + .../abstract_language_model.py | 92 ++ .../language_models/chatgpt.py | 157 +++ .../language_models/config_template.json | 49 + .../language_models/llamachat_hf.py | 119 ++ .../graph_of_thoughts/operations/README.md | 70 + .../graph_of_thoughts/operations/__init__.py | 14 + .../operations/graph_of_operations.py | 69 + .../operations/operations.py | 900 +++++++++++++ .../graph_of_thoughts/operations/thought.py | 117 ++ 
docs/src/graph_of_thoughts/parser/__init__.py | 1 + docs/src/graph_of_thoughts/parser/parser.py | 90 ++ .../graph_of_thoughts/prompter/__init__.py | 1 + .../graph_of_thoughts/prompter/prompter.py | 86 ++ docs/src/paper/README.md | 5 + docs/src/paper/plots.py | 337 +++++ docs/src/pyproject.toml | 39 + docs/tree.html | 180 +++ examples/doc_merge/.fdignore | 1 + examples/keyword_counting/.fdignore | 2 + examples/set_intersection/.fdignore | 2 + examples/sorting/.fdignore | 2 + 93 files changed, 12942 insertions(+) create mode 100644 .fdignore create mode 100644 docs/.gitignore create mode 100644 docs/codeview.html create mode 100644 docs/data/0.json create mode 100644 docs/data/1.json create mode 100644 docs/data/2.json create mode 100644 docs/data/3.json create mode 100644 docs/data/4.json create mode 100644 docs/data/5.json create mode 100644 docs/data/6.json create mode 100644 docs/doc/0cea9af3-6d7e-464b-9c98-67d890a84704.json create mode 100644 docs/doc/210f2677-be0b-44ed-911c-75384dfccc5e.json create mode 100644 docs/doc/211f5959-9ce5-4512-bcce-0c8afe4961a9.json create mode 100644 docs/doc/33c839ec-9562-4993-8007-5179237aa1f5.json create mode 100644 docs/doc/3a316a1b-8eab-437e-a0af-d0bb26acbfbb.json create mode 100644 docs/doc/3b0530b9-09df-4bf9-92e1-a25262e77d4c.json create mode 100644 docs/doc/3e9ea6a0-67fc-4b96-a38e-eb83e5cb25bd.json create mode 100644 docs/doc/42b586af-f715-430a-8eaa-489513178ae3.json create mode 100644 docs/doc/442814a1-6938-498f-a984-d0c2c323e6a4.json create mode 100644 docs/doc/4ad0e7eb-9359-46ee-91a5-5765b4dbec15.json create mode 100644 docs/doc/4f89764f-01c4-48c9-aaae-47f193ac7c6c.json create mode 100644 docs/doc/51b07dd0-8338-4393-8a08-9a958d2115ea.json create mode 100644 docs/doc/57b25d90-f16a-47ed-8acc-1205c076e73e.json create mode 100644 docs/doc/6535c412-26cb-40f0-b4a2-9b9fa1702577.json create mode 100644 docs/doc/66a35b11-6817-4e60-ad59-f13cbbeb8722.json create mode 100644 docs/doc/76401061-be28-4fb3-a263-4b6fca10497d.json create 
mode 100644 docs/doc/83cd9c2d-9a53-466b-91e1-c27f04849955.json create mode 100644 docs/doc/8ef4e234-d54a-4d43-a867-e9d2e137c744.json create mode 100644 docs/doc/9628d64c-cdf3-43c9-bfe3-6aa1c1fe9db2.json create mode 100644 docs/doc/9e090150-5d69-43c5-8187-800d2da3b4a1.json create mode 100644 docs/doc/9ef559b4-a179-4352-a8d1-a4f60266badc.json create mode 100644 docs/doc/a23616f6-ad8f-4cc1-8642-d64d19c48cde.json create mode 100644 docs/doc/a9ed2c0d-bced-4bd8-8e98-f89309111026.json create mode 100644 docs/doc/b1ebbf1c-ed23-4a46-a071-011a2df7c24c.json create mode 100644 docs/doc/b3d07cc5-ef2d-4fc4-aba2-4324001369f1.json create mode 100644 docs/doc/ba444083-0879-4ed5-bd02-c751b6e84203.json create mode 100644 docs/doc/bc29c96d-2474-4e87-90c7-ba3a25757fee.json create mode 100644 docs/doc/bceed44f-7aac-4b5f-a653-e8689e5d160c.json create mode 100644 docs/doc/c275354a-cb53-400e-9eeb-4b68c825d129.json create mode 100644 docs/doc/c5e61b9e-7b0c-4187-845f-ad264dfef3ba.json create mode 100644 docs/doc/d4d79625-27f5-4744-89d1-7db44ae551cb.json create mode 100644 docs/doc/d5de7461-a64a-4f34-93a3-2d622b4cae73.json create mode 100644 docs/doc/d632b16c-9b55-42f4-b40a-ba725f9e6b7d.json create mode 100644 docs/doc/d67c0757-c845-401d-986c-330d77c79d0f.json create mode 100644 docs/doc/dfc74651-7417-43a9-b64d-74c1b2eb667f.json create mode 100644 docs/doc/dff07df9-81f0-4854-b618-a4bbd8d283bd.json create mode 100644 docs/doc/f03e6678-1954-4890-bdb6-f984291581f8.json create mode 100644 docs/doc/f7b1b6c4-e340-42de-8dd2-3744278951ef.json create mode 100644 docs/github-markdown.css create mode 100644 docs/index.html create mode 100644 docs/metadata.json create mode 100644 docs/src/README.md create mode 100644 docs/src/examples/README.md create mode 100644 docs/src/examples/doc_merge/README.md create mode 100644 docs/src/examples/doc_merge/doc_merge.py create mode 100644 docs/src/examples/doc_merge/plot.py create mode 100644 docs/src/examples/doc_merge/pure_documents.json create mode 100644 
docs/src/examples/keyword_counting/README.md create mode 100644 docs/src/examples/keyword_counting/dataset_gen_countries.py create mode 100644 docs/src/examples/keyword_counting/plot.py create mode 100644 docs/src/examples/set_intersection/README.md create mode 100644 docs/src/examples/set_intersection/dataset_gen_intersection.py create mode 100644 docs/src/examples/set_intersection/plot.py create mode 100644 docs/src/examples/set_intersection/utils.py create mode 100644 docs/src/examples/sorting/README.md create mode 100644 docs/src/examples/sorting/plot.py create mode 100644 docs/src/examples/sorting/utils.py create mode 100644 docs/src/graph_of_thoughts/controller/README.md create mode 100644 docs/src/graph_of_thoughts/controller/__init__.py create mode 100644 docs/src/graph_of_thoughts/controller/controller.py create mode 100644 docs/src/graph_of_thoughts/language_models/README.md create mode 100644 docs/src/graph_of_thoughts/language_models/__init__.py create mode 100644 docs/src/graph_of_thoughts/language_models/abstract_language_model.py create mode 100644 docs/src/graph_of_thoughts/language_models/chatgpt.py create mode 100644 docs/src/graph_of_thoughts/language_models/config_template.json create mode 100644 docs/src/graph_of_thoughts/language_models/llamachat_hf.py create mode 100644 docs/src/graph_of_thoughts/operations/README.md create mode 100644 docs/src/graph_of_thoughts/operations/__init__.py create mode 100644 docs/src/graph_of_thoughts/operations/graph_of_operations.py create mode 100644 docs/src/graph_of_thoughts/operations/operations.py create mode 100644 docs/src/graph_of_thoughts/operations/thought.py create mode 100644 docs/src/graph_of_thoughts/parser/__init__.py create mode 100644 docs/src/graph_of_thoughts/parser/parser.py create mode 100644 docs/src/graph_of_thoughts/prompter/__init__.py create mode 100644 docs/src/graph_of_thoughts/prompter/prompter.py create mode 100644 docs/src/paper/README.md create mode 100644 docs/src/paper/plots.py 
create mode 100644 docs/src/pyproject.toml create mode 100644 docs/tree.html create mode 100644 examples/doc_merge/.fdignore create mode 100644 examples/keyword_counting/.fdignore create mode 100644 examples/set_intersection/.fdignore create mode 100644 examples/sorting/.fdignore diff --git a/.fdignore b/.fdignore new file mode 100644 index 0000000..1c65e4b --- /dev/null +++ b/.fdignore @@ -0,0 +1,4 @@ +.* +*.{bz2,svg,csv} +docs +LICENSE \ No newline at end of file diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..07f4fe3 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,5 @@ +!.gitignore +!* +!*/* +cache_db.json +cache_tree.json diff --git a/docs/codeview.html b/docs/codeview.html new file mode 100644 index 0000000..7f30366 --- /dev/null +++ b/docs/codeview.html @@ -0,0 +1,594 @@ + + + + + + + + + Code View + + + + + + + + + + + + + + + + + + + +
+

Code Preview

+
+
+ +
+ + + \ No newline at end of file diff --git a/docs/data/0.json b/docs/data/0.json new file mode 100644 index 0000000..badd127 --- /dev/null +++ b/docs/data/0.json @@ -0,0 +1,546 @@ +{ + "0": { + "file_id": 0, + "content": "/README.md", + "type": "filepath" + }, + "1": { + "file_id": 0, + "content": "The GoT framework is a Python 3.8+ language model that solves sorting problems, outputs JSON graphs, and provides detailed instructions for usage with real-world examples in the examples directory. Users are encouraged to star the repository, ask questions, provide feedback, and cite the reference when using it in other projects.", + "type": "summary" + }, + "2": { + "file_id": 0, + "content": "# Graph of Thoughts (GoT)\n

\n \n

\nThis is the official implementation of [Graph of Thoughts: Solving Elaborate Problems with Large Language Models](https://arxiv.org/pdf/2308.09687.pdf). \nThis framework gives you the ability to solve complex problems by modeling them as a Graph of Operations (GoO), which is automatically executed with a Large Language Model (LLM) as the engine. \nThis framework is designed to be flexible and extensible, allowing you to not only solve problems using the new GoT approach, but also to implement GoOs resembling previous approaches like CoT or ToT.\n## Setup Guide\nIn order to use this framework, you need to have a working installation of Python 3.8 or newer.\n### Installing GoT\nBefore running either of the following two installation methods, make sure to activate your Python environment (if any) beforehand. \nIf you are a user and you just want to use `graph_of_thoughts`, you can install it directly from PyPI:\n```bash\npip install graph_of_thoughts", + "type": "code", + "location": "/README.md:1-20" + }, + "3": { + "file_id": 0, + "content": "Installation instructions for the Graph of Thoughts (GoT) framework. Requires Python 3.8 or newer and can be installed directly from PyPI using pip.", + "type": "comment" + }, + "4": { + "file_id": 0, + "content": "```\nIf you are a developer and you want to modify the code, you can install it in editable mode from source:\n```bash\ngit clone https://github.com/spcl/graph-of-thoughts.git\ncd graph-of-thoughts\npip install -e .\n```\n### Configuring the LLM\nIn order to use the framework, you need to have access to an LLM.\nPlease follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice.\n## Quick Start\nThe following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach. 
\nMake sure you have followed the [Setup Guide](#setup-guide) before running the code.\n```python\nfrom examples.sorting.sorting_032 import SortingPrompter, SortingParser, utils\nfrom graph_of_thoughts import controller, language_models, operations\n# Problem input\nto_be_sorted = \"[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]\"\n# Create the Graph of Operations\ngop = operations.GraphOfOperations()", + "type": "code", + "location": "/README.md:21-48" + }, + "5": { + "file_id": 0, + "content": "This code provides instructions for installing and configuring an LLM (Language Model) to use the Graph of Thoughts framework. The code also shows a quick start example for solving the sorting problem with a list of 32 numbers using a CoT-like approach, assuming the setup guide has been followed.", + "type": "comment" + }, + "6": { + "file_id": 0, + "content": "gop.append_operation(operations.Generate())\ngop.append_operation(operations.Score(scoring_function=utils.num_errors))\ngop.append_operation(operations.GroundTruth(utils.test_sorting))\n# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)\nlm = language_models.ChatGPT(\"config.json\", model_name=\"chatgpt\")\n# Create the Controller\nctrl = controller.Controller(\n lm, \n gop, \n SortingPrompter(), \n SortingParser(),\n # The following dictionary is used to configure the initial thought state\n {\n \"original\": to_be_sorted,\n \"current\": \"\",\n \"method\": \"cot\"\n }\n)\n# Run the Controller and generate the output graph\nctrl.run()\nctrl.output_graph(\"output_cot.json\")\n```\nTo run the more sophisticated GoT approach, you can use the following code snippet.\n```python\nfrom examples.sorting.sorting_032 import SortingPrompter, SortingParser, got, utils\nfrom graph_of_thoughts import controller, language_models, operations\n# Problem input\nto_be_sorted = \"[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 
1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]\"", + "type": "code", + "location": "/README.md:49-83" + }, + "7": { + "file_id": 0, + "content": "This code generates a graph of thoughts using the GoT approach. It appends operations to generate, score (using num_errors function), and ground truth (using test_sorting function). It then initializes a language model with an API key from config.json and creates a controller with given parameters. Finally, it runs the controller and outputs the graph in JSON format. The example problem input is provided for usage.", + "type": "comment" + }, + "8": { + "file_id": 0, + "content": "# Retrieve the Graph of Operations\ngop = got()\n# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)\nlm = language_models.ChatGPT(\"config.json\", model_name=\"chatgpt\")\n# Create the Controller\nctrl = controller.Controller(\n lm, \n gop, \n SortingPrompter(), \n SortingParser(),\n # The following dictionary is used to configure the initial thought state\n {\n \"original\": to_be_sorted,\n \"current\": \"\",\n \"phase\": 0,\n \"method\": \"got\"\n }\n)\n# Run the Controller and generate the output graph\nctrl.run()\nctrl.output_graph(\"output_got.json\")\n```\nYou can compare the two results by inspecting the output graphs `output_cot.json` and `output_got.json`. \nThe final thought states' scores indicate the number of errors in the sorted list.\n## Documentation\nThe paper gives a high-level overview of the framework and its components. \nIn order to understand the framework in more detail, you can read the documentation of the individual modules. 
\nEspecially the [Controller](grap", + "type": "code", + "location": "/README.md:85-116" + }, + "9": { + "file_id": 0, + "content": "This code retrieves the Graph of Operations (gop), configures a language model (lm) using config.json, creates a Controller object (ctrl) with the necessary components, and runs the controller to generate output graphs \"output_cot.json\" and \"output_got.json\". The final thought states' scores in the output graphs indicate the number of errors in the sorted list. Read the documentation for more detailed information on the framework's individual modules.", + "type": "comment" + }, + "10": { + "file_id": 0, + "content": "h_of_thoughts/controller/README.md) and [Operations](graph_of_thoughts/operations/README.md) modules are important for understanding how to make the most out of the framework. \nWe took extra care to fully document the code, so that you can easily understand how it works and how to extend it.\n## Examples\nThe [examples](examples) directory contains several examples of problems that can be solved using the framework, including the ones presented in the paper. \nIt is a great starting point for learning how to use the framework to solve real problems. \nEach example contains a `README.md` file with instructions on how to run it and play with it. The code is fully documented and should be easy to follow.\nYou can also run the examples straight from the main directory. Note that the results will be stored in the respective examples sub-directory.\nTry for instance:\n```bash\npython -m examples.sorting.sorting_032\npython -m examples.keyword_counting.keyword_counting\n```\n## Paper Results\nYou can run the experiments from the paper by following the instructions in the [examples](examples) directory. 
", + "type": "code", + "location": "/README.md:116-133" + }, + "11": { + "file_id": 0, + "content": "This code provides instructions on understanding and utilizing the framework, mentioning the importance of documentation for easy comprehension. It highlights the examples directory containing real-world problem solutions as a learning resource, with each example having a detailed README file. Additionally, it explains how to run the examples directly from the main directory and mentions that results will be stored in respective sub-directories. Lastly, it informs about running experiments from the paper through the examples directory.", + "type": "comment" + }, + "12": { + "file_id": 0, + "content": "However, if you just want to inspect and replot the results, you can use the [paper](paper) directory.\n## Citations\nIf you find this repository valuable, please give it a star! \nGot any questions or feedback? Feel free to reach out to [nils.blach@inf.ethz.ch](mailto:nils.blach@inf.ethz.ch) or open an issue. \nUsing this in your work? 
Please reference us using the provided citation:\n```bibtex\n@misc{besta2023got,\n title = {{Graph of Thoughts: Solving Elaborate Problems with Large Language Models}},\n author = {Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Micha{\\l} and Niewiadomski, Hubert and Nyczyk, Piotr and Hoefler, Torsten},\n year = 2023,\n eprinttype = {arXiv},\n eprint = {2308.09687}\n}\n```", + "type": "code", + "location": "/README.md:134-150" + }, + "13": { + "file_id": 0, + "content": "The code provides instructions to access the project's results, suggests using the 'paper' directory for inspection and replotting, encourages starring the repository if valuable, offers contact information for questions or feedback, and recommends citing the provided reference when using the work in other projects.", + "type": "comment" + }, + "14": { + "file_id": 1, + "content": "/pyproject.toml", + "type": "filepath" + }, + "15": { + "file_id": 1, + "content": "The code uses Hatchling to define project settings for the Python package \"graph_of_thoughts,\" including package details, dependencies, and URLs. 
It also includes a TOML configuration file setting up an entry point for executable scripts under the project's namespace within the \"scripts\" section of the \"project\" block.", + "type": "summary" + }, + "16": { + "file_id": 1, + "content": "[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n[project]\nname = \"graph_of_thoughts\"\nversion = \"0.0.3\"\nauthors = [\n { name=\"Maciej Besta\", email=\"maciej.besta@inf.ethz.ch\" },\n { name=\"Nils Blach\", email=\"nils.blach@inf.ethz.ch\" },\n { name=\"Ales Kubicek\", email=\"akubicek@student.ethz.ch\" },\n { name=\"Robert Gerstenberger\", email=\"gerstenberger.robert@gmail.com\" },\n]\ndescription = \"Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models\"\nreadme = \"README.md\"\nlicense = {file = \"LICENSE\"}\nrequires-python = \">=3.8\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"Operating System :: OS Independent\",\n]\ndependencies = [\n \"backoff>=2.2.1,<3.0.0\",\n \"openai>=1.0.0,<2.0.0\",\n \"matplotlib>=3.7.1,<4.0.0\",\n \"numpy>=1.24.3,<2.0.0\",\n \"pandas>=2.0.3,<3.0.0\",\n \"sympy>=1.12,<2.0\",\n \"torch>=2.0.1,<3.0.0\",\n \"transformers>=4.31.0,<5.0.0\",\n \"accelerate>=0.21.0,<1.0.0\",\n \"bitsandbytes>=0.41.0,<1.0.0\",\n \"scipy>=1.10.1,<2.0.0\",\n]\n[project.urls]\nHomepage = \"https://github.com/spcl/graph-of-thoughts\"", + "type": "code", + "location": "/pyproject.toml:1-37" + }, + "17": { + "file_id": 1, + "content": "This code defines the project settings for a Python package called \"graph_of_thoughts\" using Hatchling as the build system. 
It specifies the package name, version, authors, description, dependencies, and URLs for further information.", + "type": "comment" + }, + "18": { + "file_id": 1, + "content": "[project.scripts]", + "type": "code", + "location": "/pyproject.toml:39-39" + }, + "19": { + "file_id": 1, + "content": "The code snippet is a part of a TOML configuration file, specifically defining the \"scripts\" section within the \"project\" block. It sets up an entry point for executable scripts under the project's namespace.", + "type": "comment" + }, + "20": { + "file_id": 2, + "content": "/examples/README.md", + "type": "filepath" + }, + "21": { + "file_id": 2, + "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", + "type": "summary" + }, + "22": { + "file_id": 2, + "content": "# Examples\nThis directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example.\nWe further include prompt files for each example that can be used to test prompts manually in a console.\nPlease refer to the individual example directories for more information on the specific example.", + "type": "code", + "location": "/examples/README.md:1-7" + }, + "23": { + "file_id": 2, + "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. 
Individual example directories provide more information on specific examples.", + "type": "comment" + }, + "24": { + "file_id": 3, + "content": "/examples/doc_merge/README.md", + "type": "filepath" + }, + "25": { + "file_id": 3, + "content": "The code showcases a document merging approach using various methods like IO, CoT, ToT, and GoT. It takes 50 sample documents from `documents.csv`, applies chosen techniques, and outputs results in an LLM-named directory with debug logs and separate JSON files for each approach.", + "type": "summary" + }, + "26": { + "file_id": 3, + "content": "# Document Merging\nThe use case in this directory generates new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents. \nWe provide implementations of five different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT)\n- Graph of Thoughts (GoT):\n - GoT: aggregation of fully merged NDAs\n - GoT2: aggregation of partially merged NDAs\n## Data\nWe provide an input file with 50 samples: `documents.csv`.\n## Execution\nThe file to execute the use case is called\n`doc_merge.py`. In the main body, one can\nselect the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are", + "type": "code", + "location": "/examples/doc_merge/README.md:1-28" + }, + "27": { + "file_id": 3, + "content": "This code demonstrates a document merging use case using different approaches, including IO, Chain-of-Thought (CoT), Tree of Thought (ToT), and Graph of Thoughts (GoT). 
It uses 50 sample documents from `documents.csv`, executes the selected samples with chosen approaches, and saves results in a directory named by the LLM, approaches, day, and start time.", + "type": "comment" + }, + "28": { + "file_id": 3, + "content": "created. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 158 of `plot.py` and run `python3\nplot.py` to plot your data.", + "type": "code", + "location": "/examples/doc_merge/README.md:29-38" + }, + "29": { + "file_id": 3, + "content": "This code generates a configuration file named `config.json` that contains input data, selected approaches, LLM name, and budget information. Additionally, it logs prompts, responses, and debug data in `log.log`. Each approach directory holds separate JSON files for every sample with the Graph Reasoning State (GRS) included. To plot the data, change the results directory at line 158 of `plot.py` and run `python3 plot.py`.", + "type": "comment" + }, + "30": { + "file_id": 4, + "content": "/examples/doc_merge/doc_merge.py", + "type": "filepath" + }, + "31": { + "file_id": 4, + "content": "The code develops an efficient NDA merging class with language model prompts and redundancy handling, generating a graph for document merge and language model inference within budget limits. 
It utilizes input data from \"documents.csv\", manages exceptions, and scores output based on coverage.", + "type": "summary" + }, + "32": { + "file_id": 4, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport os\nimport re\nimport logging\nimport datetime\nimport json\nimport csv\nfrom statistics import fmean\nfrom typing import Dict, List, Callable, Set, Union\nfrom graph_of_thoughts import controller, language_models, operations, prompter, parser\nclass DocMergePrompter(prompter.Prompter):\n \"\"\"\n DocMergePrompter provides the generation of prompts specific to the document\n merge example for the language models.\n Inherits from the Prompter class and implements its abstract methods.\n \"\"\"\n merge_doc_prompt_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. Output only the created NDA between the tags and , without any additional text.\nHere are NDAs - \n\"\"\"\n merge_doc_prompt_block = \"\"\"", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:1-31" + }, + "33": { + "file_id": 4, + "content": "This code defines a class DocMergePrompter that inherits from Prompter and provides prompts for merging NDA documents. It includes a merge_doc_prompt_start string for generating the prompt and a merge_doc_prompt_block string for displaying NDAs to be merged. 
The goal is to create a single NDA by maximizing information retention and minimizing redundancy, with the output between and .", + "type": "comment" + }, + "34": { + "file_id": 4, + "content": "\n{document}\n\n\"\"\"\n merge_doc_prompt_cot_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy.\nYou can generate any intermediate thoughts and documents you want, but the final output should be the merged NDA, placed between the two tags and .\nFor instance you might want to follow this approach:\n1. Split each NDA into their logical subparts.\n2. Merge the subparts of the {num} NDAs.\n3. Combine the merged subparts into a single NDA.\n4. Place the merged NDA between the tags and .\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_start = \"\"\"The following NDA merges initial NDAs - .\nPlease improve the summary NDA by adding more information and removing redundancy. Output only the improved NDA, placed between the two tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_block = \"\"\"", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:32-54" + }, + "35": { + "file_id": 4, + "content": "The code defines two prompts for merging and improving NDA documents. The first prompt instructs to merge the provided NDAs into a single one, preserving information and minimizing redundancy. It also provides an example approach. The second prompt asks to improve the merged document by adding more information and removing redundancies, with output placed between specific tags. 
Both prompts include the input NDAs as \"Doc1\" to \"Doc{num}\".", + "type": "comment" + }, + "36": { + "file_id": 4, + "content": "\n{document}\n\n\"\"\"\n improve_summary_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n score_prompt_base = \"\"\"The following NDA merges NDAs - .\nPlease score the merged NDA in terms of how much redundant information is contained, independent of the original NDAs, as well as how much information is retained from the original NDAs.\nA score of 10 for redundancy implies that absolutely no information is redundant, while a score of 0 implies that at least half of the information is redundant (so everything is at least mentioned twice).\nA score of 10 for retained information implies that all information from the original NDAs is retained, while a score of 0 implies that no information is retained.\nYou may provide reasoning for your scoring, but the final score for redundancy should be between the tags and , and the final score for retained information should be between the tags and , without any additional text within any of those tags.", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:55-71" + }, + "37": { + "file_id": 4, + "content": "This code contains various prompts for different tasks, such as improving summaries and scoring merged documents. 
The prompts are designed to assist in the task of merging NDAs while considering redundancy and retained information scores, with specific tags provided for clarity.", + "type": "comment" + }, + "38": { + "file_id": 4, + "content": "Here are NDAs - :\n\"\"\"\n score_prompt_block = \"\"\"\n\n{document}\n\n\"\"\"\n score_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n aggregate_full_prompt_base = \"\"\"The following NDAs - each merge the initial NDAs - .\nCombine the merged NDAs - into a new one, maximizing their advantages and overall information retention, while minimizing redundancy.\nOutput only the new NDA between the tags and , without any additional text. \nHere are the original NDAs - :\n\"\"\"\n aggregate_full_prompt_block1 = \"\"\"\n\n{document}\n\n\"\"\"\n aggregate_full_prompt_mid = \"\"\"\nHere are the summary NDAs - :\n\"\"\"\n aggregate_full_prompt_block2 = \"\"\"\n\n{summary}\n\n\"\"\"\n aggregate_sub_prompt_base = \"\"\"The following NDAs - are summaries of some other NDAs.\nCombine them into a new one, make sure to maximize their advantages and overall information retention, while minimizing redundancy.", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:73-112" + }, + "39": { + "file_id": 4, + "content": "This code appears to be part of a larger program that deals with merging and summarizing Non-Disclosure Agreements (NDAs). It uses string formatting to generate prompts for the user, asking them to provide NDAs in a specific format. 
The code snippet includes various placeholders (, ) for incorporating the user's provided information.", + "type": "comment" + }, + "40": { + "file_id": 4, + "content": "Output only the new NDA between the tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n aggregate_sub_prompt_generate = \"\"\"\nNDA :\n{nda}\n\n\"\"\"\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate an aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n if len(state_dicts[0][\"parts\"]) > 0 and len(state_dicts[0][\"parts\"]) < len(\n state_dicts[0][\"documents\"]\n ):\n prompt = self.aggregate_sub_prompt_base.format(\n num_ndas=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_sub_prompt_generate.format(\n nda=state_dict[\"current\"], num=i + 1", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:113-143" + }, + "41": { + "file_id": 4, + "content": "This code generates an aggregation prompt for a language model, using the provided state_dicts. It concatenates NDAs from each state_dict and formats them into a final prompt. 
The output is a string containing the merged NDAs between \"\" and \"\".", + "type": "comment" + }, + "42": { + "file_id": 4, + "content": " )\n return prompt\n else:\n prompt = self.aggregate_full_prompt_base.format(\n num_ndas=len(state_dicts[0][\"documents\"]),\n num_ndas_summary=len(state_dicts),\n )\n for i, document in enumerate(state_dicts[0][\"documents\"]):\n prompt += self.aggregate_full_prompt_block1.format(\n document=document, num=i + 1\n )\n prompt += self.aggregate_full_prompt_mid.format(\n num_ndas_summary=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_full_prompt_block2.format(\n summary=state_dict[\"current\"], num=i + 1\n )\n return prompt\n def generate_prompt(\n self,\n num_branches: int,\n documents: List[str],\n method: str,\n parts: Set[str],\n current: str,\n **kwargs,\n ) -> str:\n \"\"\"\n Generate a generate prompt for the language model.", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:144-174" + }, + "43": { + "file_id": 4, + "content": "This code defines a class with methods for generating prompts. The `generate_prompt` method takes in parameters like number of branches, documents, and current state. 
It returns a prompt for the language model using string formatting based on input parameters.", + "type": "comment" + }, + "44": { + "file_id": 4, + "content": " :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param documents: The list of documents to be merged.\n :type documents: List[str]\n :param method: Method for which the generate prompt is generated.\n :type method: str\n :param parts: Indices of the already processed document parts.\n :type parts: Set[str]\n :param current: The intermediate solution.\n :type current: str\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n :raise AssertionError: If method is not implemented yet.\n \"\"\"\n prompt = \"\"\n if method.startswith(\"io\") or method.startswith(\"cot\"):\n if method.startswith(\"io\"):\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n else:\n prompt += self.merge_doc_prompt_cot_start.format(num=len(documents))\n for i, document in enumerate(documents):", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:176-198" + }, + "45": { + "file_id": 4, + "content": "This function takes in the number of responses, a list of documents to merge, method for generating the prompt, indices of already processed document parts, an intermediate solution, and additional keyword arguments. It returns the generate prompt used for merging the documents. 
If the method is not implemented yet, it raises AssertionError.", + "type": "comment" + }, + "46": { + "file_id": 4, + "content": " prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n elif method.startswith(\"tot\"):\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n for i, document in enumerate(documents):\n prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(documents),\n )\n for i, document in enumerate(documents):\n prompt += self.improve_summary_prompt_block.format(\n document=document, num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n elif method.startswith(\"got\"):", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:199-221" + }, + "47": { + "file_id": 4, + "content": "The code provides a prompt for merging multiple documents or improving a given summary based on the specified method. It dynamically generates the prompt by concatenating predefined blocks of text with placeholders for document numbers and the original summary. 
If no current summary is provided, it creates a prompt to merge documents, otherwise, it improves the given summary using those documents.", + "type": "comment" + }, + "48": { + "file_id": 4, + "content": " parts = (\n sorted(list(parts)) if len(parts) > 0 else list(range(len(documents)))\n )\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(parts))\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.merge_doc_prompt_block.format(\n document=documents[part], num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(parts),\n )\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.improve_summary_prompt_block.format(\n document=documents[part], num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n else:\n assert False, \"Not implemented yet.\"\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:222-245" + }, + "49": { + "file_id": 4, + "content": "The code checks if the current summary is provided. If not, it generates a prompt for merging documents into one coherent summary. If the current summary is provided, it generates a prompt for improving an existing summary by incorporating information from multiple documents. 
The code also sorts the parts of the document and formats them in a specific way for the prompts.", + "type": "comment" + }, + "50": { + "file_id": 4, + "content": " \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n :raise AssertionError: If more than one thought state is supplied.\n \"\"\"\n if len(state_dicts) > 1:\n assert False, \"Not implemented yet.\"\n else:\n # perform individual scoring\n parts = (\n [\n state_dicts[0][\"documents\"][part]\n for part in sorted(list(state_dicts[0][\"parts\"]))\n ]\n if len(state_dicts[0][\"parts\"]) > 0\n else state_dicts[0][\"documents\"]\n )\n prompt = self.score_prompt_base.format(\n num=len(parts),\n )\n for i, part in enumerate(parts):\n prompt += self.score_prompt_block.format(document=part, num=i + 1)", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:246-274" + }, + "51": { + "file_id": 4, + "content": "This function generates a score prompt for the language model using a single thought state provided as an argument. It checks if only one thought state is supplied and handles the case where more than one is given. 
The prompt is created by formatting the base and block prompts with the number of documents.", + "type": "comment" + }, + "52": { + "file_id": 4, + "content": " prompt += self.score_prompt_end.format(\n summary=state_dicts[0][\"current\"],\n )\n return prompt\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\nclass DocMergeParser(parser.Parser):\n \"\"\"\n DocMergeParser provides the parsing of language model reponses specific to the\n document merge example.\n Inherits from the Parser class and implements its abstract methods.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Inits the response cache.\n \"\"\"\n self.cache = {}\n def strip_answer_helper(self, text: str, tag: str = \"\") -> str:", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:275-315" + }, + "53": { + "file_id": 4, + "content": "This code defines a class DocMergeParser that extends the Parser class and provides specific functionality for parsing language model responses in the document merge example. It includes methods to generate improve prompt, validation prompt, and handles answer stripping with optional tags. The response cache is initialized in the constructor.", + "type": "comment" + }, + "54": { + "file_id": 4, + "content": " \"\"\"\n Helper function to remove tags from a text.\n :param text: The input text.\n :type text: str\n :param tag: The tag to be stripped. 
Defaults to \"\".\n :type tag: str\n :return: The stripped text.\n :rtype: str\n \"\"\"\n text = text.strip()\n if \"Output:\" in text:\n text = text[text.index(\"Output:\") + len(\"Output:\") :].strip()\n if tag != \"\":\n start = text.rfind(f\"<{tag}>\")\n end = text.rfind(f\"\")\n if start != -1 and end != -1:\n text = text[start + len(f\"<{tag}>\") : end].strip()\n elif start != -1:\n logging.warning(\n f\"Only found the start tag <{tag}> in answer: {text}. Returning everything after the tag.\"\n )\n text = text[start + len(f\"<{tag}>\") :].strip()\n elif end != -1:\n logging.warning(\n f\"Only found the end tag in answer: {text}. Returning everything before the tag.\"", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:316-342" + }, + "55": { + "file_id": 4, + "content": "This function removes specified tags from a text. It first strips whitespace and checks if \"Output:\" is in the text. Then, it searches for start and end tags to remove the enclosed content while handling cases of only one tag found. If no matching tags are found, it logs a warning and returns everything after or before the found tag.", + "type": "comment" + }, + "56": { + "file_id": 4, + "content": " )\n text = text[:end].strip()\n else:\n logging.warning(\n f\"Could not find any tag {tag} in answer: {text}. 
Returning the full answer.\"\n )\n return text\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for an aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the language model.\n :rtype: Union[Dict, List[Dict]]\n \"\"\"\n new_states = []\n for text in texts:\n if len(states[0][\"parts\"]) < len(states[0][\"documents\"]):\n # subpart aggregation\n text = self.strip_answer_helper(text, \"Merged\")", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:343-369" + }, + "57": { + "file_id": 4, + "content": "The code is parsing the response from a language model for an aggregation prompt. It checks if there are enough thought states and performs subpart aggregation by stripping the answer to a single text using a helper function.", + "type": "comment" + }, + "58": { + "file_id": 4, + "content": " new_state = states[0].copy()\n new_state[\"current\"] = text\n new_state[\"parts\"] = set()\n for state in states:\n new_state[\"parts\"] = new_state[\"parts\"] | state[\"parts\"]\n new_states.append(new_state)\n else:\n # full NDA aggregation\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = states[0].copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the language model.", + "type": "code", + 
"location": "/examples/doc_merge/doc_merge.py:370-393" + }, + "59": { + "file_id": 4, + "content": "The code appears to be a part of a larger function that generates new thought states by aggregating inputs from multiple sources. It seems to handle both partial and full non-disclosure agreement (NDA) cases, stripping the answer text and creating new states accordingly. The `parse_generate_answer` function processes response from the language model for generate prompts and returns new thought states after parsing the responses.", + "type": "comment" + }, + "60": { + "file_id": 4, + "content": " :rtype: List[Dict]\n \"\"\"\n new_states = []\n for text in texts:\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = state.copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n :raise AssertionError: If the number of thought states is not one.\n \"\"\"\n assert len(states) == 1, \"Only one state is allowed for scoring.\"\n if len(states) == 1:\n # individual scoring\n redundancy_scores = []\n retain_scores = []", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:394-420" + }, + "61": { + "file_id": 4, + "content": "The function `get_new_states()` takes a list of texts and returns a list of dictionaries, where each dictionary represents a thought state with the current text as its value.\n\nThe function `parse_score_answer()` takes a list of thought states and responses from the language model, asserts that only one thought state is allowed for scoring, and then initializes lists for redundancy and 
retain scores.", + "type": "comment" + }, + "62": { + "file_id": 4, + "content": " for text in texts:\n answer = self.strip_answer_helper(text, \"Redundancy\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n redundancy_scores.append(float(res[0]))\n elif len(res) > 1:\n logging.warning(\n f\"Found multiple redundancy scores in answer: {text}. Returning the last one.\"\n )\n redundancy_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any redundancy score in answer: {text}. Ignoring this answer.\"\n )\n answer = self.strip_answer_helper(text, \"Retained\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n retain_scores.append(float(res[0]))\n elif len(res) > 1:\n logging.warning(\n f\"Found multiple retained scores in answer: {text}. Returning the last one.\"", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:421-441" + }, + "63": { + "file_id": 4, + "content": "This code iterates through text inputs, extracts redundancy and retained scores using regex, handles multiple score cases by logging a warning and selecting the last one or ignoring if no scores found.", + "type": "comment" + }, + "64": { + "file_id": 4, + "content": " )\n retain_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any retained score in answer: {text}. Ignoring this answer.\"\n )\n if len(redundancy_scores) == 0 or len(retain_scores) == 0:\n logging.warning(\n f\"Could not find any valid score in any answer. 
Returning 0.0.\"\n )\n return [0.0]\n mean_redundancy = fmean(redundancy_scores)\n mean_retain = fmean(retain_scores)\n f1 = 2 * mean_redundancy * mean_retain / (mean_redundancy + mean_retain)\n return [f1]\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:442-464" + }, + "65": { + "file_id": 4, + "content": "This code snippet is a part of a function responsible for parsing the responses from a language model for an 'improve' prompt. It calculates redundancy and retain scores for each answer, then returns the F1 score based on these scores. If no valid scores are found in any answer, it returns 0.0.", + "type": "comment" + }, + "66": { + "file_id": 4, + "content": " :type texts: List[str]\n :return: The new thought state after parsing the responses from the language model.\n :rtype: Dict\n \"\"\"\n pass\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\ndef io() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the IO method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph", + "type": "code", + "location": 
"/examples/doc_merge/doc_merge.py:465-497" + }, + "67": { + "file_id": 4, + "content": "This code contains functions for thought state management, parsing responses from a language model, and generating the Graph of Operations for IO method. It uses Dict and List[str] as inputs and returns bool or Dict outputs. The code block defines three functions: update_thought_state, parse_validation_answer, and io. The last function generates the Graph of Operations by appending Generate and Score operations to an instance of operations.GraphOfOperations().", + "type": "comment" + }, + "68": { + "file_id": 4, + "content": "def cot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the CoT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph\ndef tot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the ToT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n branch_factor = 10\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best_1 = operations.KeepBestN(1, True)\n operations_graph.append_operation(keep_best_1)\n for _ in range(2):\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:500-533" + }, + "69": { + "file_id": 4, + "content": "The code defines two functions, `cot()` and `tot()`, which generate the Graph of Operations for CoT and ToT methods respectively. 
The CoT method involves generating one child node, scoring it, while the ToT method generates 10 children nodes initially, keeps the best one, then generates two additional children per iteration.", + "type": "comment" + }, + "70": { + "file_id": 4, + "content": " keep_best_2 = operations.KeepBestN(1, True)\n keep_best_2.add_predecessor(keep_best_1)\n operations_graph.append_operation(keep_best_2)\n keep_best_1 = keep_best_2\n return operations_graph\ndef got() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT method, where full documents\n are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best = operations.KeepBestN(3, True)\n operations_graph.append_operation(keep_best)\n operations_graph.append_operation(operations.Aggregate(5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best2 = operations.KeepBestN(1, True)\n keep_best2.add_predecessor(keep_best)\n operations_graph.append_operation(keep_best2)\n operations_graph.append_operation(operations.Generate(1, 10))", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:534-561" + }, + "71": { + "file_id": 4, + "content": "This code generates a Graph of Operations for merging full documents. It first appends operations to generate, score, aggregate, and keep the best scores. 
The last two operations add a predecessor to keep_best and append an additional generate operation with parameters 1 and 10.", + "type": "comment" + }, + "72": { + "file_id": 4, + "content": " operations_graph.append_operation(operations.Score(3, False))\n keep_best3 = operations.KeepBestN(1, True)\n keep_best3.add_predecessor(keep_best2)\n operations_graph.append_operation(keep_best3)\n return operations_graph\ndef got2() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT2 method, where partial\n documents are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n sub_parts = []\n for i in range(0, 4, 2): # should be at most 16 parts\n sub_text = operations.Selector(\n lambda thoughts, list_id=i: [\n operations.Thought(\n state={**thoughts[0].state, \"parts\": {list_id, list_id + 1}}\n )\n ]\n )\n operations_graph.add_operation(sub_text)\n gen_nda = operations.Generate(1, 5)\n gen_nda.add_predecessor(sub_text)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:562-593" + }, + "73": { + "file_id": 4, + "content": "This code generates a Graph of Operations for the GoT2 method, which merges partial documents. It creates an initial GraphOfOperations object and iteratively adds operations such as Selectors, Generators, and Scorers to the graph. Each iteration consists of selecting specific thoughts, generating new documents, and scoring them. 
The resulting graph is returned.", + "type": "comment" + }, + "74": { + "file_id": 4, + "content": " score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n operations_graph.add_operation(keep_best_nda)\n sub_parts.append(keep_best_nda)\n while len(sub_parts) > 1:\n new_sub_parts = []\n for i in range(0, len(sub_parts), 2):\n if i + 1 == len(sub_parts):\n new_sub_parts.append(sub_parts[i])\n continue\n aggregate = operations.Aggregate(5)\n aggregate.add_predecessor(sub_parts[i])\n aggregate.add_predecessor(sub_parts[i + 1])\n operations_graph.add_operation(aggregate)\n score = operations.Score(3, False)\n score.add_predecessor(aggregate)\n operations_graph.add_operation(score)\n keep_best = operations.KeepBestN(1, True)\n keep_best.add_predecessor(score)\n operations_graph.add_operation(keep_best)\n gen_nda = operations.Generate(1, 5)", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:594-619" + }, + "75": { + "file_id": 4, + "content": "This code is creating an operations graph for a document merge process. It starts with adding Score and Generate nodes, then iteratively adds Aggregate, Score, and KeepBestN nodes until there's only one node left in the sub_parts list. The Score nodes are used to calculate similarity scores, while the KeepBestN nodes keep the best result from the previous operation. 
The operations graph is then built with these operations added in sequence.", + "type": "comment" + }, + "76": { + "file_id": 4, + "content": " gen_nda.add_predecessor(keep_best)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)\n score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n keep_best_nda.add_predecessor(keep_best)\n operations_graph.add_operation(keep_best_nda)\n new_sub_parts.append(keep_best_nda)\n sub_parts = new_sub_parts\n return operations_graph\ndef run(\n data_ids: List[int],\n methods: List[Callable[[], operations.GraphOfOperations]],\n budget: float,\n lm_name: str,\n) -> float:\n \"\"\"\n Controller function that executes each specified method for each specified\n sample while the budget is not exhausted.\n :param data_ids: Indices of the sample to be run.\n :type data_ids: List[int]\n :param methods: List of functions to generate Graphs of Operations.\n :type methods: Each function generates a Graph of Operation.", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:620-649" + }, + "77": { + "file_id": 4, + "content": "This code is creating a graph of operations for language model inference. It defines several nodes and adds them to the operations graph, including generation, scoring, and keeping the best node. 
The function run() executes methods for each specified sample within the budget limit.", + "type": "comment" + }, + "78": { + "file_id": 4, + "content": " :param budget: Language model budget for the execution in dollars.\n :type budget: float\n :param lm_name: Name of the language model to be used.\n :type lm_name: str\n :return: Spent budget in dollars.\n :rtype: float\n \"\"\"\n orig_budget = budget\n data_path = os.path.join(os.path.dirname(__file__), \"documents.csv\")\n data = []\n with open(data_path, \"r\", encoding=\"utf8\") as f:\n reader = csv.reader(f)\n next(reader)\n for row in reader:\n row[0] = int(row[0])\n data.append(row)\n if data_ids is None or len(data_ids) == 0:\n data_ids = list(range(len(data)))\n selected_data = [data[i] for i in data_ids]\n results_dir = os.path.join(os.path.dirname(__file__), \"results\")\n if not os.path.exists(results_dir):\n os.makedirs(results_dir)\n timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n extra_info = f\"{lm_name}_{'-'.join([method.__name__ for method in methods])}\"\n folder_name = f\"{extra_info}_{timestamp}\"\n results_folder = os.path.join(results_dir, folder_name)", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:650-679" + }, + "79": { + "file_id": 4, + "content": "This function takes a budget, language model name, and optional data IDs as input. It reads the \"documents.csv\" file, filters the data based on provided data IDs, and then creates folders to save results for different methods using the specified language model. 
The function returns the spent budget in dollars.", + "type": "comment" + }, + "80": { + "file_id": 4, + "content": " os.makedirs(results_folder)\n config = {\n \"data\": selected_data,\n \"methods\": [method.__name__ for method in methods],\n \"lm\": lm_name,\n \"budget\": budget,\n }\n with open(os.path.join(results_folder, \"config.json\"), \"w\") as f:\n json.dump(config, f)\n logging.basicConfig(\n filename=os.path.join(results_folder, \"log.log\"),\n filemode=\"w\",\n format=\"%(name)s - %(levelname)s - %(message)s\",\n level=logging.DEBUG,\n )\n for method in methods:\n os.makedirs(os.path.join(results_folder, method.__name__))\n for data in selected_data:\n logging.info(f\"Running data {data[0]}: {data[1]}\")\n if budget <= 0.0:\n logging.error(\n f\"Budget has been depleted, stopping. Data {data[0]} has not been run.\"\n )\n break\n for method in methods:\n logging.info(f\"Running method {method.__name__}\")\n logging.info(f\"Budget left: {budget}\")\n if budget <= 0.0:\n logging.error(", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:680-712" + }, + "81": { + "file_id": 4, + "content": "This code sets up a results folder, saves the configuration file in JSON format, and initializes logging. It then iterates over selected data and methods, keeping track of remaining budget. If the budget becomes zero, it stops execution and logs an error message.", + "type": "comment" + }, + "82": { + "file_id": 4, + "content": " f\"Budget has been depleted, stopping. 
Method {method.__name__} has not been run.\"\n )\n break\n lm = language_models.ChatGPT(\n os.path.join(\n os.path.dirname(__file__),\n \"../../graph_of_thoughts/language_models/config.json\",\n ),\n model_name=lm_name,\n cache=True,\n )\n operations_graph = method()\n executor = controller.Controller(\n lm,\n operations_graph,\n DocMergePrompter(),\n DocMergeParser(),\n {\n \"documents\": [data[2], data[3], data[4], data[5]],\n \"parts\": set(),\n \"current\": \"\",\n \"method\": method.__name__,\n },\n )\n try:\n executor.run()\n except Exception as e:\n logging.error(f\"Exception: {e}\")\n path = os.path.join(", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:713-741" + }, + "83": { + "file_id": 4, + "content": "This code chunk initializes a language model, creates an operations graph, and sets up an executor for running the method. If the budget is depleted, it will stop execution. The code then attempts to run the executor and logs any exceptions that occur during execution.", + "type": "comment" + }, + "84": { + "file_id": 4, + "content": " results_folder,\n method.__name__,\n f\"{data[0]}.json\",\n )\n for operation in operations_graph.operations:\n for thought in operation.thoughts:\n thought.state[\"parts\"] = list(thought.state[\"parts\"])\n executor.output_graph(path)\n budget -= lm.cost\n return orig_budget - budget\nif __name__ == \"__main__\":\n \"\"\"\n Input (x1, x2, x3, x4): Four NDAs\n Output (y): A new combined NDA\n Evaluation: According to information coverage without repetition (scored by the LLM)\n \"\"\"\n budget = 30\n samples = [item for item in range(0, 50)]\n approaches = [io, cot, tot, got, got2]\n spent = run(samples, approaches, budget, \"chatgpt\")\n logging.info(f\"Spent {spent} out of {budget} budget.\")", + "type": "code", + "location": "/examples/doc_merge/doc_merge.py:742-767" + }, + "85": { + "file_id": 4, + "content": "This code takes input NDAs, combines them, and evaluates the combined result using an LLM 
(Language Model). The output is scored based on information coverage without repetition. A budget of 30 is set, with sampling from range(0, 50), and approaches io, cot, tot, got, and got2 are used. The code logs the spent budget after running the function \"run\".", + "type": "comment" + }, + "86": { + "file_id": 5, + "content": "/examples/doc_merge/plot.py", + "type": "filepath" + }, + "87": { + "file_id": 5, + "content": "The code imports libraries, defines a get_complete_results() function, reads JSON data and stores it in a dictionary, sorts the keys, retrieves final scores for each method using results_complete dictionary, and includes functions to retrieve plotting data and plot boxplots for scores with total cost bar plots on a secondary y-axis. It also sets custom y-axis positions and labels for plotting the solved status of various methods, saving it as a PDF, and generates data from given results while initializing an instance of the DocMerge class with a cost_upper limit of 15.", + "type": "summary" + }, + "88": { + "file_id": 5, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():\n results_complete[key] = sorted(", + "type": "code", + "location": "/examples/doc_merge/plot.py:1-29" 
+ }, + "89": { + "file_id": 5, + "content": "The code imports necessary libraries, defines a function get_complete_results(), and reads data from JSON files in specified directories. It collects this information into a dictionary, sorts the keys, and returns the complete results for further processing.", + "type": "comment" + }, + "90": { + "file_id": 5, + "content": " results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )\n scores[method] = sorted(scores[method], key=lambda x: x[0])", + "type": "code", + "location": "/examples/doc_merge/plot.py:30-59" + }, + "91": { + "file_id": 5, + "content": "This code retrieves and sorts final scores for each method in the results_complete dictionary. It loops through each method, then through each result for that method, calculating the score, solved status, prompt/completion tokens, and cost from the reversed data list. 
Finally, it appends these values to the corresponding method's scores list, then sorts those scores by key.", + "type": "comment" + }, + "92": { + "file_id": 5, + "content": " return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"got\", \"got2\"],\n model=\"GPT-3.5\",\n num_ndas=4,\n y_lower=0,\n y_upper=10,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis", + "type": "code", + "location": "/examples/doc_merge/plot.py:60-96" + }, + "93": { + "file_id": 5, + "content": "Function get_plotting_data returns a dictionary of plotting data for different methods, which includes scores, number of solved problems, and costs. Function plot_results plots the results using given parameters like methods order, model, number of nodes, y-axis limits, cost upper limit, etc. 
The function first ensures that the specified methods are in the result dictionary and then extracts ordered scores and total costs for each method from the results dictionary.", + "type": "comment" + }, + "94": { + "file_id": 5, + "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels)\n # ax.set_xlabel(\"Approach\")\n ax.set_ylim(y_lower, 12 if display_solved else 9.75)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n # ax.set_title(f\"Document Merging\")\n ax2 = ax.twinx()\n ax2.bar(\n positions,\n total_costs,\n alpha=0.5,\n color=\"blue\",\n label=\"Total Cost ($); the lower the better\",\n )\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())", + "type": "code", + "location": "/examples/doc_merge/plot.py:97-132" + }, + "95": { + "file_id": 5, + "content": "Creates a boxplot for scores, sets ticks and labels for x-axis, adjusts y-limits, adds a blue bar plot with total costs on the right y-axis, and sets corresponding tick colors and limits.", + "type": "comment" + }, + "96": { + "file_id": 5, + "content": " tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count 
= 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count, annotation_height, f\"Solved: {solved}\", ha=\"center\", va=\"bottom\"\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"doc_merge_{model}_{num_ndas}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n num_ndas=4,\n display_solved=False,\n model=\"GPT-3.5\",\n y_upper=10,\n display_left_ylabel=True,", + "type": "code", + "location": "/examples/doc_merge/plot.py:133-168" + }, + "97": { + "file_id": 5, + "content": "This code is setting custom tick positions and labels for the y-axis of a plot, displaying the solved status of various methods, saving the plot as a PDF, and generating plotting data from given results.", + "type": "comment" + }, + "98": { + "file_id": 5, + "content": " cost_upper=15,\n)", + "type": "code", + "location": "/examples/doc_merge/plot.py:169-170" + }, + "99": { + "file_id": 5, + "content": "This code snippet is initializing a function, specifically an instance of the class \"DocMerge\", with the parameter 'cost_upper' set to 15. The purpose of this function might be to perform document merging or some similar operation with a specified upper cost limit.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/1.json b/docs/data/1.json new file mode 100644 index 0000000..d31dc52 --- /dev/null +++ b/docs/data/1.json @@ -0,0 +1,551 @@ +{ + "100": { + "file_id": 6, + "content": "/examples/doc_merge/pure_documents.json", + "type": "filepath" + }, + "101": { + "file_id": 6, + "content": "Both comments discuss company-supplier agreements, covering aspects such as NDAs, IT system maintenance, late delivery penalties, termination provisions, confidentiality clauses, and governing laws.", + "type": "summary" + }, + "102": { + "file_id": 6, + "content": "[\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\n1. 
Agreement between [Your Company Name] and [Recipient Name] on [Date].\\n2. Information sharing for the purpose of [specific project or purpose].\\n3. \\\"Confidential Information\\\" includes all potentially commercially valuable information, specifically software development tactics, processes, and in-house research results.\\n4. Receiving party is obligated to protect the Confidential Information, use it solely for the disclosed purpose, and not disclose it without consent.\\n5. Breach penalties include injunctive relief, other remedies, and a $200,000 fee per breach.\\n6. The Agreement applies to the Parties and their successors and assigns. It contains all related agreements and lack of enforcement doesn't imply waiver.\\n7. The Agreement is under the laws of [State].\\n8. Signed by [Your Company Name] and [Recipient Name] at the above date.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective from [Effective Date], this NDA involves [Your Company Name]", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:1-3" + }, + "103": { + "file_id": 6, + "content": "This code appears to contain two example Non-Disclosure Agreements (NDA) in JSON format, with fields such as parties involved, purpose of information sharing, definition of Confidential Information, obligations and penalties for breaching the agreement, applicable laws, and signatures.", + "type": "comment" + }, + "104": { + "file_id": 6, + "content": " (\\\"Disclosing Party\\\"), and [Recipient Name] (\\\"Receiving Party\\\").\\n\\n1. Purpose: The Disclosing Party will disclose confidential information related to [Topic of Research] to the Receiving Party for [Purpose].\\n\\n2. Confidential Information: Defined as all non-public reports, data, designs, and other materials provided by the Disclosing Party to the Receiving Party.\\n\\n3. Receiving Party's Obligations:\\n a. Use, reproduce, or distribute the confidential information only for the agreed purpose.\\n b. 
Restrict access to the information to necessary parties, ensuring they abide by strict confidentiality.\\n c. Return or destroy all confidential information upon request or at the end of the agreement.\\n\\n4. Exclusions: Information will not be classified as confidential if it is already known to the Receiving Party, publicly known, or independently developed by the Receiving Party.\\n\\n5. Non-Competition: The Receiving Party will not engage in any competing business against the Disclo", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:3-3" + }, + "105": { + "file_id": 6, + "content": "The code is a confidentiality agreement between a Disclosing Party and a Recipient (Receiving Party) involving the exchange of confidential information related to a specific topic for a stated purpose. It outlines obligations for the Receiving Party, exclusions, and non-competition clauses.", + "type": "comment" + }, + "106": { + "file_id": 6, + "content": "sing Party during the agreement and one year after its termination.\\n\\n6. Term and Termination: The agreement is valid for [e.g., \\\"two years\\\"], unless terminated earlier with [e.g., \\\"30 days\\\"] written notice. The Receiving Party's non-disclosure and non-competition obligations persist post-termination.\\n\\n7. General Provisions:\\n a. Governing Law: [Your State]'s laws apply.\\n b. Amendments: Only valid if written and signed by both parties.\\n c. 
Entire Agreement: This contract overrules previous related agreements.\\n\\nSigned as of the Effective Date by [Your Company Name] - Disclosing Party [Recipient Name] - Receiving Party.\",\n \"CONFIDENTIALITY & NON-DISCLOSURE AGREEMENT\\n\\n Entities Involved:\\n Effective [Date], between [AquaBlue Innovations], established in [State], and [PineTree Solutions], a registered entity.\\n\\n Objective:\\n To safeguard classified data during talks of a potential technological alliance.\\n\\n Specification of Protected Information:\\n ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:3-4" + }, + "107": { + "file_id": 6, + "content": "The code defines a confidentiality and non-disclosure agreement between AquaBlue Innovations and PineTree Solutions, with provisions for protecting classified information during potential technological alliance discussions.", + "type": "comment" + }, + "108": { + "file_id": 6, + "content": "Particularly:\\n\\na. System designs and architectural schematics.\\nb. Proprietary computational algorithms.\\n\\n Receiver's Obligations:\\n a. Maintain strict non-disclosure using best practices.\\n b. Employ solely for the aforementioned aim.\\n c. No unveiling without explicit authorization.\\n\\n Violation Ramifications:\\n A charge of $280,000 for every infringement, plus possible legal proceedings.\\n\\n General Terms:\\n Binding for both parties and any successors. 
This encapsulates the entire accord.\\n\\n Legal Reference:\\n Governed as per [State]'s legal framework.\\n\\n Attestation:\\n Duly signed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"SECRECY & DISCLOSURE AGREEMENT\\n\\n Contracting Parties:\\n Dated [Date], drawn between [AquaBlue Innovations], a [State]-based corporation, and [PineTree Solutions], a licensed organization.\\n\\n Aim:\\n To protect exclusive insights amidst dialogues for a technological partnership.\\n\\n C", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:4-5" + }, + "109": { + "file_id": 6, + "content": "Code snippet describes a confidentiality agreement between AquaBlue Innovations and PineTree Solutions, detailing non-disclosure obligations, violation consequences, and legal reference.", + "type": "comment" + }, + "110": { + "file_id": 6, + "content": "ategorization of Sensitive Data:\\n Includes:\\n\\na. Internal software blueprints.\\nb. Intellectual property awaiting patents.\\n\\n Commitments of Recipient:\\n a. Uphold confidentiality, ensuring data integrity.\\n b. Utilize strictly for collaborative ventures.\\n c. No exposure without prior consensus.\\n\\n Repercussions for Non-Compliance:\\n $295,000 fine for each transgression, and the option for legal recourse.\\n\\n Overall Provisions:\\n Legally enforceable for signatories and successors. Complete and sole agreement.\\n\\n Juridical Standpoint:\\n Under the auspices of [State] laws.\\n\\n Ratification:\\n Confirmed and endorsed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"This Non-Disclosure and Non-Competition Agreement is made between [Your Company Name] and [Contractor Name/Company].\\n\\n1. Confidentiality: The Contractor acknowledges access to the Company's confidential information during their relationship.\\n\\n2. 
Non-Disclosure: The", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:5-6" + }, + "111": { + "file_id": 6, + "content": "The code provides the details of a non-disclosure and non-competition agreement between AquaBlue Innovations and PineTree Solutions. It outlines the categorization of sensitive data, commitments of the recipient, repercussions for non-compliance, overall provisions, jurisdictional standpoint, and ratification details.", + "type": "comment" + }, + "112": { + "file_id": 6, + "content": " Contractor agrees not to disclose, use, reproduce, or distribute this confidential information unless necessary for their obligations.\\n\\n3. Non-Competition: The Contractor agrees not to compete with the company or assist others in doing so for one year after the termination of their relationship. They also agree not to solicit the company's clients or customers for the benefit of a competitor for one year.\\n\\n4. Return of Confidential Information: At the end of the relationship or upon the company's request, the Contractor will return all confidential information and copies thereof.\\n\\n5. Remedies: For any breach, the Company may seek specific performance and injunctive relief, in addition to other remedies.\\n\\n6. Governing Law: The Agreement is governed by the laws of [Your State].\\n\\n7. 
Entire Agreement: This document replaces all previous agreements and understandings on the subject.\\n\\nBoth parties acknowledge understanding and voluntarily accepting the Agreement.\\n\\nSignatures required from [Your Company Name] and [Contractor Name/Company].\",", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:6-6" + }, + "113": { + "file_id": 6, + "content": "This code contains the terms of a contract between a company and a contractor, outlining confidentiality obligations, non-competition agreement, return of information, legal remedies, governing law, and acknowledgment by both parties.", + "type": "comment" + }, + "114": { + "file_id": 6, + "content": " \"This Loyalty Agreement is between [Company Name] and [Employee Full Name], where the company agrees to provide specialized training at no cost to the employee, who in turn commits to work for the company for a specified period. If the employee leaves the company within two years after completing training, they must pay $50,000 as compensation for training costs, payable within 30 days of termination. Exceptions to this repayment include termination without cause, resignation due to breach of agreement by the company, or other agreed upon circumstances. Any changes to this agreement must be in writing and signed by both parties, and the agreement will be governed by the laws of [State/Country]. This agreement is binding to all involved parties and their successors. 
Both the company and the employee sign to attest to these terms.\",\n \"EMPLOYEE LOYALTY AGREEMENT\\n\\nThis agreement is entered into by [Company Name] and [Employee Name] to protect the company's business interests, goodw", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:7-8" + }, + "115": { + "file_id": 6, + "content": "This is a loyalty agreement between a company and an employee, outlining the specialized training provided to the employee at no cost in exchange for their commitment to work for the company for a specified period. The employee may need to pay compensation if they leave within two years after completing training, subject to certain exceptions. The agreement is governed by the laws of [State/Country] and is binding to all involved parties and their successors.", + "type": "comment" + }, + "116": { + "file_id": 6, + "content": "ill, and confidential information, and affirm employee's loyalty. \\n\\n1. Non-disclosure: Employee agrees to not disclose or use company's confidential information, during or post-employment. \\n\\n2. Non-competition: Employee will not work for or establish a competitor within [e.g., \\\"50\\\"] miles from the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n3. Non-solicitation: Employee will not solicit clients or employees of the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n4. Return of Property: Employee will return all company property upon termination.\\n\\n5. Remedies: Company can seek injunction for a breach or potential breach of this agreement.\\n\\n6. Severability: If any provision of this agreement is held invalid, the remainder of the Agreement will continue.\\n\\n7. Governing Law: This agreement will be governed by the laws of [State, e.g., \\\"California\\\"].\\n\\n8. Agreement: This is the entire agreement and supersedes prior negotiations.\\n\\n9. 
Amendments: Any changes must ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:8-8" + }, + "117": { + "file_id": 6, + "content": "This code outlines the terms and conditions of an employment agreement, including non-disclosure, non-competition, non-solicitation clauses, property return policy, legal remedies, severability, governing law, and amendment procedures.", + "type": "comment" + }, + "118": { + "file_id": 6, + "content": "be in writing and signed by both parties.\\n\\nSignatures of both parties indicate agreement to these terms.\\n\\n[Company Name] - Authorized Signatory [Employee Name]\",\n \"This Loyalty Agreement is between [Company Name] and [Contractor Company Name]. The Agreement ensures the Contractor's loyalty and confidentiality towards the Company during and post engagement. Contractor agrees not to use or disclose the Company's confidential information, or engage in competing business or solicitation for a period of [e.g., \\\"12\\\"] months post termination. Contractor must return all Company property upon termination. In case of breach, Company can seek legal remedies including injunction. The Agreement remains valid even if a provision is held invalid. The Agreement follows [State, e.g., \\\"California\\\"] laws and replaces all previous understandings. It can be amended only in writing with both parties' signature.\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:8-10" + }, + "119": { + "file_id": 6, + "content": "Code snippet represents a loyalty agreement template between a company and a contractor. It includes confidentiality, non-compete and non-solicitation clauses, return of property upon termination, legal remedies in case of breach, and adherence to specified state laws. 
The agreement can only be amended with both parties' written signature.", + "type": "comment" + }, + "120": { + "file_id": 6, + "content": ", 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. CONFIDENTIALITY\\n\\nContractor agrees not to disclose, use, or allow the use of the Company's confidential information during or after the relationship, except as required for their services to the Company.\\n\\n2. NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n3. NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n4. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property and data.\\n\\n5. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contra", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:10-10" + }, + "121": { + "file_id": 6, + "content": "This code is a contractual agreement between Company Name and Contractor Company Name, outlining confidentiality, non-competition, non-solicitation, property return, and penalty for breach clauses.", + "type": "comment" + }, + "122": { + "file_id": 6, + "content": "ctor shall pay the Company a penalty of $50,000.\\n\\n6. GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"California\\\"] laws.\\n\\n7. 
ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\\nDate: _______________________ Date: _______________________\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______, 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. DEFINITION OF CONFIDENTIAL INFORMATION\\n\\nFor the purposes of this Agreement, \\\"confidential information\\\" shall refer to research results, software created, devices produced by the Company, and any other information deemed proprietary or not generally known to the public.\\n\\n2. CONFIDENTIALITY\\n\\nContractor agrees not to ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:10-11" + }, + "123": { + "file_id": 6, + "content": "This code represents a B2B contractor loyalty agreement between Company Name and Contractor Company Name, effective on a specific date. The contract outlines terms including the definition of confidential information, non-disclosure obligations, payment terms, governing law, and agreement superseding prior discussions or agreements. Both parties must sign below to agree to these terms.", + "type": "comment" + }, + "124": { + "file_id": 6, + "content": "disclose, use, or allow the use of the Company's confidential information, as defined herein, during or after the relationship, except as required for their services to the Company.\\n\\n3. NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n4. 
NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n5. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property, including all items containing or pertaining to confidential information.\\n\\n6. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contractor shall pay the Company a penalty of $50,000.\\n\\n7. GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"Ca", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:11-11" + }, + "125": { + "file_id": 6, + "content": "Contract specifying confidentiality, non-compete, non-solicitation, property return, breach penalty, and governing law for a relationship between a company and contractor.", + "type": "comment" + }, + "126": { + "file_id": 6, + "content": "lifornia\\\"] laws.\\n\\n8. ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\",\n \"The Non-Disclosure Agreement (NDA) dated [Date] is between [Company], based in [Country/State], and [Supplier], also incorporated in [Country/State]. The Company intends to disclose confidential information to the Supplier for [purpose]. This confidential data can include business strategies, financial data, customer information, and product designs. The Supplier agrees to refrain from sharing this information, barring any legal requirements. Exceptions to this confidentiality are in cases where the information becomes public or was already known by the Supplier before the Company's disclosure. If the Supplier breaches this agreement, they face a financial penalty of [$]. 
The NDA is valid for [X years], unless the Company provides writte", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:11-12" + }, + "127": { + "file_id": 6, + "content": "The code contains the text of a Non-Disclosure Agreement (NDA) between Company and Supplier, including terms such as purpose of sharing confidential information, non-disclosure obligations for Supplier, exceptions, penalties, and agreement validity.", + "type": "comment" + }, + "128": { + "file_id": 6, + "content": "n termination. Upon the Company's request, the Supplier must return or destroy all copies of Confidential Information. This agreement supersedes previous agreements and can only be altered by a written document approved by both parties. The NDA is governed by the laws of [specific country/state].\",\n \"NON-DISCLOSURE AND NON-COMPETE AGREEMENT\\n\\nEffective Date: [Date]\\n\\nPARTIES:\\n\\n Company: [Full Legal Name of Company], located at [Company Address].\\n Supplier: [Full Legal Name of Supplier], located at [Supplier Address].\\n\\n1. CONFIDENTIALITY:\\n\\nSupplier shall not disclose Company's confidential information, which includes business strategies, financial data, and customer details, to any third party. This confidentiality obligation lasts for [X years, e.g., \\\"5 years\\\"] from the date of disclosure.\\n\\n2. 
NON-COMPETITION:\\n\\nFor [X years, e.g., \\\"3 years\\\"] following the termination of their business relationship, Supplier agrees not to engage in or start any business that dir", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:12-13" + }, + "129": { + "file_id": 6, + "content": "This code contains a non-disclosure and non-compete agreement between a company and a supplier, with details including the effective date, parties involved, confidentiality obligations, non-competition restrictions, termination procedures, and governing laws.", + "type": "comment" + }, + "130": { + "file_id": 6, + "content": "ectly competes with Company within a [X mile/km radius, e.g., \\\"50-mile radius\\\"] of Company's primary business location.\\n\\n3. PENALTY FOR BREACH:\\n\\nShould Supplier breach this Agreement, they shall pay Company a penalty of [specific amount, e.g., \\\"$50,000\\\"], in addition to any other legal remedies available to Company.\\n\\n4. RETURN OF INFORMATION:\\n\\nUpon request, Supplier shall return or destroy all of Company's confidential information and confirm its deletion in writing.\\n\\n5. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\nAGREEMENT ACKNOWLEDGEMENT:\\n\\n__________ [Company] __________ [Supplier]\",\n \"DATA ANALYSIS EMPLOYEE AGREEMENT\\n\\nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:13-14" + }, + "131": { + "file_id": 6, + "content": "This code represents a contract or agreement between a company and a supplier, outlining the terms of their business relationship. 
It includes provisions such as competition limits, penalty for breach, return of information, governing law, and acknowledgement by both parties.", + "type": "comment" + }, + "132": { + "file_id": 6, + "content": "the \\\"Company,\\\" and [Employee Name], an individual residing at [Employee Address], herein referred to as the \\\"Employee.\\\"\\n\\n Position and Duties:\\n a. The Company hereby employs Employee in the capacity of Data Analyst.\\n b. The Employee's primary duties will be to [specific data analysis tasks, e.g., \\\"analyze sales data, forecast trends, and produce reports for managerial review\\\"].\\n\\n Term: The Employee's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Employee under this Agreement, the Company will pay Employee a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Employee agrees not to disclose or use, either during or after the term of employment, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of performin", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:14-14" + }, + "133": { + "file_id": 6, + "content": "This code defines an employment agreement for a Data Analyst, specifying the employee's duties, term, compensation, and confidentiality obligations.", + "type": "comment" + }, + "134": { + "file_id": 6, + "content": "g their duties for the Company.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Employee in the course of this employment related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. 
Upon termination, Employee agrees to return all company property and data.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreement may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\\n\\n[Company Name or Authorized [Employee Name]\\nRepresentative Name, Title]\",\n \"DATA ANALYSIS SERVICE AGREEMENT\\n\\", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:14-15" + }, + "135": { + "file_id": 6, + "content": "Code is an agreement between a company and an employee, defining intellectual property ownership, termination terms, governing law, amendments process, and more. It outlines the legal relationship and responsibilities for both parties.", + "type": "comment" + }, + "136": { + "file_id": 6, + "content": "nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as the \\\"Company,\\\" and [Contractor Business Name], a [legal structure, e.g., \\\"limited liability company\\\"] organized under the laws of [State/Country], with its principal place of business at [Contractor Business Address], herein referred to as the \\\"Contractor.\\\"\\n\\n Scope of Work:\\n a. The Contractor agrees to provide data analysis services to the Company.\\n b. 
The specific services will include [specific data analysis tasks, e.g., \\\"analyzing sales data, forecasting trends, and producing reports for managerial review\\\"].\\n\\n Term: The Contractor's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Contractor under this Agreeme", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:15-15" + }, + "137": { + "file_id": 6, + "content": "This code snippet is the beginning of a legal agreement between a company and a contractor, outlining the scope of work, term, and compensation for data analysis services.", + "type": "comment" + }, + "138": { + "file_id": 6, + "content": "nt, the Company will pay the Contractor a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Contractor agrees not to disclose or use, either during or after the term of this Agreement, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of providing the services.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Contractor in the course of providing the services related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. 
Upon termination, Contractor agrees to return all company data and any other proprietary materials.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreemen", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:15-15" + }, + "139": { + "file_id": 6, + "content": "This code is a contract between the Company and Contractor, outlining payment terms, confidentiality agreements, intellectual property ownership, termination conditions, governing law, and amendments to the agreement.", + "type": "comment" + }, + "140": { + "file_id": 6, + "content": "t may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nDate: [Insert Date]\\n\\nParties: [University Name], [University Address] (\\\"Disclosing Party\\\") and [Researcher's Full Name], [Researcher's Address] (\\\"Receiving Party\\\").\\n\\n1. Purpose: For the research of [Briefly Describe the Research or Project].\\n\\n2. Confidential Information: Includes data, studies, reports, patents, and other valuable business-related material.\\n\\n3. Obligations:\\na. Confidential Information must remain secret.\\nb. It's for the specified purpose only.\\nc. No third-party disclosure without consent.\\n\\n4. Exceptions: Public knowledge, third-party shared info, or independently developed.\\n\\n5. Duration: ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:15-16" + }, + "141": { + "file_id": 6, + "content": "This code appears to be a non-disclosure agreement (NDA) between a researcher and a university. 
The NDA outlines the terms of confidentiality, purpose, obligations, exceptions, and duration for sharing research-related materials. It also mentions the possibility of amending the agreement in writing and signed by both parties, and that this agreement contains the entire understanding between them, superseding any prior agreements.", + "type": "comment" + }, + "142": { + "file_id": 6, + "content": "Confidentiality lasts [X years, e.g., 2 years] from disclosure.\\n\\n6. Return: All Confidential Information must be returned or destroyed upon request.\\n\\n7. No Transfer: This doesn't grant property rights or licenses.\\n\\n8. Law: Governed by [State/Country] laws.\\n\\n9. Amendments: Only in writing and signed.\\n\\nAgreement: By signing, parties agree to the terms.\",\n \"UNIVERSITY-BUSINESS COOPERATION AGREEMENT\\n\\nThis Cooperation Agreement (the \\\"Agreement\\\") is made and entered into on [Insert Date], by and between [Business Name], a [business type, e.g., \\\"corporation\\\"] located at [Business Address], hereinafter referred to as the \\\"Business', and [University Name], a higher education institution located at [University Address], hereinafter referred to as the \\\"University\\\".\\n\\n1. Objective:\\n\\nThe purpose of this Agreement is to define the terms under which the Business and the University will cooperate to [Objective e.g., \\\"jointly conduct research, promote innovation, and facilitate", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:16-17" + }, + "143": { + "file_id": 6, + "content": "This code represents a legal agreement between a business and a university, outlining the terms of their cooperation for research, innovation, and facilitation. 
The agreement includes provisions regarding confidentiality, return/destruction of information, transfer restrictions, governing law, amendment process, and signing parties' agreement to the terms.", + "type": "comment" + }, + "144": { + "file_id": 6, + "content": " the exchange of knowledge in the field of _________\\\"].\\n\\n2. Scope of Cooperation:\\n\\na. Research Collaborations: Joint research initiatives, sharing of resources, and publications.\\n\\nb. Internships and Placements: Facilitation of student internships, projects, and job placements.\\n\\nc. Seminars and Workshops: Organizing joint seminars, conferences, and workshops.\\n\\nd. Facilities and Resource Sharing: Providing access to labs, equipment, libraries, etc.\\n\\n3. Intellectual Property:\\n\\nAll intellectual property developed jointly will be shared, and a separate agreement will detail the rights, ownership, and any revenue distribution.\\n\\n4. Funding and Resources:\\n\\nBoth parties agree to jointly contribute [Specify Amount or Percentage], and additional fund sourcing details will be determined on a project-by-project basis.\\n\\n5. Confidentiality:\\n\\nBoth parties agree to maintain the confidentiality of shared proprietary information.\\n\\n6. Duration and Termination:\\n\\nThis Agreement wi", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:17-17" + }, + "145": { + "file_id": 6, + "content": "This code is outlining the terms and conditions for a research collaboration between two organizations, including areas of cooperation, intellectual property sharing, funding contributions, confidentiality, and agreement duration.", + "type": "comment" + }, + "146": { + "file_id": 6, + "content": "ll remain in effect for [e.g., \\\"three years\\\"] from the date of signing, unless terminated earlier by either party with [e.g., \\\"30 days\\\"] written notice.\\n\\n7. 
Dispute Resolution:\\n\\nAny disputes arising from this Agreement will first attempt resolution through mediation. If unresolved, disputes will be subject to the jurisdiction of [State/Country].\\n\\n8. Amendments:\\n\\nChanges to this Agreement must be in writing and signed by both parties.\\n\\n9. Liability:\\n\\nEach party assumes responsibility for its actions and is not liable for the actions of the other party.\\n\\n10. Governing Law:\\n\\nThis Agreement is governed by the laws of [State/Country].\\n\\nIN WITNESS WHEREOF, both parties have executed this Agreement as of the date first mentioned above.\",\n \"SUPPLY AGREEMENT FOR UNIVERSITY LABORATORY\\n\\nThis Supply Agreement (the \\\"Agreement\\\"), made as of [Insert Date], is entered into by and between [Supplier Name], a [business entity type, e.g., \\\"corporation\\\"] having its principal o", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:17-18" + }, + "147": { + "file_id": 6, + "content": "This is a sample Supply Agreement for a university laboratory between the Supplier and the University. The agreement outlines terms, dispute resolution, amendments, liability, and governing law.", + "type": "comment" + }, + "148": { + "file_id": 6, + "content": "ffice at [Supplier Address], hereinafter referred to as the \\\"Supplier', and [University Name], a higher education institution located at [University Address], acting through its [specific department or laboratory, e.g., \\\"Department of Chemistry\\\"], hereinafter referred to as the \\\"University\\\".\\n\\n1. Purpose:\\n\\nThe Supplier agrees to provide specific products/materials/equipment, as detailed in Annex A, to the University for use in its laboratory.\\n\\n2. Terms of Supply:\\n\\na. Description of Goods: The goods to be supplied are detailed in Annex A attached herewith.\\n\\nb. Delivery: Goods will be delivered to [University Address or specific lab address] within [specific timeframe].\\n\\nc. 
Pricing: The price for the goods is set out in Annex A and includes all packaging, transportation, and delivery costs unless otherwise specified.\\n\\n3. Payment Terms:\\n\\nPayments will be made by the University within [e.g., \\\"30 days\\\"] of receiving the invoice from the Supplier.\\n\\n4. Warranty:\\n\\nThe", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:18-18" + }, + "149": { + "file_id": 6, + "content": "The code outlines an agreement between a Supplier and a University for the provision of specific products/materials/equipment to be used in the University's laboratory, with details outlined in Annex A. It covers terms of supply, payment terms, and warranty information.", + "type": "comment" + }, + "150": { + "file_id": 6, + "content": " Supplier warrants that all goods supplied under this Agreement will be free from defects for a period of [specific duration, e.g., \\\"12 months\\\"] from the date of delivery.\\n\\n5. No Disclosure Clause:\\n\\na. The University agrees not to disclose, reproduce, or distribute any proprietary information, trade secrets, or other confidential details related to the products/materials/equipment provided by the Supplier without the Supplier's prior written consent.\\n\\nb. This clause remains effective for a period of [e.g., \\\"5 years\\\"] from the date of the last delivery of the goods under this Agreement.\\n\\n6. Termination:\\n\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term of this Agreement and fails to remedy such breach within the notice period.\\n\\n7. Governing Law:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [State/Country].\\n\\n8. 
Amendments:\\n\\nModifications to this Agreement mus", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:18-18" + }, + "151": { + "file_id": 6, + "content": "This code provides a template for an agreement between a supplier and the University, outlining warranties on goods, confidentiality terms, termination conditions, governing law, and amendment procedures.", + "type": "comment" + }, + "152": { + "file_id": 6, + "content": "t be in writing and signed by both parties.\\n\\nIN WITNESS WHEREOF, the parties hereto have executed this Supply Agreement as of the date first above written.\",\n \"LABORATORY SUPPLY AGREEMENT\\n\\nDate: [Insert Date]\\n\\nParties:\\n\\n Supplier: [Supplier Name], [Supplier Address]\\n University: [University Name], [University Address]\\n\\n1. Purpose: Supplier will provide goods as listed in Annex A to the University's laboratory.\\n\\n2. Delivery: Within [specific timeframe, e.g., \\\"30 days\\\"] to [specific lab address].\\n\\n3. Payment: University will pay within [e.g., \\\"30 days\\\"] of invoice receipt.\\n\\n4. Warranty: Goods are defect-free for [e.g., \\\"12 months\\\"] from delivery.\\n\\n5. Non-disclosure: University will not disclose Supplier\\u2019s proprietary details for [e.g., \\\"5 years\\\"]. Breach will result in a penalty of [specific amount or formula, e.g., \\\"$5,000 per incident or actual damages, whichever is greater\\\"].\\n\\n6. Termination: [e.g., \\\"30 days\\\"] notice for breaches unresolve", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:18-19" + }, + "153": { + "file_id": 6, + "content": "This code represents a template for a Laboratory Supply Agreement, including sections on purpose, delivery, payment, warranty, non-disclosure, and termination. 
It provides a framework for suppliers and universities to establish contractual agreements regarding the supply of goods to laboratories.", + "type": "comment" + }, + "154": { + "file_id": 6, + "content": "d within said period.\\n\\n7. Law: Governed by [State/Country] laws.\\n\\n8. Amendments: Both parties must sign written changes.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freelancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n5. TERMINATION:\\n\\nEither party may terminate", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:19-20" + }, + "155": { + "file_id": 6, + "content": "This code represents a sample freelance agreement document with various sections such as services provided, payment terms, deadline, confidentiality, and termination. It is governed by the laws of a specific state/country and allows for amendments only through written changes signed by both parties.", + "type": "comment" + }, + "156": { + "file_id": 6, + "content": " this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n6. INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and not an employee of the Client. 
No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n7. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n8. AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freel", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:20-21" + }, + "157": { + "file_id": 6, + "content": "This is a contractual agreement between a client and a freelancer, detailing the services provided by the freelancer, payment terms, termination notice period, independent contractor status, governing law, and amendment requirements.", + "type": "comment" + }, + "158": { + "file_id": 6, + "content": "ancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. PENALTIES:\\n\\na. Late Delivery: If Freelancer fails to deliver the completed service by the specified deadline, a penalty of [specific amount, e.g., \\\"$50\\\"] per day will be deducted from the final payment until the service is delivered.\\n\\nb. Confidentiality Breach: Breaching the confidentiality clause will result in a penalty of [specific amount, e.g., \\\"$2,000\\\"].\\n\\n5. 
CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n6. TERMINATION:\\n\\nEither party may terminate this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n7. INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:21-21" + }, + "159": { + "file_id": 6, + "content": "This code is a template for a legal agreement between a client and freelancer, outlining payment terms, deadlines, penalties for late delivery or confidentiality breach, confidentiality agreement, termination clause, and the status of the freelancer as an independent contractor.", + "type": "comment" + }, + "160": { + "file_id": 6, + "content": "not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n8. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n9. AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"This document outlines the terms of cooperation between Company A and Company B for a joint research project. The duties of each company are designated, with a detailed financial contribution outlined in Appendix A. Confidentiality is strictly enforced, and any intellectual property created will be jointly owned. All published findings will be reviewed by both parties for protection of proprietary information. Termination of this agreement requires 30 days' written notice, and each party assumes any risks or liabilities during this collaboration. Amendments must be in writing and signed by both parties. 
The duration of the agreement lasts from the s", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:21-22" + }, + "161": { + "file_id": 6, + "content": "This code outlines an agreement between Company A and Company B for a joint research project, detailing duties, financial contributions, confidentiality, intellectual property ownership, termination terms, risks and liabilities, amendment requirements, and the duration of the agreement.", + "type": "comment" + }, + "162": { + "file_id": 6, + "content": "tart date to the end date, unless extended. By signing, both parties acknowledge and agree to these terms.\",\n \"BUSINESS AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement outlines the terms of the collaboration/project/service between Business A and Business B regarding [Brief Description of the Collaboration/Project/Service].\\n\\n2. TERMS OF SERVICE:\\n\\n Business A agrees to: [Specific tasks/responsibilities, e.g., \\\"Supply 500 units of Product X monthly.\\\"].\\n Business B agrees to: [Specific tasks/responsibilities, e.g., \\\"Pay $50 per unit of Product X within 30 days of delivery.\\\"].\\n\\n3. PAYMENT TERMS:\\n\\nPayments shall be made as follows: [Payment structure, e.g., \\\"Payment due within 30 days of invoice.\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties commit to maintaining confidentia", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:22-23" + }, + "163": { + "file_id": 6, + "content": "Code snippet represents the start of a business agreement document, with fields for effective date and names and addresses of the businesses involved. 
It outlines the purpose of collaboration, terms of service, payment terms, and confidentiality commitments made by both parties upon signing.", + "type": "comment" + }, + "164": { + "file_id": 6, + "content": "lity regarding all proprietary information exchanged during this agreement.\\n\\n5. TERMINATION:\\n\\nEither party may terminate this Agreement with [X days, e.g., \\\"30 days\\\"] written notice. If breached, the aggrieved party may seek remedies as per governing laws.\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Business Agreement.\",\n \"CONFIDENTIALITY:\\n\\n4.1. Confidential Information: For the purposes of this Agreement, \\\"Confidential Information\\\" refers to any data or information, regardless of its form, proprietary to or maintained as confidential by either party, which is not publicly known and which is disclosed during the term of this Agreement or in relation to the collaboration/project/service.\\n\\n4.", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:23-24" + }, + "165": { + "file_id": 6, + "content": "This code is a Business Agreement template containing sections for confidentiality, termination, governing law, amendments, and acknowledgement. The agreement states that all proprietary information exchanged must be kept confidential, and the agreement can be terminated with X days notice. It's governed by specific country/state laws and any modifications must be in writing signed by both parties.", + "type": "comment" + }, + "166": { + "file_id": 6, + "content": "2. 
Protection and Non-Disclosure: Both parties agree to use the Confidential Information solely for the purposes of the Agreement and will exert reasonable efforts to prevent the unauthorized disclosure or use of the Confidential Information. Neither party shall disclose, reproduce, or distribute any portion of the Confidential Information without the disclosing party's prior written consent.\\n\\n4.3. Exclusions: Confidential Information shall not include any data or information which:\\n\\n Is or becomes publicly known through no wrongful act of the receiving party;\\n Is independently developed by the receiving party without the use of the Confidential Information;\\n Is rightfully received from a third party without any obligation of confidentiality;\\n Is disclosed under legal requirement or order.\\n\\n4.4. Return or Destruction: Upon the termination of this Agreement, or at the request of the disclosing party, the receiving party shall return all copies of the Confidential In", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:24-24" + }, + "167": { + "file_id": 6, + "content": "This code is from a legal agreement, specifically addressing the protection and non-disclosure of confidential information between two parties. It outlines the exclusions for what constitutes as confidential information and stipulates that upon agreement termination or request, all copies must be returned.", + "type": "comment" + }, + "168": { + "file_id": 6, + "content": "formation to the disclosing party or certify in writing that it has destroyed all such copies.\\n\\n4.5. 
Duration: The obligations set forth in this Confidentiality section shall survive the termination or expiration of this Agreement for a period of [specific time, e.g., \\\"five years\\\"].\",\n \"LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Party A: [Full Legal Name of Party A], located at [Party A Address].\\n Party B: [Full Legal Name of Party B], located at [Party B Address].\\n\\n1. LOYALTY COMMITMENT:\\n\\nBoth parties acknowledge the mutual value of their business relationship. They commit to work in good faith, ensuring a collaborative environment that prioritizes trust, loyalty, and shared objectives.\\n\\n2. NON-POACHING OF EMPLOYEES:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 12 months following its termination\\\"], neither Party A nor Party B shall, without the prior written consent of the other party:\\n\\na. Directly or i", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:24-25" + }, + "169": { + "file_id": 6, + "content": "This code contains a confidentiality agreement and a loyalty agreement, including details such as effective dates, parties involved, duration of obligations, non-poaching clauses, and survival of obligations after the termination of the agreement.", + "type": "comment" + }, + "170": { + "file_id": 6, + "content": "ndirectly solicit, induce, or encourage any employees of the other party to terminate their employment or to engage in employment or other services elsewhere.\\nb. Hire, employ, or contract the services of any employee of the other party who has been employed by the said party within the last 12 months.\\n\\n3. BREACH:\\n\\nAny violation of the clauses in this Agreement will be deemed a material breach and may result in legal action or other remedies as available by law.\\n\\n4. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n5. 
AMENDMENTS:\\n\\nAny modifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Loyalty Agreement.\",\n \"NON-COMPETE AND LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Busines", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:25-26" + }, + "171": { + "file_id": 6, + "content": "Non-compete and loyalty agreement between Business A and another party, outlining restrictions on soliciting employees, hiring former employees, breach consequences, governing law, amendment rules, and acknowledgment of understanding.", + "type": "comment" + }, + "172": { + "file_id": 6, + "content": "s B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement is designed to protect the proprietary and business interests of both parties by ensuring loyalty and preventing competition during and after the period of collaboration or engagement.\\n\\n2. NON-COMPETE:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 24 months following its termination\\\"], neither party shall:\\n\\na. Engage in or support any venture that directly competes with the core business of the other party within [specific geographical region, e.g., \\\"the State of California\\\"].\\nb. Invest in, partner with, or advise any business entity that competes directly with the other party.\\n\\n3. LOYALTY AND NON-POACHING:\\n\\nBoth parties pledge their commitment to a loyal business relationship. Specifically:\\n\\na. 
Neither party will, without the prior written consent of the other, solicit, induce, or encourage any employees or contractors of the o", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:26-26" + }, + "173": { + "file_id": 6, + "content": "This code is a legal agreement ensuring loyalty and preventing competition between two parties during and after collaboration or engagement, with sections for non-compete, non-poaching, and loyalty.", + "type": "comment" + }, + "174": { + "file_id": 6, + "content": "ther party to terminate their engagement or to join another business.\\nb. Neither party shall disparage or encourage others to disparage the other party, its products, services, or its employees.\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties agree to maintain confidentiality regarding any proprietary or business-sensitive information exchanged during the course of this Agreement, ensuring that such information isn't disclosed without the explicit consent of the party owning that information.\\n\\n5. BREACH AND REMEDIES:\\n\\nA violation of any provision in this Agreement will be deemed a significant breach. The aggrieved party shall be entitled to seek injunctive relief, damages, or any other remedies available under the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications or amend", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:26-26" + }, + "175": { + "file_id": 6, + "content": "This code snippet represents a legal agreement between two parties, outlining the terms and conditions of their engagement. 
It includes clauses on termination, confidentiality, breach remedies, governing law, and amendments.", + "type": "comment" + }, + "176": { + "file_id": 6, + "content": "ments to this Agreement must be in writing and duly signed by authorized representatives of both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, representatives from both businesses affirm their understanding and acceptance of this Non-Compete and Loyalty Agreement.\",\n \"AMENDMENT TO CONTRACT: LENGTH OF ENGAGEMENT\\n\\nThis Amendment is made on [Date], and amends the Non-Compete and Loyalty Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Non-Compete and Loyalty Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"2\\\"] - Length of Engagement\\n\\nThe period of engagement between Business A and Business B as stipulated in the original Agreement is hereby extended/shortened/set to commence from [New Start Date] and conclude on [New End Date].\\n\\nGENERAL ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:26-27" + }, + "177": { + "file_id": 6, + "content": "This code represents an amendment to a Non-Compete and Loyalty Agreement between two businesses, specifically changing the length of engagement. It includes the updated agreement details, such as dates, business names, and locations. 
The amendment is made on a specific date and modifies the original agreement's section related to the duration of engagement.", + "type": "comment" + }, + "178": { + "file_id": 6, + "content": "PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT: FEES FOR LATE DELIVERY\\n\\nThis Amendment is made on [Date], and amends the Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"3\\\"] - Fees for Late Delivery\\n\\na. If Business A/B fails to deliver the products/services by the agreed-upon deadline, a late fee of [Specific Amount or Percentage,", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:27-28" + }, + "179": { + "file_id": 6, + "content": "This code is for creating an amendment to a contract, specifically for adjusting the fees for late delivery in case of delayed product/service delivery by either Business A or B. The original agreement and this amendment together represent the entire understanding between the parties involved. Both parties acknowledge and approve the amendment with their signatures.", + "type": "comment" + }, + "180": { + "file_id": 6, + "content": " e.g., \\\"$100\\\" or \\\"5% of the total contract value\\\"] shall be applied for each [time period, e.g., \\\"day\\\"] of delay.\\n\\nb. 
The total late fees shall not exceed [Specific Maximum Amount or Percentage, e.g., \\\"$1,000\\\" or \\\"20% of the total contract value\\\"].\\n\\nc. The fees will be deducted from the final payment or invoiced separately, as deemed appropriate by the non-defaulting party.\\n\\nGENERAL PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT\\n\\nThis Amendment (the \\\"Amendment\\\") is entered into on [Date], between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:28-29" + }, + "181": { + "file_id": 6, + "content": "This code defines an amendment to a contract, specifying late fees for delayed payments, maximum limits, deduction methods, and preserves original agreement terms. 
It also includes acknowledgement from both parties upon signing.", + "type": "comment" + }, + "182": { + "file_id": 6, + "content": " to as the \\\"Second Party'', collectively referred to as the \\\"Parties\\\".\\n\\nWHEREAS, the Parties entered into a contract dated [Original Contract Date], hereinafter referred to as the \\\"Original Contract', for [Brief Description of the Original Contract, e.g., \\\"provision of IT services to First Party\\\"];\\n\\nWHEREAS, the Parties now wish to amend the Original Contract to add additional responsibilities pertaining to the maintenance of existing IT systems;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants contained herein and for other good and valuable consideration, the receipt and sufficiency of which is hereby acknowledged, the Parties agree as follows:\\n\\n Additional Responsibility:\\n\\n The Second Party shall assume the responsibility of maintaining and ensuring the smooth functioning of the existing IT systems of the First Party. This responsibility includes, but is not limited to:\\n\\n a. Regular monitoring of the IT systems for any anomalies or issues.\\n\\n ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:29-29" + }, + "183": { + "file_id": 6, + "content": "The code is a legal document amendment, referring to the Original Contract, adding additional responsibilities for maintaining and ensuring the smooth functioning of existing IT systems.", + "type": "comment" + }, + "184": { + "file_id": 6, + "content": "b. Prompt troubleshooting and rectification of any issues identified.\\n\\n c. Routine updates and patches to ensure the systems are secure and up-to-date.\\n\\n d. Any other related tasks as deemed necessary by the First Party.\\n\\n Compensation:\\n\\n As a result of this additional responsibility, the Parties agree to a revised compensation of [New Compensation Details, e.g., \\\"$XXX per month\\\"]. 
All other payment terms as outlined in the Original Contract shall remain unchanged.\\n\\n Duration and Termination:\\n\\n The duration and termination clauses from the Original Contract shall remain applicable to this Amendment unless otherwise agreed upon in writing by the Parties.\\n\\n Miscellaneous:\\n\\n All other terms and conditions of the Original Contract, which are not specifically amended by this Amendment, shall remain in full force and effect. In the event of a conflict between this Amendment and the Original Contract, the terms of this Amendment shall prevail.\\n\\n ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:29-29" + }, + "185": { + "file_id": 6, + "content": "This code snippet outlines amendment details for a contract. It includes revised responsibilities such as prompt troubleshooting, routine updates and patches, additional tasks, new compensation terms, duration and termination clauses remaining the same unless agreed upon in writing, and existing terms not affected unless in conflict with the amendment.", + "type": "comment" + }, + "186": { + "file_id": 6, + "content": "Entire Agreement:\\n\\n This Amendment, along with the Original Contract, constitutes the entire agreement between the Parties and supersedes any prior understandings, written or oral, relating to the subject matter of this Amendment.\\n\\n Governing Law:\\n\\n This Amendment shall be governed by the laws of [Jurisdiction, e.g., \\\"State of New York\\\"].\",\n \"This appendix, part of the Contract between Party One and Party Two, sets milestones and deadlines for Party Two. \\n\\nMilestone 1 involves tasks such as gathering requirements, designing user interface etc. with the objective of developing initial prototype of a software application. Delivery Deadline: September 15, 2023. \\n\\nMilestone 2 involves tasks like incorporating feedback and conducting beta testing, aiming at finalizing and testing the software application. 
Delivery Deadline: October 15, 2023. \\n\\nEach milestone's completion will be reviewed by Party One; if requirements aren't met, Party Two may correct and resubmit. P", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:29-30" + }, + "187": { + "file_id": 6, + "content": "This code section contains legal terms and conditions, specifically an amendment to a contract with governing law and milestones for software development.", + "type": "comment" + }, + "188": { + "file_id": 6, + "content": "ayment terms and penalties are outlined in the main Contract. This appendix is executed adhering to main Contract's terms and conditions.\",\n \"APPENDIX B \\u2013 CHANGE IN TIME OF DELIVERY\\n\\nThis Appendix is an addendum to the contract (the \\\"Contract\\\") dated [Original Contract Date], entered into between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred to as the \\\"Second Party\\\". The purpose of this Appendix is to amend and modify the delivery time as specified in the original Contract.\\n\\n Original Delivery Time: As per the terms of the original Contract, the delivery time was set for [Original Delivery Date, e.g., \\\"September 15, 2023\\\"].\\n\\n Revised Delivery Time: The Parties, through mutual agreement, have now decided to amend the delivery time. The new delivery date shall be [Revised Delivery Date, e.g., \\\"October 10, 2023\\\"].\\n\\n Reason for Change: [Provide a brief explanation for the change in delivery time, ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:30-31" + }, + "189": { + "file_id": 6, + "content": "This code defines an appendix to a contract that modifies the delivery time due to mutual agreement between parties. 
The original and revised delivery dates are provided, along with a brief explanation for the change.", + "type": "comment" + }, + "190": { + "file_id": 6, + "content": "e.g., \\\"Due to unforeseen challenges in the production process, additional time is required to ensure that the deliverables meet the agreed-upon quality standards.\\\"]\\n\\n Consequences of Delay: Unless otherwise stated in the main body of the Contract:\\n\\n a. If the Second Party fails to meet the revised delivery time, penalties or consequences as outlined in the original Contract for late delivery will apply from the revised delivery date.\\n\\n b. All other terms related to late delivery, including but not limited to penalties, refunds, or rights to terminate, remain effective and unchanged by this Appendix.\\n\\n Prevailing Terms: All other terms and conditions of the original Contract not specifically amended by this Appendix shall remain in full force and effect. In the event of any inconsistency or conflict between the original Contract and this Appendix, the terms of this Appendix shall prevail with respect to the change in the delivery time.\\n\\n Acknowledgment: By sig", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:31-31" + }, + "191": { + "file_id": 6, + "content": "The code defines consequences and prevailing terms for a contract's delayed delivery, with the Appendix taking precedence in case of inconsistencies.", + "type": "comment" + }, + "192": { + "file_id": 6, + "content": "ning this Appendix, the Parties acknowledge and agree to the revised delivery time and any associated consequences of delays.\\n\\nThis Appendix is executed as an acknowledgment and agreement to the revised delivery time and shall be considered an integral part of the original Contract.\",\n \"APPENDIX C \\u2013 ADDITIONAL CONFIDENTIAL INFORMATION\\n\\nThis Appendix is an extension of the contract (the \\\"Contract\\\") dated [Original Contract Date], between [Party One Name] 
(\\\"First Party\\\") and [Party Two Name] (\\\"Second Party\\\"). It outlines additional categories of confidential information beyond those detailed in the Contract.\\n\\n Additional Confidential Information Includes:\\n\\n a. Non-public financial data.\\n\\n b. Unpublished marketing strategies and materials.\\n\\n c. Upcoming product or service details.\\n\\n d. Proprietary software codes and processes.\\n\\n e. Personnel records.\\n\\n f. Any data labeled as \\\"Confidential\\\" or \\\"Proprietary\\\" after the Contract\\u2019s e", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:31-32" + }, + "193": { + "file_id": 6, + "content": "This code appears to be extracting text chunks from a file named \"pure_documents.json\" at lines 30-31, possibly representing various appendices or amendments within an existing contract. These sections outline revised delivery times and additional confidential information categories beyond the original scope of the agreement.", + "type": "comment" + }, + "194": { + "file_id": 6, + "content": "xecution.\\n\\n Protection & Exclusions:\\n\\nBoth Parties shall extend the same protection to this Additional Confidential Information as previously agreed upon in the Contract. 
Information that becomes public, is received rightfully from a third party, is independently developed, or gets written release authorization is excluded from confidentiality obligations.\\n\\n Duration:\\n\\nThe confidentiality obligations for this Appendix shall persist as defined in the Contract or, if unspecified, for [e.g., \\\"five years\\\"] from the disclosure date.\\n\\n Prevailing Terms:\\n\\nIf there\\u2019s any conflict between this Appendix and the Contract concerning confidentiality, this Appendix takes precedence concerning Additional Confidential Information.\\n\\nExecuted as an integral part of the Contract.\",\n \"AMENDMENT TO NON-DISCLOSURE AGREEMENT\\n\\nThis Amendment (the \\u201cAmendment\\u201d) is made and entered into as of [Amendment Date], by and between [Party A Name], having an address at [Party ", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:32-33" + }, + "195": { + "file_id": 6, + "content": "This code defines a legal document containing confidentiality provisions, exclusions, duration, and precedence in case of conflict with the Contract.", + "type": "comment" + }, + "196": { + "file_id": 6, + "content": "A Address] (\\u201cParty A\\u201d), and [Party B Name], having an address at [Party B Address] (\\u201cParty B\\u201d), collectively referred to as the \\u201cParties.\\u201d\\n\\nRECITALS\\n\\nWHEREAS, the Parties entered into a Non-Disclosure Agreement dated [Original NDA Date] (the \\u201cOriginal Agreement\\u201d);\\n\\nWHEREAS, the Parties desire to amend the Original Agreement to extend the duration of certain restrictions therein;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants and promises made by the Parties hereto, the Parties agree as follows:\\n\\n Extension of Time Restrictions: The time restriction set forth in Section [X] of the Original Agreement, currently stating a period of [Original Time, e.g., \\\"two (2) years\\\"], is hereby amended and extended to [New Time, e.g., 
\\\"five (5) years\\\"] from the date of disclosure of the Confidential Information.\\n\\n Full Force and Effect: Except as expressly modified by this Amendment, all terms, conditions, and provisions of the Or", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:33-33" + }, + "197": { + "file_id": 6, + "content": "This code represents a legal document outlining the terms of an amendment to an existing Non-Disclosure Agreement (NDA) between Party A and Party B. The amendment extends the duration of certain time restrictions within the original agreement, with new time specified in Section [X].", + "type": "comment" + }, + "198": { + "file_id": 6, + "content": "iginal Agreement shall remain in full force and effect. In the event of any conflict between the terms of this Amendment and the Original Agreement, the terms of this Amendment shall govern.\\n\\n Counterparts: This Amendment may be executed in counterparts, each of which shall be deemed an original and all of which together shall constitute one and the same instrument.\\n\\n Governing Law: This Amendment shall be governed by and construed in accordance with the laws of [Governing State or Country, e.g., \\\"the State of California\\\"], without regard to its conflict of laws principles.\\n\\nIN WITNESS WHEREOF, the Parties hereto have executed this Amendment as of the date first above written.\",\n \"BUSINESS COOPERATION AGREEMENT\\n\\nThis Agreement is between [Business A Name], at [Business A Address] (\\\"Business A\\\"), and [Business B Name], at [Business B Address] (\\\"Business B\\\"), effective [Day, Month, Year].\\n\\n1. Purpose:\\nBoth businesses will cooperate in [brief description, e.g., \\\"", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:33-34" + }, + "199": { + "file_id": 6, + "content": "This code represents an amendment to an original business agreement. It includes provisions for conflict resolution, counterparts execution, and governing law. 
The agreement is between two businesses - Business A and Business B, with a specified effective date.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/2.json b/docs/data/2.json new file mode 100644 index 0000000..bc2cfc0 --- /dev/null +++ b/docs/data/2.json @@ -0,0 +1,548 @@ +{ + "200": { + "file_id": 6, + "content": "joint marketing\\\"].\\n\\n2. Responsibilities:\\n\\n Business A will: [Key obligation, e.g., \\\"Promote Business B in newsletters.\\\"]\\n Business B will: [Key obligation, e.g., \\\"Display Business A products.\\\"]\\n\\n3. Term:\\nEffective from the above date for [e.g., \\\"12 months\\\"]. Either party can terminate with [e.g., \\\"30 days\\\"] notice.\\n\\n4. Confidentiality:\\nConfidential information remains private, during and post-agreement.\\n\\n5. Governing Law:\\nGoverning laws of [State/Country, e.g., \\\"California\\\"].\\n\\n6. Amendments:\\nChanges must be written and signed by both parties.\",\n \"APPENDIX TO BUSINESS COOPERATION AGREEMENT\\n\\nEXTENSION OF CONFIDENTIALITY CONDITIONS\\n\\nThis Appendix is made as of [Day, Month, Year], and is appended to the Business Cooperation Agreement dated [Original Agreement Date] (\\\"Original Agreement\\\") between [Business A Name], located at [Business A Address] (\\\"Business A\\\") and [Business B Name], located at [Business B Address] (\\\"Business B\\\").\\n\\n1. Extensio", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:34-35" + }, + "201": { + "file_id": 6, + "content": "This code represents a business cooperation agreement between Business A and Business B, outlining their joint marketing responsibilities, term, confidentiality, governing law, and amendment processes. 
An appendix is also included to extend the confidentiality conditions of the original agreement.", + "type": "comment" + }, + "202": { + "file_id": 6, + "content": "n of Confidentiality Period:\\nThe confidentiality period stipulated in Section 4 (or the appropriate section number) of the Original Agreement is hereby extended. Previously set to expire [Original Expiry Date], it will now extend to [New Expiry Date].\\n\\n2. Continued Obligations:\\nAll other confidentiality obligations and conditions outlined in the Original Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the Original Agreement, constitutes the entire agreement between the parties regarding the subject matter herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nIN WITNESS WHEREOF, both parties hereto have executed this Appendix as of the date first above written.\",\n \"APPENDIX: LOYALTY CLAUSE\\n\\nEffective [Day, Month, Year], attached to the Agreement dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Loya", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:35-36" + }, + "203": { + "file_id": 6, + "content": "Section of code describes an amendment to a confidentiality agreement, extending the confidentiality period and keeping other obligations unchanged. It also outlines the governing law for the amendment.", + "type": "comment" + }, + "204": { + "file_id": 6, + "content": "lty Commitment:\\nFor one year from the Effective Date, both parties pledge loyalty by refraining from activities harmful or competitive to the other within the context of the Agreement.\\n\\n2. Consequences:\\nBreaches may result in Agreement termination and legal action as per the original terms.\\n\\n3. 
Governing Law:\\nGoverned by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is appended to the B2B Contractor Agreement (\\\"Agreement\\\") dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name], hereinafter referred to as \\\"Company', and [Contractor Name], hereinafter referred to as \\\"Contractor\\\".\\n\\n1. Confidentiality:\\n\\n1.1 Both Company and Contractor acknowledge that they may have access to or receive information during the term of the Agreement which is confidential to the disclosing party (\\\"Confidential Information\\\").\\n\\n1.2 Confidential Information shall not include information that:\\n\\n is or b", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:36-37" + }, + "205": { + "file_id": 6, + "content": "This code snippet represents a legal document with sections for commitment, consequences of breach, and governing law. The commitment section outlines loyalty pledge and restraints from competitive activities. The consequences section describes potential agreement termination and legal actions for breaches. Lastly, the governing law section states the applicable state or country's laws to govern the agreement.", + "type": "comment" + }, + "206": { + "file_id": 6, + "content": "ecomes public knowledge without breach of this clause;\\n was known by the receiving party before receipt from the disclosing party;\\n is received from a third party without breach of any obligation of confidentiality.\\n\\n1.3 The receiving party shall:\\n\\n use the Confidential Information only for performing under the Agreement;\\n take all reasonable precautions to prevent any unauthorized disclosure of the Confidential Information;\\n not disclose, reproduce, or distribute Confidential Information without the written consent of the disclosing party.\\n\\n2. 
Duration:\\n\\nThe obligations set forth in this Appendix shall continue for a period of [e.g., \\\"two years\\\"] from the date of termination or expiration of the Agreement.\\n\\n3. Return or Destruction:\\n\\nUpon the expiration or termination of the Agreement, or upon the disclosing party's request, the receiving party shall return or, if directed by the disclosing party, destroy all copies of the Confidential Information.\\n\\n", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:37-37" + }, + "207": { + "file_id": 6, + "content": "This code snippet contains a confidentiality agreement clause, which outlines the rules for handling and protecting sensitive information. The receiving party is required to use the Confidential Information only for performing under the Agreement, take precautions to prevent unauthorized disclosure, and obtain written consent before reproducing or distributing it. The obligations of this clause continue for a specified duration (e.g., two years) after the expiration or termination of the Agreement, and the receiving party must return or destroy all copies upon request or termination.", + "type": "comment" + }, + "208": { + "file_id": 6, + "content": "4. Governing Law:\\n\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the Agreement.\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is part of the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name] (\\\"Company\\\") and [Contractor Name] (\\\"Contractor\\\").\\n\\n1. Confidential Information:\\nBoth parties may access or receive the other's confidential information (\\\"Confidential Information\\\") during the Agreement term. Confidential Information excludes publicly known details, data known prior, or information obtained from third parties without confidentiality obligations.\\n\\n2. 
Obligations:\\nThe recipient shall:\\n\\n Use the Confidential Information solely for the Agreement's purpose.\\n Prevent unauthorized disclosures.\\n Not disclose without prior written consent.\\n\\n3. Duration:\\nObligations persist for [e.g., \\\"two years\\\"] post Agreement termination or expiration.\\n\\n4. Return/Des", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:37-38" + }, + "209": { + "file_id": 6, + "content": "This code snippet represents a Confidentiality Agreement between two parties, defining the scope of confidential information, obligations to protect it, and its duration post-agreement termination or expiration.", + "type": "comment" + }, + "210": { + "file_id": 6, + "content": "truction:\\nUpon Agreement conclusion, or on request, all Confidential Information copies should be returned or destroyed.\\n\\n5. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Tech Company Name], located at [Tech Company Address], (\\\"Company\\\") and [Contractor's Full Name], located at [Contractor Address], (\\\"Contractor\\\").\\n\\nPurpose:\\nContractor will access Company's confidential information during their engagement.\\n\\n1. Definition:\\n\\\"Confidential Information\\\" means proprietary data related to the Company\\u2019s business, excluding publicly known details, prior known information, or data from third parties without confidentiality bounds.\\n\\n2. Obligation:\\nContractor shall:\\n\\n Use Confidential Information solely for engagement purposes.\\n Prevent unauthorized disclosure.\\n\\n3. 
Duration:\\nObligations persist for [e.g., \\\"two years\\\"] from disclosure", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:38-39" + }, + "211": { + "file_id": 6, + "content": "Non-Disclosure Agreement (NDA) between a tech company and a contractor, effective on [Effective Date], defining Confidential Information, its use, protection, and obligation duration.", + "type": "comment" + }, + "212": { + "file_id": 6, + "content": " date.\\n\\n4. Return:\\nContractor shall return all Confidential Information items upon engagement completion or Company's request, retaining no copies.\\n\\n5. Remedies:\\nBreach may result in legal actions, damages, and costs.\\n\\n6. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: EXTENSION OF CONTRACT DURATION\\n\\nThis Appendix is a part of the Agreement initially dated [Original Agreement Date, e.g., \\\"August 15, 2021\\\"], between [Party A Name], located at [Party A Address] (\\\"Party A\\\") and [Party B Name], located at [Party B Address] (\\\"Party B\\\").\\n\\n1. Duration Extension:\\nThe duration of the Agreement referenced above is hereby extended for an additional two (2) years from the original expiration date. With this extension, the new expiration date of the Agreement will be [New Expiration Date, e.g., \\\"August 15, 2025\\\"].\\n\\n2. All Other Terms Remain Unchanged:\\nExcept for the extension of the contract duration as described herein, all other term", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:39-40" + }, + "213": { + "file_id": 6, + "content": "Section describes terms for confidential information handling, remedies for breach, and governing law. Appendix extends contract duration by two years while keeping other terms unchanged.", + "type": "comment" + }, + "214": { + "file_id": 6, + "content": "s and conditions of the Agreement remain unchanged and in full effect.\\n\\n3. 
Entire Agreement:\\nThis Appendix, in conjunction with the original Agreement, constitutes the entire agreement between Party A and Party B. Any previous understandings, written or oral, relating to the subject of this Appendix are superseded by the terms herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"CONFIDENTIALITY AGREEMENT\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Supplier Name], located at [Supplier Address] (\\\"Supplier\\\").\\n\\n1. Definition:\\n\\\"Confidential Information\\\" means proprietary data of the Company, excluding:\\n\\n Pre-disclosed or publicly known data.\\n Info from third parties without confidentiality bounds.\\n\\n2. Obligations:\\nSupplier will:\\n\\n Use Confidential Information solely for b", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:40-41" + }, + "215": { + "file_id": 6, + "content": "This code snippet represents a confidentiality agreement between Company and Supplier, with details such as governing law, effective date, obligations of the Supplier regarding Confidential Information, and superseding previous understandings.", + "type": "comment" + }, + "216": { + "file_id": 6, + "content": "usiness purposes with the Company.\\n Protect its secrecy and prevent unauthorized disclosure.\\n Return or destroy all Confidential Information upon request or business completion.\\n\\n3. Duration:\\nObligations last for [e.g., \\\"two years\\\"] from disclosure date.\\n\\n4. Remedies:\\nBreaches may result in legal actions, damages, and costs by the Company.\\n\\n5. 
Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: BREACH CONSEQUENCES\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Notification:\\nSuspected breaches must be reported in writing by the non-breaching party.\\n\\n2. Rectification:\\nThe breaching party has [e.g., \\\"14 days\\\"] from notification to rectify, unless irreparable.\\n\\n3. Fees:\\nBreaches incur a penalty of [e.g., \\\"$10,000\\\"], aside from claimed damages.\\n\\n4. Legal Actions:\\nUnresolved or damaging breaches may lead to lega", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:41-42" + }, + "217": { + "file_id": 6, + "content": "This code is part of a legal document agreement. It specifies the obligations, duration, remedies for breaches, and governing law in case of any violations. It also includes details about notifying suspected breaches, rectification timelines, fees for breaches, and potential legal actions if necessary.", + "type": "comment" + }, + "218": { + "file_id": 6, + "content": "l actions, including injunctive relief, damages, and legal fees.\\n\\n5. Termination:\\nRepeated or severe breaches can cause Agreement termination by the non-breaching party.\\n\\n6. Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: TERMS OF CONTRACT TERMINATION\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Termination for Breach:\\nIf either party breaches any conditions of the Agreement, the non-breaching party may terminate the Agreement immediately upon written notice to the breaching party.\\n\\n2. Termination by Notice:\\nEither party may terminate the Agreement for any reason by providing a written notice to the other party. 
The termination will become effective 30 days after the receipt of such notice.\\n\\n3. Obligations Upon Termination:\\nUpon termination, all rights and obligations under the Agreement will cease, except for those which by thei", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:42-43" + }, + "219": { + "file_id": 6, + "content": "This code defines contract termination clauses, including termination for breach, termination by notice, and obligations upon termination. It also specifies that the Agreement is governed by specific state or country laws.", + "type": "comment" + }, + "220": { + "file_id": 6, + "content": "r nature should survive termination (e.g., confidentiality, liability for prior breaches, etc.).\\n\\n4. Governing Law:\\nThis Appendix, and any disputes arising from it, will be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"APPENDIX: OBLIGATIONS UPON TERMINATION\\n\\nPertaining to the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Return of Property:\\nUpon termination, each party shall promptly return to the other all property, materials, and assets belonging to the other party, unless otherwise specified in the Agreement.\\n\\n2. Confidential Information:\\nBoth parties shall continue to abide by any confidentiality obligations set forth in the Agreement. Any confidential information must be returned or destroyed, as instructed by the owning party.\\n\\n3. Outstanding Payments:\\nAll due payments must be settled within [e.g., \\\"14 day", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:43-44" + }, + "221": { + "file_id": 6, + "content": "This code represents an Appendix titled \"Obligations Upon Termination\" in a legal agreement between Party A and Party B. 
It outlines the responsibilities of both parties, such as returning property, maintaining confidentiality, and settling outstanding payments upon termination of the agreement.", + "type": "comment" + }, + "222": { + "file_id": 6, + "content": "s\\\"] of termination, as per the terms of the original Agreement.\\n\\n4. Non-Disparagement:\\nBoth parties agree not to make any derogatory or disparaging statements about the other party post-termination.\\n\\n5. Survival of Provisions:\\nAny provisions in the Agreement that, by their nature, should persist beyond termination (e.g., indemnity, liability, confidentiality) will continue to be in effect.\\n\\n6. Notifications:\\nEach party must inform their respective stakeholders, if necessary, about the termination in a manner that maintains the goodwill and reputation of both parties.\\n\\n7. Transition Assistance:\\nTo ensure a smooth transition, both parties agree to cooperate, as reasonably requested by the other, for a period of [e.g., \\\"30 days\\\"] after termination.\\n\\n8. Governing Law:\\nThis Appendix is governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Date, e.g., \\\"August 15, 2023\\\"], be", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:44-45" + }, + "223": { + "file_id": 6, + "content": "Non-Disclosure Agreement (NDA) with termination, non-disparagement, survival of provisions, notifications, transition assistance, and governing law clauses.", + "type": "comment" + }, + "224": { + "file_id": 6, + "content": "tween [Client Name], (\\\"Client\\\") and [Business Name], (\\\"Business\\\").\\n\\nPurpose:\\nProtection of confidential information exchanged due to potential collaboration.\\n\\n1. Confidentiality:\\nBusiness agrees to keep secret all Confidential Information shared by Client.\\n\\n2. 
Definition:\\n\\\"Confidential Information\\\" is non-public data shared by either party, excluding info that's publicly available, already known, or received without confidentiality constraints.\\n\\n3. Duration:\\nObligations last [e.g., \\\"two years\\\"] from the date of disclosure.\\n\\n4. Return/Destruction:\\nUpon Client's request, Business will return or destroy all Confidential Information.\\n\\n5. Remedies:\\nUnauthorized disclosures may lead to legal action by Client, including damages.\\n\\n6. Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"IT SERVICES AGREEMENT\\n\\nEffective Date: [Date, e.g., \\\"August 15, 2023\\\"]\\n\\nParties:\\n\\n [Client Name], located at [Client Address] (\\\"Client\\\")\\n [Service Prov", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:45-46" + }, + "225": { + "file_id": 6, + "content": "The code is for a confidentiality agreement between a client and a business. It outlines the purpose, terms of confidentiality, definition of confidential information, duration of obligations, return/destruction process, legal remedies, and governing laws.", + "type": "comment" + }, + "226": { + "file_id": 6, + "content": "ider Name], located at [Service Provider Address] (\\\"Provider\\\")\\n\\nScope of Work:\\nProvider agrees to offer IT services, including [e.g., \\\"network setup, software installation, and routine maintenance\\\"], as detailed in Attachment A.\\n\\nPayment:\\nClient shall pay Provider [e.g., \\\"$1,000\\\"] per month. Invoices will be sent monthly and are due within [e.g., \\\"30 days\\\"].\\n\\nDuration:\\nThis Agreement starts on [Start Date] and ends on [End Date], unless terminated earlier.\\n\\nTermination:\\nEither party may terminate with [e.g., \\\"30 days\\\"] written notice. 
Upon termination, any unpaid fees for services rendered become immediately due.\\n\\nConfidentiality:\\nBoth parties agree to keep all business and technical information confidential.\\n\\nLimitation of Liability:\\nProvider's liability is limited to the amount paid by the Client for the specific service causing damage.\\n\\nGoverning Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nEntire Agreement:\\nThis constitutes the full agreement between both parties.\",", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:46-46" + }, + "227": { + "file_id": 6, + "content": "This code is defining the basic structure and content of a service agreement between a client and a service provider, including scope of work, payment terms, duration, termination clauses, confidentiality, limitation of liability, governing law, and stating that this constitutes the full agreement between both parties.", + "type": "comment" + }, + "228": { + "file_id": 6, + "content": " \"CONFIDENTIALITY AMENDMENT TO NDA\\n\\nThis Amendment, effective [Date, e.g., \\\"August 15, 2023\\\"], modifies the NDA dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Responsibilities:\\n\\na) Protection: Parties must safeguard Confidential Information at least as they do their own.\\n\\nb) Access: Access is limited to those needing it who are also bound by confidentiality.\\n\\nc) Breach Notification: Parties must immediately inform the other of any breaches.\\n\\nd) Return/Destruction: Upon request or agreement end, parties must return or certify the destruction of Confidential Information.\\n\\ne) No Reverse Engineering: Receiving party shall not reverse engineer any provided items.\\n\\n2. Remedies:\\nUnauthorized disclosures permit injunctive relief and other legal remedies.\\n\\n3. 
Original Agreement:\\nExcept for this Amendment, the NDA remains unchanged.\\n\\nGoverning Law:\\nAs per the NDA.\",\n \"LOYALTY AGREEMENT\\n\\nThis Agreement (\\\"", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:47-48" + }, + "229": { + "file_id": 6, + "content": "This code contains different types of legal documents including a Confidentiality Amendment to NDA and a Loyalty Agreement, which outline the terms between two parties.", + "type": "comment" + }, + "230": { + "file_id": 6, + "content": "Agreement\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Party A Name], with its principal office at [Party A Address] (\\\"Party A\\\"),\\n [Party B Name], with its principal office at [Party B Address] (\\\"Party B\\\").\\n\\nPurpose:\\nThe parties wish to collaborate and establish a loyal relationship in their joint business endeavors.\\n\\n1. Loyalty Commitment:\\n\\na) Both parties commit to act in good faith and refrain from engaging in any activity or partnership that might conflict with the interests of the other party during the term of this Agreement.\\n\\nb) Neither party shall assist, collaborate, or engage with third parties that may cause harm or disrepute to the other party.\\n\\nc) Each party shall prioritize the other's interests in situations where opportunities arise from their collaboration.\\n\\n2. Non-Solicitation:\\nDuring the term of this Agreement, and for [e.g., \\\"one year\\\"] thereafter, neither party shall solicit or attempt to entice away any clients, cust", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:48-48" + }, + "231": { + "file_id": 6, + "content": "This code defines a contractual agreement between two parties (Party A and Party B) for collaboration, loyalty commitment, and non-solicitation. 
The agreement is made as of a specific date and aims to establish a loyal relationship in their joint business endeavors.", + "type": "comment" + }, + "232": { + "file_id": 6, + "content": "omers, or employees of the other party.\\n\\n3. Duration:\\nThis Agreement will begin on the Effective Date and remain in effect for [e.g., \\\"two years\\\"] unless terminated earlier by mutual consent.\\n\\n4. Termination:\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term herein.\\n\\n5. Confidentiality:\\nBoth parties agree to maintain the confidentiality of all proprietary or non-public information obtained during the collaboration.\\n\\n6. Governing Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n7. Entire Agreement:\\nThis document constitutes the full understanding between both parties, superseding all prior discussions, agreements, or understandings.\",\n \"BUSINESS CONSULTING CONTRACT\\n\\nThis Consulting Contract (\\\"Contract\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Client Name], with its principal office at [Client Address] (\\\"Client\\\"),\\n [Consultant N", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:48-49" + }, + "233": { + "file_id": 6, + "content": "This is a business consulting contract between Client and Consultant. It includes clauses for scope of work, payment terms, duration, termination, confidentiality, governing law, and entire agreement.", + "type": "comment" + }, + "234": { + "file_id": 6, + "content": "ame], with its principal office at [Consultant Address] (\\\"Consultant\\\").\\n\\nPurpose:\\nThe Consultant will provide professional consulting services to the Client as described below.\\n\\n1. 
Scope of Services:\\nConsultant agrees to offer services including, but not limited to:\\na) Business strategy development\\nb) Market analysis\\nc) [Other services as needed]\\nAny additional services will require an amendment to this Contract.\\n\\n2. Compensation:\\nFor services rendered, the Client shall pay the Consultant [e.g., \\\"$100\\\"] per hour. Invoices will be issued [e.g., \\\"monthly\\\"] and are due within [e.g., \\\"30 days\\\"] of receipt.\\n\\n3. Duration:\\nThis Contract begins on [Start Date] and ends on [End Date], unless extended by mutual agreement or terminated earlier.\\n\\n4. Termination:\\nEither party can terminate this Contract with [e.g., \\\"30 days\\\"] written notice. In case of termination, the Client will pay for services rendered up to the notice date.\\n\\n5. Confidentiality:\\nThe Consultant sh", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:49-49" + }, + "235": { + "file_id": 6, + "content": "Consulting contract between Client and Consultant, outlines services provided, compensation terms, duration, termination conditions, and confidentiality agreement.", + "type": "comment" + }, + "236": { + "file_id": 6, + "content": "all maintain the confidentiality of all proprietary information received during the engagement, unless obligated by law to disclose.\\n\\n6. Non-compete:\\nFor [e.g., \\\"six months\\\"] after Contract termination, the Consultant agrees not to provide similar services to any direct competitor of the Client within [e.g., \\\"50 miles\\\"] of the Client's primary location.\\n\\n7. Independent Contractor:\\nThe Consultant is an independent contractor and not an employee of the Client.\\n\\n8. Governing Law:\\nThis Contract shall be governed by and interpreted under the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n9. 
Entire Agreement:\\nThis Contract represents the entire understanding between both parties, superseding all prior negotiations, discussions, or agreements.\",\n \"APPENDIX A: CONFIDENTIALITY BREACH FEES\\n\\nThis Appendix is attached to and made part of the Contract (\\\"Original Contract\\\") dated [Original Contract Date], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:49-50" + }, + "237": { + "file_id": 6, + "content": "This code snippet represents an agreement between two parties, Party A and Party B, with sections covering confidentiality, non-compete clauses, independent contractor status, governing law, and entire agreement. It also references Appendix A regarding confidentiality breach fees.", + "type": "comment" + }, + "238": { + "file_id": 6, + "content": "\\n1. Purpose:\\nThis Appendix defines the fees and penalties associated with any breach of confidentiality as stipulated in the Original Contract.\\n\\n2. Confidentiality Breach Fee:\\nIn the event of a breach of the confidentiality provisions in the Original Contract by either party:\\n\\na) The breaching party will be liable for an immediate penalty of [specific amount, e.g., \\\"$10,000\\\"].\\n\\nb) If the breach results in any direct financial loss to the non-breaching party, the breaching party shall additionally reimburse the non-breaching party for the full amount of such loss.\\n\\nc) The breaching party will also bear all costs, including legal fees, that the non-breaching party incurs while addressing or remedying the breach.\\n\\n3. Payment Terms:\\nPayment of any penalty or reimbursement as defined above shall be made within [e.g., \\\"30 days\\\"] of written notification of the breach.\\n\\n4. 
Disputes:\\nAny disputes related to this Appendix shall be resolved as stipulated in the dispute resolu", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:50-50" + }, + "239": { + "file_id": 6, + "content": "This code defines the fees and penalties for breaching confidentiality in the Original Contract, including immediate penalty amounts, reimbursement for direct financial losses, and coverage of legal fees and costs. Payment terms are outlined as well, with disputes to be resolved according to the dispute resolution stipulations in the contract.", + "type": "comment" + }, + "240": { + "file_id": 6, + "content": "tion clause of the Original Contract.\\n\\n5. Continuation of Original Contract:\\nExcept as modified by this Appendix, the Original Contract remains in full force and effect.\\n\\n6. Governing Law:\\nThis Appendix, consistent with the Original Contract, is governed by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX A: STRICT CONFIDENTIALITY BREACH PENALTIES\\n\\nThis Appendix is annexed to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name].\\n\\n1. Breach Fees:\\nIf a party breaches confidentiality:\\n\\na) Immediate penalty: [e.g., \\\"$50,000\\\"].\\n\\nb) For reputational harm or business loss: Additional [e.g., \\\"$100,000\\\"].\\n\\nc) Full reimbursement for direct financial losses caused by the breach.\\n\\nd) All associated legal and remedy costs borne by the breaching party.\\n\\n2. Remedial Actions:\\nThe breaching party must swiftly rectify the breach, potentially including public apologies or recalling disclosed information.\\n\\n3. Payment:\\nDue withi", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:50-51" + }, + "241": { + "file_id": 6, + "content": "This code represents a legal document, specifically an appendix to a contract, which outlines strict confidentiality breach penalties. 
The appendix is attached to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name]. It states the immediate penalty upon breaching confidentiality, potential additional fees for reputational harm or business loss, full reimbursement for direct financial losses caused by the breach, all associated legal and remedy costs to be borne by the breaching party, and that the breaching party must swiftly rectify the breach. Payment is due within a certain period.", + "type": "comment" + }, + "242": { + "file_id": 6, + "content": "n [e.g., \\\"15 days\\\"] of breach notification.\\n\\n4. Termination:\\nNon-breaching party can immediately terminate the main contract upon a breach.\\n\\n5. Governing Law:\\nThis Appendix adheres to [State/Country, e.g., \\\"California\\\"] laws.\"\n]", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:51-52" + }, + "243": { + "file_id": 6, + "content": "Code snippet outlines contractual terms for a breach notification, termination clause, and governing law.", + "type": "comment" + }, + "244": { + "file_id": 7, + "content": "/examples/keyword_counting/README.md", + "type": "filepath" + }, + "245": { + "file_id": 7, + "content": "The code offers a frequency computation method for countries in text using seven approaches, utilizing 'countries.csv' as input, and allows for custom samples, budgets, and directory creation with log files.", + "type": "summary" + }, + "246": { + "file_id": 7, + "content": "# Keyword Counting\nThe use case in this directory computes the frequencies of occurring countries \nin a long passage of text. 
We provide implementations of seven different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT4: split passage into 4 sub-passages\n - GoT8: split passage into 8 sub-passages\n - GoTx: split by sentences\n## Data\nWe provide an input file with 100 samples: `countries.csv`. It is also possible to use\nthe data generator `dataset_gen_countries.py` to generate additional or\ndifferent samples (using GPT-4). The parameters can be updated on line 54 (number of samples to be generated). \nNote that not every generated sample will be included in the dataset, as each sample is \nadditionally tested for validity (observe script output for details).\n## Execution\nThe file to execute the use case is called\n`keyword_counting.py`. In the main body, one can", + "type": "code", + "location": "/examples/keyword_counting/README.md:1-26" + }, + "247": { + "file_id": 7, + "content": "This code provides a use case for computing the frequencies of occurring countries in a long passage of text using seven different approaches including IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with variations, and Graph of Thoughts (GoT) with variations. It uses an input file named 'countries.csv' and provides a data generator for additional or different samples. The code to execute the use case is called 'keyword_counting.py'.", + "type": "comment" + }, + "248": { + "file_id": 7, + "content": "select the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. 
In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 150 of `plot.py` and run `python3\nplot.py` to plot your data.", + "type": "code", + "location": "/examples/keyword_counting/README.md:27-45" + }, + "249": { + "file_id": 7, + "content": "The code selects specific samples, approaches, and sets a budget for running Python scripts. It creates directories for each run with `config.json` and `log.log` files containing LLM prompts/responses and GRS data for samples. 
Change the results directory in line 150 of `plot.py` to plot data by running `python3 plot.py`.", + "type": "comment" + }, + "250": { + "file_id": 8, + "content": "/examples/keyword_counting/dataset_gen_countries.py", + "type": "filepath" + }, + "251": { + "file_id": 8, + "content": "The code generates a language model dataset by organizing country occurrences into popular and rest categories, cleaning paragraphs, checking for invalid elements, and storing the result in CSV format.", + "type": "summary" + }, + "252": { + "file_id": 8, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport csv\nfrom typing import List, Tuple\nfrom graph_of_thoughts import controller\ndef find_country_indices(text: str, country: str) -> List[Tuple[int, str]]:\n \"\"\"\n Finds the indices of the occurences of a given country in the input text.\n :param text: Input text.\n :type text: str\n :param country: Country to search for.\n :type country: str\n :return: List of tuples, where each tuple consists of index and country.\n :rtype: List[Tuple[int, str]]\n \"\"\"\n indices = []\n index = text.find(country)\n while index != -1:\n indices.append(index)\n index = text.find(country, index + 1)\n return [(index, country) for index in indices]\nprimary_countries = [\n \"Afghanistan\",\n \"Argentina\",\n \"Australia\",\n \"Brazil\",\n \"Canada\",\n \"China\",\n \"Colombia\",\n \"Cuba\",\n \"Egypt\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:1-43" + }, + "253": { + "file_id": 8, + "content": "This function finds the indices of occurrences of a given country in an input text and returns them as a list of tuples containing index and country. 
The primary_countries variable is a list of countries used in the dataset.", + "type": "comment" + }, + "254": { + "file_id": 8, + "content": " \"France\",\n \"Germany\",\n \"Greece\",\n \"India\",\n \"Indonesia\",\n \"Iran\",\n \"Iraq\",\n \"Ireland\",\n \"Israel\",\n \"Italy\",\n \"Japan\",\n \"Kenya\",\n \"Mexico\",\n \"Netherlands\",\n \"New Zealand\",\n \"Nigeria\",\n \"North Korea\",\n \"Pakistan\",\n \"Peru\",\n \"Philippines\",\n \"Poland\",\n \"Portugal\",\n \"Russia\",\n \"Saudi Arabia\",\n \"South Africa\",\n \"South Korea\",\n \"Spain\",\n \"Sweden\",\n \"Switzerland\",\n \"Thailand\",\n \"Turkey\",\n \"Ukraine\",\n \"United Arab Emirates\",\n \"United Kingdom\",\n \"United States\",\n \"Venezuela\",\n \"Vietnam\",\n \"Yemen\",\n \"Zimbabwe\",\n \"Belgium\",\n \"Norway\",\n]\nprimary_adjectives = [\n \"Afghan\",\n \"Argentine \",\n \"Argentinean\",\n \"Australian\",\n \"Brazilian\",\n \"Canadian\",\n \"Chinese\",\n \"Colombian\",\n \"Cuban\",\n \"Egyptian\",\n \"French\",\n \"German\",\n \"Greek\",\n \"Indian\",\n \"Indonesian\",\n \"Iranian\",\n \"Iraqi\",\n \"Irish\",\n \"Israeli\",\n \"Italian\",\n \"Japanese\",\n \"Kenyan\",\n \"Mexican\",\n \"Dutch\",\n \"New Zealander \",\n \"Kiwi\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:44-112" + }, + "255": { + "file_id": 8, + "content": "This code contains lists of countries and their corresponding primary adjectives. The countries list includes 46 nations, while the adjectives list has 28 items. 
These data can be used for keyword counting or other text processing tasks related to country-specific information.", + "type": "comment" + }, + "256": { + "file_id": 8, + "content": " \"Nigerian\",\n \"North Korean\",\n \"Pakistani\",\n \"Peruvian\",\n \"Filipino\",\n \"Philippine\",\n \"Polish\",\n \"Portuguese\",\n \"Russian\",\n \"Saudi \",\n \"Saudi Arabian\",\n \"South African\",\n \"South Korean\",\n \"Spanish\",\n \"Swedish\",\n \"Swiss\",\n \"Thai\",\n \"Turkish\",\n \"Ukrainian\",\n \"United Arab Emirates\",\n \"Emirati\",\n \"British\",\n \"American\",\n \"Venezuelan\",\n \"Vietnamese\",\n \"Yemeni\",\n \"Zimbabwean\",\n \"Belgian\",\n \"Norwegian\",\n]\nrest_countries = [\n \"Albania\",\n \"Algeria\",\n \"Andorra\",\n \"Angola\",\n \"Antigua and Barbuda\",\n \"Armenia\",\n \"Austria\",\n \"Azerbaijan\",\n \"The Bahamas\",\n \"Bahrain\",\n \"Bangladesh\",\n \"Barbados\",\n \"Belarus\",\n \"Belize\",\n \"Benin\",\n \"Bhutan\",\n \"Bolivia\",\n \"Bosnia and Herzegovina\",\n \"Botswana\",\n \"Brunei\",\n \"Bulgaria\",\n \"Burkina Faso\",\n \"Burundi\",\n \"Cabo Verde\",\n \"Cambodia\",\n \"Cameroon\",\n \"Central African Republic\",\n \"Chad\",\n \"Chile\",\n \"Comoros\",\n \"Congo\",\n \"Costa Rica\",\n \"Côte d’Ivoire\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:113-176" + }, + "257": { + "file_id": 8, + "content": "The code provides a list of countries divided into two sections: \"popular_countries\" containing widely recognized nations, and \"rest_countries\" containing the remaining countries. 
It appears to be used for organizing or filtering country data in an application or dataset.", + "type": "comment" + }, + "258": { + "file_id": 8, + "content": " \"Croatia\",\n \"Cyprus\",\n \"Czech Republic\",\n \"Czechia\",\n \"Denmark\",\n \"Djibouti\",\n \"Dominica\",\n \"Dominican Republic\",\n \"East Timor\",\n \"Timor-Leste\",\n \"Ecuador\",\n \"El Salvador\",\n \"Equatorial Guinea\",\n \"Eritrea\",\n \"Estonia\",\n \"Eswatini\",\n \"Ethiopia\",\n \"Fiji\",\n \"Finland\",\n \"Gabon\",\n \"The Gambia\",\n \"Georgia\",\n \"Ghana\",\n \"Grenada\",\n \"Guatemala\",\n \"Guinea\",\n \"Guinea-Bissau\",\n \"Guyana\",\n \"Haiti\",\n \"Honduras\",\n \"Hungary\",\n \"Iceland\",\n \"Jamaica\",\n \"Jordan\",\n \"Kazakhstan\",\n \"Kiribati\",\n \"Kosovo\",\n \"Kuwait\",\n \"Kyrgyzstan\",\n \"Laos\",\n \"Latvia\",\n \"Lebanon\",\n \"Lesotho\",\n \"Liberia\",\n \"Libya\",\n \"Liechtenstein\",\n \"Lithuania\",\n \"Luxembourg\",\n \"Madagascar\",\n \"Malawi\",\n \"Malaysia\",\n \"Maldives\",\n \"Mali\",\n \"Malta\",\n \"Marshall Islands\",\n \"Mauritania\",\n \"Mauritius\",\n \"Micronesia\",\n \"Moldova\",\n \"Monaco\",\n \"Mongolia\",\n \"Montenegro\",\n \"Morocco\",\n \"Mozambique\",\n \"Myanmar\",\n \"Burma\",\n \"Namibia\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:177-243" + }, + "259": { + "file_id": 8, + "content": "The code includes a list of country names in alphabetical order. 
Each country name is separated by a comma, and some countries have multiple names listed for different uses or recognition.", + "type": "comment" + }, + "260": { + "file_id": 8, + "content": " \"Nauru\",\n \"Nepal\",\n \"Nicaragua\",\n \"Niger\",\n \"North Macedonia\",\n \"Oman\",\n \"Palau\",\n \"Panama\",\n \"Papua New Guinea\",\n \"Paraguay\",\n \"Qatar\",\n \"Romania\",\n \"Rwanda\",\n \"Saint Kitts and Nevis\",\n \"Saint Lucia\",\n \"Saint Vincent and the Grenadines\",\n \"Samoa\",\n \"San Marino\",\n \"Sao Tome and Principe\",\n \"Senegal\",\n \"Serbia\",\n \"Seychelles\",\n \"Sierra Leone\",\n \"Singapore\",\n \"Slovakia\",\n \"Slovenia\",\n \"Solomon Islands\",\n \"Somalia\",\n \"Sri Lanka\",\n \"Sudan\",\n \"Suriname\",\n \"Syria\",\n \"Taiwan\",\n \"Tajikistan\",\n \"Tanzania\",\n \"Togo\",\n \"Tonga\",\n \"Trinidad and Tobago\",\n \"Tunisia\",\n \"Turkmenistan\",\n \"Tuvalu\",\n \"Uganda\",\n \"Uruguay\",\n \"Uzbekistan\",\n \"Vanuatu\",\n \"Vatican City\",\n \"Zambia\",\n]\nrest_adjectives = [\n \"Albanian\",\n \"Algerian\",\n \"Andorran\",\n \"Angolan\",\n \"Antiguan and Barbudan\",\n \"Armenian\",\n \"Austrian\",\n \"Azerbaijani\",\n \"Bahamian\",\n \"Bahraini\",\n \"Bangladeshi\",\n \"Barbadian\",\n \"Belarusian\",\n \"Belizean\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:244-306" + }, + "261": { + "file_id": 8, + "content": "This code defines a list of countries and their corresponding adjectives, which can be used to generate diverse language datasets.", + "type": "comment" + }, + "262": { + "file_id": 8, + "content": " \"Beninese\",\n \"Bhutanese\",\n \"Bolivian\",\n \"Bosnian and Herzegovinian\",\n \"Botswanan\",\n \"Bruneian\",\n \"Bulgarian\",\n \"Burkinabè\",\n \"Burundian\",\n \"Cape Verdean\",\n \"Cambodian\",\n \"Cameroonian\",\n \"Central African\",\n \"Chadian\",\n \"Chilean\",\n \"Comorian\",\n \"Congolese\",\n \"Costa Rican\",\n \"Ivorian\",\n \"Croatian\",\n \"Cypriot\",\n \"Czech\",\n 
\"Czech\",\n \"Danish\",\n \"Djiboutian\",\n \"Dominican\",\n \"Dominican\",\n \"East Timorese\",\n \"Timorese\",\n \"Ecuadorian\",\n \"Salvadoran\",\n \"Equatorial Guinean\",\n \"Eritrean\",\n \"Estonian\",\n \"Swazi\",\n \"Ethiopian\",\n \"Fijian\",\n \"Finnish\",\n \"Gabonese\",\n \"Gambian\",\n \"Georgian\",\n \"Ghanaian\",\n \"Grenadian\",\n \"Guatemalan\",\n \"Guinean\",\n \"Bissau-Guinean\",\n \"Guyanese\",\n \"Haitian\",\n \"Honduran\",\n \"Hungarian\",\n \"Icelandic\",\n \"Jamaican\",\n \"Jordanian\",\n \"Kazakh\",\n \"I-Kiribati\",\n \"Kosovar\",\n \"Kuwaiti\",\n \"Kyrgyz\",\n \"Laotian\",\n \"Latvian\",\n \"Lebanese\",\n \"Basotho\",\n \"Liberian\",\n \"Libyan\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:307-370" + }, + "263": { + "file_id": 8, + "content": "This code lists various country names and their corresponding adjective forms, used for identifying nationality or origin.", + "type": "comment" + }, + "264": { + "file_id": 8, + "content": " \"Liechtensteiner\",\n \"Lithuanian\",\n \"Luxembourger\",\n \"Malagasy\",\n \"Malawian\",\n \"Malaysian\",\n \"Maldivian\",\n \"Malian\",\n \"Maltese\",\n \"Marshallese\",\n \"Mauritanian\",\n \"Mauritian\",\n \"Micronesian\",\n \"Moldovan\",\n \"Monégasque\",\n \"Mongolian\",\n \"Montenegrin\",\n \"Moroccan\",\n \"Mozambican\",\n \"Myanmarese\",\n \"Burmese\",\n \"Namibian\",\n \"Nauruan\",\n \"Nepali\",\n \"Nicaraguan\",\n \"Nigerien\",\n \"Macedonian\",\n \"Omani\",\n \"Palauan\",\n \"Panamanian\",\n \"Papua New Guinean\",\n \"Paraguayan\",\n \"Qatari\",\n \"Romanian\",\n \"Rwandan\",\n \"Kittitian\",\n \"Nevisian\",\n \"Saint Lucian\",\n \"Vincentian\",\n \"Samoan\",\n \"Sammarinese\",\n \"Santomean\",\n \"Senegalese\",\n \"Serbian\",\n \"Seychellois\",\n \"Sierra Leonean\",\n \"Singaporean\",\n \"Slovak\",\n \"Slovenian\",\n \"Solomon Islander\",\n \"Somali\",\n \"Sri Lankan\",\n \"Sudanese\",\n \"Surinamese\",\n \"Syrian\",\n \"Taiwanese\",\n \"Tajik\",\n 
\"Tanzanian\",\n \"Togolese\",\n \"Tongan\",\n \"Trinidadian \",\n \"Tobagonian\",\n \"Tunisian\",", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:371-433" + }, + "265": { + "file_id": 8, + "content": "This code defines a list of country names and their associated adjectival forms, used for keyword counting in a dataset.", + "type": "comment" + }, + "266": { + "file_id": 8, + "content": " \"Turkmen\",\n \"Tuvaluan\",\n \"Ugandan\",\n \"Uruguayan\",\n \"Uzbek\",\n \"Ni-Vanuatu\",\n \"Vatican\",\n \"Zambian\",\n]\nlm = controller.ChatGPT(\n \"../../graph_of_thoughts/controller/config.json\", model_name=\"chatgpt4\"\n)\nprompt = \"\"\" Generate a continuous passage (single paragraph) of 16 sentences following the provided restrictions precisely. \n\nThe following restrictions must apply to the generated text:\n1. Single continuous passage of exactly 16 sentences without any paragraphs (line breaks).\n2. Countries appearing in the passage must be only from the provided list. No other countries can be mentioned.\n3. When a country is mentioned in the passage, it must be mentioned multiple times consecutively in the same or following sentences.\n4. Passage should be creative and coherent.\n5. Using adjectives of a country is NOT allowed (e.g., \"Colombian coffee\" should be \"coffee from Colombia\" instead)\n\n\nList of countries: [Afghanistan, Argentina", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:434-460" + }, + "267": { + "file_id": 8, + "content": "This code generates a prompt for an AI language model to create a continuous passage with 16 sentences using a provided list of countries and specific restrictions. 
The generated text should mention the countries multiple times consecutively, be creative and coherent, and avoid using adjectives for the countries.", + "type": "comment" + }, + "268": { + "file_id": 8, + "content": ", Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\nWhile exploring the ancient ruins in Greece, Sam discovered manuscripts that hinted at the hidden treasures of Egypt. It seemed these treasures were once stolen from Egypt by rogue merchants and secretly moved to Greece, only to be buried under layers of time. Intrigued, he shared the findings with his friend Maya from India, who was an expert in decoding ancient languages. She pointed out that there was a similar legend in India about treasures from China that had somehow ended up in the southern parts of India, possibly", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:460-462" + }, + "269": { + "file_id": 8, + "content": "The code defines a list containing the names of countries. These country names are used in various parts of the program to handle data related to specific countries.", + "type": "comment" + }, + "270": { + "file_id": 8, + "content": " through trade or conquest. She also recounted tales from China that spoke of incredible artifacts from Indonesia, suggesting a rich tapestry of cultural exchanges throughout history. Their conversation took an interesting turn when Sam mentioned a book he'd read about the mysterious connections between Argentina and Brazil. 
The book detailed how both Argentina and Brazil, despite their differences, shared tales of lost civilizations and forgotten cities deep within their jungles. Maya excitedly mentioned that she'd been to the Philippines and had heard local legends about ancient ties with Indonesia and how traders from the Philippines would journey to Indonesia in search of spices and other goods. Thinking of spices, Sam fondly recalled his trip to Spain, where he had learned about the country's historical links with Portugal. Spain and Portugal, both maritime giants of their time, had extensively explored unknown lands and established trade routes. Maya, remembering her travels, sai", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:462-462" + }, + "271": { + "file_id": 8, + "content": "Code snippet describes a conversation between Sam and Maya discussing historical connections between different countries through trade and cultural exchanges.", + "type": "comment" + }, + "272": { + "file_id": 8, + "content": "d that she had been to Belgium once and was fascinated by its connections with the Netherlands. Both Belgium and the Netherlands, she explained, had rich histories of art, trade, and diplomacy that intertwined them for centuries. 
They both sat back, marveling at the interconnectedness of the world and how countries from Greece to the Netherlands shared tales of adventure, discovery, and mystery.\n\nList of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\n\"\"\"\nnum_samples = 100\nsample_id = 0\nresult = [[\"ID\", \"Text\", \"Countries\", \"Sentences\", \"Characters\"]]", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:462-471" + }, + "273": { + "file_id": 8, + "content": "This code generates a dataset of samples, where each sample contains an ID, text, list of countries mentioned, number of sentences, and number of characters. It will generate 100 samples with incrementing IDs. 
The provided list of countries serves as the pool from which countries will be randomly selected for each sample's text.", + "type": "comment" + }, + "274": { + "file_id": 8, + "content": "\"\"\"\nGenerate passages of text that contain country names to be used as input for the\nkeyword counting.\nInput(x) : Number of samples\nOutput(y) : Passages written to a file in the CSV format.\n File contains the sample ID, the passage, the countries the passage\n contains, the sentences of the passages, number of characters of the\n passage.\n\"\"\"\n# For x batches of y responses\nfor _ in range(num_samples):\n response = lm.query(prompt, 1)\n texts = lm.get_response_texts(response)\n for text in texts:\n # Clean paragraphs - single long passage\n text = text.strip().replace(\"\\n\", \"\")\n # Get all occurrences of all primary permissible countries\n occurrences = []\n for country in [country for country in primary_countries if country in text]:\n occurrences.extend(find_country_indices(text, country))\n # Order exactly how they appear in the text\n ordered_occurrences = [country[1] for country in sorted(occurrences)]\n # Check invalid countries and adjectives", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:473-499" + }, + "275": { + "file_id": 8, + "content": "This code generates passages containing country names for keyword counting. 
It iterates through a given number of samples, queries the language model (lm) for responses, cleans paragraphs by removing newlines and extra spaces, finds all occurrences of primary countries in each text, orders them based on their appearance in the text, and checks for invalid countries or adjectives.", + "type": "comment" + }, + "276": { + "file_id": 8, + "content": " invalid_primary_adjective = [\n adjective for adjective in primary_adjectives if adjective in text\n ]\n invalid_rest_country = [\n country for country in rest_countries if country in text\n ]\n invalid_rest_adjective = [\n adjective for adjective in rest_adjectives if adjective in text\n ]\n invalid_count = (\n len(invalid_primary_adjective)\n + len(invalid_rest_country)\n + len(invalid_rest_adjective)\n )\n if invalid_count > 0:\n print(\n f\"Invalid countries or adjectives present: {invalid_primary_adjective}, {invalid_rest_country}, {invalid_rest_adjective}\"\n )\n continue\n result.append(\n [\n sample_id,\n text,\n \"[{0}]\".format(\", \".join(map(str, ordered_occurrences))),\n len(text.split(\".\")) - 1,\n len(text),\n ]\n )\n sample_id += 1\n# Writing to csv file", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:500-532" + }, + "277": { + "file_id": 8, + "content": "This code segment checks for invalid primary adjectives, rest countries, and rest adjectives in the text. It counts their occurrences, and if any of them are present, it prints a message with details about the invalid elements found. If there are no invalid elements, it adds the sample (with its ID, text, ordered occurrences, number of sentences, and total length) to the result list. 
The code continues to the next iteration, and after processing all samples, it will write the final result to a CSV file.", + "type": "comment" + }, + "278": { + "file_id": 8, + "content": "with open(\"countries_script.csv\", \"w\") as csvfile:\n csvwriter = csv.writer(csvfile)\n csvwriter.writerows(result)", + "type": "code", + "location": "/examples/keyword_counting/dataset_gen_countries.py:533-535" + }, + "279": { + "file_id": 8, + "content": "This code writes the result to a CSV file named \"countries_script.csv\". It opens the file in write mode (\"w\"), creates a CSV writer object, and uses the writerows() method to write each row of the result variable to the CSV file.", + "type": "comment" + }, + "280": { + "file_id": 9, + "content": "/examples/keyword_counting/plot.py", + "type": "filepath" + }, + "281": { + "file_id": 9, + "content": "This code retrieves JSON data, organizes it in a dictionary and plots results using boxplots and bar charts with customizable titles. It also sets y-axis limits, handles missing results and displays solved values.", + "type": "summary" + }, + "282": { + "file_id": 9, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Ales Kubicek\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", + 
"type": "code", + "location": "/examples/keyword_counting/plot.py:1-29" + }, + "283": { + "file_id": 9, + "content": "This code retrieves complete results from a given base directory, iterating through each folder and file. It collects JSON data from specified .json files, stores them in the \"results_complete\" dictionary with corresponding key and appends the data to its value.", + "type": "comment" + }, + "284": { + "file_id": 9, + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", + "type": "code", + "location": "/examples/keyword_counting/plot.py:30-58" + }, + "285": { + "file_id": 9, + "content": "The code sorts the results dictionary by key, then retrieves final scores for each method in the results_complete dictionary. 
It appends a list of scores (including score, solved status, prompt tokens, completion tokens, and cost) to the corresponding method in the scores dictionary.", + "type": "comment" + }, + "286": { + "file_id": 9, + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got4\", \"got8\", \"gotx\"],\n model=\"GPT-3.5\",\n y_lower=0,\n y_upper=40,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 100 and score != 300]", + "type": "code", + "location": "/examples/keyword_counting/plot.py:59-93" + }, + "287": { + "file_id": 9, + "content": "This code retrieves and prepares data for plotting keyword counting results. It first gets complete results from a specified base directory, then extracts final scores. The data is then organized into a dictionary format for plotting. The function `plot_results` takes this data, along with optional parameters to adjust the visualization. 
The code filters out irrelevant scores and orders them based on the input order.", + "type": "comment" + }, + "288": { + "file_id": 9, + "content": " for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(3.75, 4))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=10)\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(f\"Number of errors; the lower the better\", fontsize=fig_fontsize)\n ax.set_title(f\"Keyword Counting\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)", + "type": "code", + "location": "/examples/keyword_counting/plot.py:94-122" + }, + "289": { + "file_id": 9, + "content": "This code generates a boxplot of keyword counting results and adds a bar chart of total costs to the same axes. It uses the matplotlib library for plotting, sets tick and label positions, and allows for customization of y-axis labels and title. 
The total costs are calculated by summing the \"costs\" values from the \"results\" dictionary for each method in a specified order.", + "type": "comment" + }, + "290": { + "file_id": 9, + "content": " ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"keyword_counting_{model}.pdf\", bbox_inches=\"tight\")\nplot_results(", + "type": "code", + "location": "/examples/keyword_counting/plot.py:123-158" + }, + "291": { + "file_id": 9, + "content": "This code is setting the y-axis limits and ticks for a graph, adding annotations for solved solutions, labeling the y-axis, and saving the figure with a specific file name. It also handles missing results by continuing to the next method in case one is not available. 
The purpose of this code is likely related to plotting a graph that compares different methods or models based on their performance (cost) and whether they solved the problem or not.", + "type": "comment" + }, + "292": { + "file_id": 9, + "content": " get_plotting_data(\"results/\"),\n display_solved=True,\n annotation_offset=-0.3,\n model=\"GPT-3.5\",\n y_upper=35,\n display_left_ylabel=True,\n display_right_ylabel=True,\n cost_upper=9,\n)", + "type": "code", + "location": "/examples/keyword_counting/plot.py:159-167" + }, + "293": { + "file_id": 9, + "content": "This code is calling a function named 'get_plotting_data' to retrieve data from the \"results/\" directory and generate a plot. The model used for this task is \"GPT-3.5\". The y-axis has an upper limit of 35, and the cost axis has an upper limit of 9. The function will display solved values on the plot and show left and right y-labels.", + "type": "comment" + }, + "294": { + "file_id": 10, + "content": "/examples/set_intersection/README.md", + "type": "filepath" + }, + "295": { + "file_id": 10, + "content": "The code provides a data generator for set intersections and allows users to customize parameters, storing results in JSON files. To visualize the data, modify the results directory and length parameter in `plot.py` before executing the script.", + "type": "summary" + }, + "296": { + "file_id": 10, + "content": "# Set Intersection\nThe use case in this directory computes the intersection of two input\nsets. We provide implementations of five different approaches for 32, 64\nand 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT)\n## Data\nWe provide input files with 100 precomputed samples for each set length:\n`set_intersection_.csv`. 
It is also possible to use\nthe data generator `dataset_gen_intersection.py` to generate additional or\ndifferent samples. The parameters can be updated in lines 24 to 28 of\nthe main body:\n- set_size = 32 # size of the generated sets\n- int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n- seed = 42 # seed of the random number generator\n- num_sample = 100 # number of samples\n- filename = 'set_intersection_032.csv' # output filename\n## Execution\nThe files to execute the use case are called\n`set_intersection_.py`. In the main body, one can", + "type": "code", + "location": "/examples/set_intersection/README.md:1-29" + }, + "297": { + "file_id": 10, + "content": "This code provides different approaches for computing set intersection and offers a data generator to create samples. It includes IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). The user can specify the number of elements, upper bound, seed, number of samples, and output filename. The code is structured into separate files for each set length.", + "type": "comment" + }, + "298": { + "file_id": 10, + "content": "select the specific samples to be run (variable sample) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe input filename for the samples is currently hardcoded to\n`set_intersection_.csv`, but can be updated in the\nfunction `run`.\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. 
`log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.", + "type": "code", + "location": "/examples/set_intersection/README.md:30-46" + }, + "299": { + "file_id": 10, + "content": "This code selects samples and approaches, allows budget setting, hardcodes input filename, creates directories for execution-specific files, and stores the Graph Reasoning State (GRS) for each sample in separate JSON files.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/3.json b/docs/data/3.json new file mode 100644 index 0000000..7881a04 --- /dev/null +++ b/docs/data/3.json @@ -0,0 +1,542 @@ +{ + "300": { + "file_id": 10, + "content": "## Plot Data\nChange the results directory in line 170 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data.", + "type": "code", + "location": "/examples/set_intersection/README.md:48-52" + }, + "301": { + "file_id": 10, + "content": "This code snippet instructs the user to modify the results directory in line 170 of `plot.py` and adjust the length parameter accordingly before executing `python3 plot.py` to visualize their data.", + "type": "comment" + }, + "302": { + "file_id": 11, + "content": "/examples/set_intersection/dataset_gen_intersection.py", + "type": "filepath" + }, + "303": { + "file_id": 11, + "content": "The code defines a function \"scramble\" that shuffles array elements and generates random sets, calculating their intersection for specified samples. 
It uses numpy's default random generator with seed 42 to generate sets of size 32, writing the input, generated, and intersection sets in CSV format.", + "type": "summary" + }, + "304": { + "file_id": 11, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Robert Gerstenberger\nimport csv\nimport numpy as np\ndef scramble(array: np.ndarray, rng: np.random.Generator) -> None:\n \"\"\"\n Helper function to change the order of the elements in an array randomly.\n :param array: Array to be scrambled.\n :type: numpy.ndarray\n :param rng: Random number generator.\n :type rng: numpy.random.Generator\n \"\"\"\n size = array.shape[0]\n index_array = rng.integers(0, size, size)\n for i in range(size):\n temp = array[i]\n array[i] = array[index_array[i]]\n array[index_array[i]] = temp\nif __name__ == \"__main__\":\n \"\"\"\n Input(u) : Set size.\n Input(v) : Range of the integer numbers in the sets: 0..v (exclusive)\n Input(w) : Seed for the random number generator.\n Input(x) : Number of samples to be generated.\n Input(y) : Filename for the output CSV file.", + "type": "code", + "location": "/examples/set_intersection/dataset_gen_intersection.py:1-39" + }, + "305": { + "file_id": 11, + "content": "The code snippet defines a function called \"scramble\" which shuffles the elements of an array randomly. It also contains main code block that specifies input parameters such as set size, range of integer numbers in sets, seed for random number generator, number of samples to be generated, and filename for output CSV file. 
The purpose is likely to generate a dataset by scrambling the order of elements within sets.", + "type": "comment" + }, + "306": { + "file_id": 11, + "content": " Output(z) : Input sets and intersected set written a file in the CSV format.\n File contains the sample ID, input set 1, input set 2,\n intersection set.\n \"\"\"\n set_size = 32 # size of the generated sets\n int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n seed = 42 # seed of the random number generator\n num_sample = 100 # number of samples\n filename = \"set_intersection_032.csv\" # output filename\n assert 2 * set_size <= int_value_ubound\n rng = np.random.default_rng(seed)\n intersection_sizes = rng.integers(set_size // 4, 3 * set_size // 4, num_sample)\n np.set_printoptions(\n linewidth=np.inf\n ) # no wrapping in the array fields in the output file\n with open(filename, \"w\") as f:\n fieldnames = [\"ID\", \"SET1\", \"SET2\", \"INTERSECTION\"]\n writer = csv.DictWriter(f, delimiter=\",\", fieldnames=fieldnames)\n writer.writeheader()\n for i in range(num_sample):\n intersection_size = intersection_sizes[i]", + "type": "code", + "location": "/examples/set_intersection/dataset_gen_intersection.py:40-67" + }, + "307": { + "file_id": 11, + "content": "Code generates random sets and calculates their intersection for a given number of samples. It uses numpy's default random generator, with seed 42, to generate sets of size 32. The intersected set sizes are also randomly determined (within certain bounds) for each sample. 
The code writes the input sets, generated sets, and intersection sets in CSV format.", + "type": "comment" + }, + "308": { + "file_id": 11, + "content": " full_set = np.arange(0, int_value_ubound, dtype=np.int16)\n scramble(full_set, rng)\n intersection = full_set[:intersection_size].copy()\n sorted_intersection = np.sort(intersection)\n set1 = full_set[:set_size].copy()\n set2 = np.concatenate(\n [intersection, full_set[set_size : 2 * set_size - intersection_size]]\n )\n scramble(set1, rng)\n scramble(set2, rng)\n writer.writerow(\n {\n \"ID\": i,\n \"SET1\": set1.tolist(),\n \"SET2\": set2.tolist(),\n \"INTERSECTION\": sorted_intersection.tolist(),\n }\n )", + "type": "code", + "location": "/examples/set_intersection/dataset_gen_intersection.py:69-92" + }, + "309": { + "file_id": 11, + "content": "Code generates a full set of integers, scrambles it, takes an intersection of the set with a specified size, splits the full set into two sets, scramble each set, and writes a row to a CSV file containing ID, SET1, SET2, and sorted INTERSECTION.", + "type": "comment" + }, + "310": { + "file_id": 12, + "content": "/examples/set_intersection/plot.py", + "type": "filepath" + }, + "311": { + "file_id": 12, + "content": "The code collects and processes results from various AI methods, storing them in dictionaries for analysis or visualization. It generates boxplots to display the final scores of different methods with customizable y-axis settings and font size. 
The code also sets labels, plots a bar graph, adds annotations, adjustments, and text, saves as PDF, replaces characters in model names, and calls another function.", + "type": "summary" + }, + "312": { + "file_id": 12, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", + "type": "code", + "location": "/examples/set_intersection/plot.py:1-29" + }, + "313": { + "file_id": 12, + "content": "This code retrieves complete results from a given base directory. It iterates through each folder in the directory, loads JSON files within each folder, and stores the key-value pairs as dictionaries within lists under each folder's name in a results dictionary. 
The code also checks if directories are not empty folders.", + "type": "comment" + }, + "314": { + "file_id": 12, + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", + "type": "code", + "location": "/examples/set_intersection/plot.py:30-58" + }, + "315": { + "file_id": 12, + "content": "This code organizes and processes results from various AI methods, extracting scores, solved status, prompt/completion tokens, and cost for each method. 
It stores this information in a dictionary for further analysis or visualization.", + "type": "comment" + }, + "316": { + "file_id": 12, + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order", + "type": "code", + "location": "/examples/set_intersection/plot.py:59-94" + }, + "317": { + "file_id": 12, + "content": "The code retrieves final scores from complete results and organizes them into a dictionary for plotting. It then creates a new dictionary with scores, solved problems count, and costs for each method. 
This data is used to plot the results in a graph, considering options like method order, model, length, cost limits, and display settings.", + "type": "comment" + }, + "318": { + "file_id": 12, + "content": " ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:", + "type": "code", + "location": "/examples/set_intersection/plot.py:95-130" + }, + "319": { + "file_id": 12, + "content": "This code creates a boxplot to visualize the results of different methods. It sets the y-axis limits and ticks based on the length of the data, and customizes the font size for better readability. 
The code also handles the display of additional information (solved count) by adjusting the range of y-axis ticks accordingly.", + "type": "comment" + }, + "320": { + "file_id": 12, + "content": " ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]", + "type": "code", + "location": "/examples/set_intersection/plot.py:131-162" + }, + "321": { + "file_id": 12, + "content": "This code sets y-axis label, title, and twin axis for plotting. It then plots a bar graph using the twin axis, setting the y-axis limits and ticks based on specified conditions. 
Finally, it checks if certain conditions are met and adds annotations or adjusts the graph accordingly.", + "type": "comment" + }, + "322": { + "file_id": 12, + "content": " ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"set_intersection_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", + "type": "code", + "location": "/examples/set_intersection/plot.py:163-184" + }, + "323": { + "file_id": 12, + "content": "This code is adding text annotations to a plot, incrementing a count variable, and saving the final plot as a PDF. It replaces certain characters in the model name and calls another function for more plotting results with specific parameters.", + "type": "comment" + }, + "324": { + "file_id": 13, + "content": "/examples/set_intersection/utils.py", + "type": "filepath" + }, + "325": { + "file_id": 13, + "content": "The code contains helper functions `string_to_list()` and `string_to_set()`, which convert a string-encoded list or set into Python integers. 
The `test_set_intersection` function checks whether the final solution matches the ground truth, while `num_errors` compares the intersection of two sets with the sorted list from the input string, counting errors as a score and returning either the total errors or 1000 for exceptions.", + "type": "summary" + }, + "326": { + "file_id": 13, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# The source code is adapted from the sorting source code written by\n# Nils Blach.\n#\n# main author: Robert Gerstenberger\nfrom typing import Dict, List, Set\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of integer elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of integer elements.\n :rtype: List[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef string_to_set(string: str) -> Set[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n set object of integer elements.\n :param string: Input string containing a list.", + "type": "code", + "location": "/examples/set_intersection/utils.py:1-36" + }, + "327": { + "file_id": 13, + "content": "This code defines two helper functions: `string_to_list()` and `string_to_set()`. These functions are used to convert a list encoded in a string into a Python list or set object of integer elements. The `string_to_list()` function converts the input string into an integer list, while the `string_to_set()` function converts it into a set of integers. 
The assertion is raised if the input string does not contain a list.", + "type": "comment" + }, + "328": { + "file_id": 13, + "content": " :type string: str\n :return: Set of integer elements.\n :rtype: Set[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return {int(num) for num in string[1:-1].split(\",\")}\ndef test_set_intersection(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool\n \"\"\"\n # convert string to list\n try:\n correct_list = string_to_list(state[\"result\"])\n sorted_list = sorted(string_to_list(state[\"current\"]))\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors.", + "type": "code", + "location": "/examples/set_intersection/utils.py:37-72" + }, + "329": { + "file_id": 13, + "content": "Function `string_to_set` converts a string input into an integer set. Function `test_set_intersection` checks if the final solution matches the ground truth by converting the result and current states to lists, sorting them, and comparing. 
Finally, `num_errors` function calculates the number of errors in the given state as a score.", + "type": "comment" + }, + "330": { + "file_id": 13, + "content": " :rtype: float\n \"\"\"\n try:\n set1 = string_to_set(state[\"set1\"])\n set2 = string_to_set(state[\"set2\"])\n if \"subset\" in state and state[\"subset\"] != \"\" and state[\"subset\"] is not None:\n set2 = string_to_set(state[\"subset\"])\n common = sorted(list(set1 & set2))\n llm_solution = sorted(string_to_list(state[\"current\"]))\n num_errors = 0\n common_idx = 0\n llm_idx = 0\n while common_idx < len(common) and llm_idx < len(llm_solution):\n if common[common_idx] == llm_solution[llm_idx]:\n common_idx += 1\n llm_idx += 1\n elif common[common_idx] < llm_solution[llm_idx]:\n common_idx += 1\n num_errors += 1\n elif common[common_idx] > llm_solution[llm_idx]:\n llm_idx += 1\n num_errors += 1\n num_errors += len(common) - common_idx + len(llm_solution) - llm_idx\n return num_errors\n except:\n return 1000", + "type": "code", + "location": "/examples/set_intersection/utils.py:73-99" + }, + "331": { + "file_id": 13, + "content": "This function takes in two sets and a string, calculates the intersection of the sets and compares it with the sorted list from the string. If there is a mismatch between the common elements and the sorted list, it counts the number of errors. Returns the total number of errors found or 1000 if an exception occurs.", + "type": "comment" + }, + "332": { + "file_id": 14, + "content": "/examples/sorting/README.md", + "type": "filepath" + }, + "333": { + "file_id": 14, + "content": "The code directory contains various sorting algorithm examples for numbers 0-9 with implementations for IO, CoT, ToT, and GoT. It includes data files, Python scripts to execute use cases, and organizes results by name, approaches, day, and time. 
The plot.py file visualizes the results after modification.", + "type": "summary" + }, + "334": { + "file_id": 14, + "content": "# Sorting\nThe use case in this directory sorts the provided list of \nnumbers containing numbers from 0 to 9 (duplicates allowed). \nWe provide implementations of five different approaches for \n32, 64 and 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT: split into subarrays / sort / merge\n## Data\nWe provide input files with 100 precomputed samples for each list\nlength: `sorting_.csv`.\n## Execution\nThe files to execute the use case are called\n`sorting_.py`. In the main body, one can select the\nspecific samples to be run (variable sample) and the approaches\n(variable approaches). It is also possible to set a budget in dollars\n(variable budget).\nThe input filename for the samples is currently hardcoded to\n`sorting_.csv`, but can be updated in the function\n`run`.\nThe Python scripts will create the directory `result`, if it is not", + "type": "code", + "location": "/examples/sorting/README.md:1-31" + }, + "335": { + "file_id": 14, + "content": "This code directory contains examples of sorting algorithms for lists of numbers from 0 to 9. Implementations are provided for IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). Data includes input files with precomputed samples, and Python scripts execute the use case with options to select samples and approaches.", + "type": "comment" + }, + "336": { + "file_id": 14, + "content": "already present. In the 'result' directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. 
`config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 171 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data.", + "type": "code", + "location": "/examples/sorting/README.md:32-46" + }, + "337": { + "file_id": 14, + "content": "Code organizes results into separate directories for each run based on the name of LLM, list of approaches, day and start time. Inside these execution-specific directories, config.json contains the configuration, log.log has prompts & responses, and approach directories store GRS files for every sample. Plot data can be visualized by modifying the results directory in plot.py and running python3 plot.py.", + "type": "comment" + }, + "338": { + "file_id": 15, + "content": "/examples/sorting/plot.py", + "type": "filepath" + }, + "339": { + "file_id": 15, + "content": "The code reads and sorts JSON data, calculates scores for sorting algorithm performances, plots boxplots, customizes options, adjusts y-axis limits, adds annotations, saves as PDF, and calls function with GPT-3.5 parameters.", + "type": "summary" + }, + "340": { + "file_id": 15, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if 
os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", + "type": "code", + "location": "/examples/sorting/plot.py:1-29" + }, + "341": { + "file_id": 15, + "content": "This code reads a directory of JSON files, extracts their key and data, and stores them in a dictionary. It handles directories recursively and does not include non-JSON files or folders without .json files. This function may be used to collect and organize data from multiple sources.", + "type": "comment" + }, + "342": { + "file_id": 15, + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", + "type": "code", + "location": "/examples/sorting/plot.py:30-58" + }, + "343": { + "file_id": 15, + "content": "Code sorts results by \"key\" and returns them in a new dictionary. 
The sorted results are then processed to calculate scores for each method, including score, solution status, prompt tokens, completion tokens, and cost.", + "type": "comment" + }, + "344": { + "file_id": 15, + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]", + "type": "code", + "location": "/examples/sorting/plot.py:59-95" + }, + "345": { + "file_id": 15, + "content": "The code defines a function `get_plotting_data` that extracts and organizes data for plotting. It takes the base directory as input, retrieves complete results from it, then gets final scores. The final scores are organized into a dictionary called `results_plotting`, which contains scores, solved counts, and costs for each method. Another function, `plot_results`, is defined to handle the actual plotting of the data with customizable options. 
It extracts scores in the specified order, organizes them, and provides customizability such as display settings and annotations.", + "type": "comment" + }, + "346": { + "file_id": 15, + "content": " if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n method_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(method_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)", + "type": "code", + "location": "/examples/sorting/plot.py:96-131" + }, + "347": { + "file_id": 15, + "content": "This code creates a boxplot to visualize the scores of different methods, sets the ticks and labels, adjusts the y-axis limits based on length, and defines the y-lower limit as y_lower.", + "type": "comment" + }, + "348": { + "file_id": 15, + "content": " )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:\n ax.set_ylabel(f\"#incorrectly sorted elements; the lower the better\")\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = 
cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue", + "type": "code", + "location": "/examples/sorting/plot.py:132-163" + }, + "349": { + "file_id": 15, + "content": "Setting the y-tick positions and labels for ax2, setting the y-label for ax2 if display_right_ylabel is True, setting the title of the plot to length elements, setting the lower limit of the y-axis for ax2 if cost_upper > 0, adjusting the y-ticks' values for ax2 based on the number of ticks and the upper cost limit, and finally adding annotations for solved methods.", + "type": "comment" + }, + "350": { + "file_id": 15, + "content": " solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"sorting_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", + "type": "code", + "location": "/examples/sorting/plot.py:164-186" + }, + "351": { + "file_id": 15, + "content": "The code plots sorting algorithm performance data and displays the solved count for each method. It saves the plot as a PDF with the model name and length appended to its filename. 
The function is then called again with specific parameters, including GPT-3.5 as the model.", + "type": "comment" + }, + "352": { + "file_id": 16, + "content": "/examples/sorting/utils.py", + "type": "filepath" + }, + "353": { + "file_id": 16, + "content": "The code defines a function that converts string-encoded lists to Python integer lists and tests if the solution matches ground truth. A helper function checks sorted lists by comparing adjacent elements, returning error count as score; defaults to 300 in case of exception.", + "type": "summary" + }, + "354": { + "file_id": 16, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom typing import Dict, List\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of string elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of string elements.\n :rtype: List[str]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef test_sorting(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool", + "type": "code", + "location": "/examples/sorting/utils.py:1-35" + }, + "355": { + "file_id": 16, + "content": "This code defines a function to convert a list encoded inside a string into a Python list object of integer elements. 
It also contains a helper function that tests whether the final solution matches the ground truth, taking a thought state as input and returning a boolean result.", + "type": "comment" + }, + "356": { + "file_id": 16, + "content": " \"\"\"\n try:\n correct_list = sorted(string_to_list(state[\"original\"]))\n sorted_list = string_to_list(state[\"current\"])\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors.\n :rtype: float\n \"\"\"\n try:\n unsorted_list = state[\"original\"]\n if (\n \"unsorted_sublist\" in state\n and state[\"unsorted_sublist\"] != \"\"\n and state[\"unsorted_sublist\"] is not None\n and len(state[\"unsorted_sublist\"]) < len(unsorted_list) - 5\n ):\n unsorted_list = state[\"unsorted_sublist\"]\n correct_list = sorted(string_to_list(unsorted_list))\n current_list = string_to_list(state[\"current\"])\n num_errors = 0\n for i in range(10):\n num_errors += abs(\n sum([1 for num in current_list if num == i])", + "type": "code", + "location": "/examples/sorting/utils.py:36-70" + }, + "357": { + "file_id": 16, + "content": "Function to check if a given list is correctly sorted. If not, returns the number of errors as score.", + "type": "comment" + }, + "358": { + "file_id": 16, + "content": " - sum([1 for num in correct_list if num == i])\n )\n num_errors += sum(\n [1 for num1, num2 in zip(current_list, current_list[1:]) if num1 > num2]\n )\n return num_errors\n except:\n return 300", + "type": "code", + "location": "/examples/sorting/utils.py:71-78" + }, + "359": { + "file_id": 16, + "content": "This code calculates the number of errors in a sorted list by comparing adjacent elements. It uses list comprehensions and built-in Python functions like zip() and sum(). 
If an exception occurs, it returns 300 as a default value for num_errors.", + "type": "comment" + }, + "360": { + "file_id": 17, + "content": "/graph_of_thoughts/controller/README.md", + "type": "filepath" + }, + "361": { + "file_id": 17, + "content": "The Controller class manages the execution of a graph of operations using an LLM and requires custom prompter, parser, GoO, and AbstractLanguageModel. The code initializes an instance with these parameters, runs the executor, and outputs the generated graph to file.", + "type": "summary" + }, + "362": { + "file_id": 17, + "content": "# Controller\nThe Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts.\nGoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). \nIn order for a GoO to be executed, an instance of Large Language Model (LLM) must be supplied to the controller (along with other required objects).\nPlease refer to the [Language Models](../language_models/README.md) section for more information about LLMs. \nThe following section describes how to instantiate the Controller to run a defined GoO. \n## Controller Instantiation\n- Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately.\n- Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations.\n```\nlm = ...create\ngraph_of_operations = ...create", + "type": "code", + "location": "/graph_of_thoughts/controller/README.md:1-16" + }, + "363": { + "file_id": 17, + "content": "The Controller class manages the execution of the Graph of Operations (GoO) using a Large Language Model (LLM). It requires custom Prompter and Parser, along with instantiated GraphOfOperations and AbstractLanguageModel. 
The initial state is represented as a dictionary for prompts in operations.", + "type": "comment" + }, + "364": { + "file_id": 17, + "content": "executor = controller.Controller(\n lm,\n graph_of_operations,\n ,\n ,\n ,\n)\nexecutor.run()\nexecutor.output_graph(\"path/to/output.json\")\n```\n- After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost.", + "type": "code", + "location": "/graph_of_thoughts/controller/README.md:18-28" + }, + "365": { + "file_id": 17, + "content": "The code initializes an instance of the Controller class with necessary parameters, including a language model (lm), graph of operations, custom prompter and parser, and an initial state. It then runs the executor and writes the generated graph containing individual operations, thoughts, scores, validity, and token usage to an output file at the specified path.", + "type": "comment" + }, + "366": { + "file_id": 18, + "content": "/graph_of_thoughts/controller/__init__.py", + "type": "filepath" + }, + "367": { + "file_id": 18, + "content": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", + "type": "summary" + }, + "368": { + "file_id": 18, + "content": "from .controller import Controller", + "type": "code", + "location": "/graph_of_thoughts/controller/__init__.py:1-1" + }, + "369": { + "file_id": 18, + "content": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. 
This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", + "type": "comment" + }, + "370": { + "file_id": 19, + "content": "/graph_of_thoughts/controller/controller.py", + "type": "filepath" + }, + "371": { + "file_id": 19, + "content": "The code manages the execution flow of a graph's operations using language models and classes for processing, serialization, and debugging, resulting in an organized list written to a JSON file.", + "type": "summary" + }, + "372": { + "file_id": 19, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport logging\nfrom typing import List\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.operations import GraphOfOperations, Thought\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass Controller:\n \"\"\"\n Controller class to manage the execution flow of the Graph of Operations,\n generating the Graph Reasoning State.\n This involves language models, graph operations, prompting, and parsing.\n \"\"\"\n def __init__(\n self,\n lm: AbstractLanguageModel,\n graph: GraphOfOperations,\n prompter: Prompter,\n parser: Parser,\n problem_parameters: dict,\n ) -> None:\n \"\"\"\n Initialize the Controller instance with the language model,\n operations graph, prompter, parser, and problem parameters.", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:1-35" + }, + "373": { + "file_id": 19, + "content": "This code defines a Controller class to manage the execution flow of the Graph of Operations, utilizing language models, graph operations, prompting, and parsing. 
The Controller is initialized with an AbstractLanguageModel, GraphOfOperations, Prompter, Parser, and problem parameters.", + "type": "comment" + }, + "374": { + "file_id": 19, + "content": " :param lm: An instance of the AbstractLanguageModel.\n :type lm: AbstractLanguageModel\n :param graph: The Graph of Operations to be executed.\n :type graph: OperationsGraph\n :param prompter: An instance of the Prompter class, used to generate prompts.\n :type prompter: Prompter\n :param parser: An instance of the Parser class, used to parse responses.\n :type parser: Parser\n :param problem_parameters: Initial parameters/state of the problem.\n :type problem_parameters: dict\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__module__)\n self.lm = lm\n self.graph = graph\n self.prompter = prompter\n self.parser = parser\n self.problem_parameters = problem_parameters\n self.run_executed = False\n def run(self) -> None:\n \"\"\"\n Run the controller and execute the operations from the Graph of\n Operations based on their readiness.\n Ensures the program is in a valid state before execution.", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:37-60" + }, + "375": { + "file_id": 19, + "content": "This function initializes a controller object with provided language model, graph of operations, prompter, parser, and problem parameters. It also sets the run_executed flag to False. 
The run method executes the operations from the Graph of Operations based on their readiness, ensuring the program is in a valid state before execution.", + "type": "comment" + }, + "376": { + "file_id": 19, + "content": " :raises AssertionError: If the Graph of Operation has no roots.\n :raises AssertionError: If the successor of an operation is not in the Graph of Operations.\n \"\"\"\n self.logger.debug(\"Checking that the program is in a valid state\")\n assert self.graph.roots is not None, \"The operations graph has no root\"\n self.logger.debug(\"The program is in a valid state\")\n execution_queue = [\n operation\n for operation in self.graph.operations\n if operation.can_be_executed()\n ]\n while len(execution_queue) > 0:\n current_operation = execution_queue.pop(0)\n self.logger.info(\"Executing operation %s\", current_operation.operation_type)\n current_operation.execute(\n self.lm, self.prompter, self.parser, **self.problem_parameters\n )\n self.logger.info(\"Operation %s executed\", current_operation.operation_type)\n for operation in current_operation.successors:\n assert (", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:61-82" + }, + "377": { + "file_id": 19, + "content": "This code snippet is checking the validity of the program state and executing operations in a queue. It raises AssertionError if the Graph of Operations has no roots or if a successor operation is not found in the graph. 
The code logs debug messages for state checks, information messages for executed operations, and asserts to ensure proper execution order.", + "type": "comment" + }, + "378": { + "file_id": 19, + "content": " operation in self.graph.operations\n ), \"The successor of an operation is not in the operations graph\"\n if operation.can_be_executed():\n execution_queue.append(operation)\n self.logger.info(\"All operations executed\")\n self.run_executed = True\n def get_final_thoughts(self) -> List[List[Thought]]:\n \"\"\"\n Retrieve the final thoughts after all operations have been executed.\n :return: List of thoughts for each operation in the graph's leaves.\n :rtype: List[List[Thought]]\n :raises AssertionError: If the `run` method hasn't been executed yet.\n \"\"\"\n assert self.run_executed, \"The run method has not been executed\"\n return [operation.get_thoughts() for operation in self.graph.leaves]\n def output_graph(self, path: str) -> None:\n \"\"\"\n Serialize the state and results of the operations graph to a JSON file.\n :param path: The path to the output file.\n :type path: str", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:83-106" + }, + "379": { + "file_id": 19, + "content": "Code snippet defines a class with methods to execute operations in a graph, retrieve final thoughts after execution, and serialize the graph state and results. The `run` method executes operations in the graph, checks if operation is in graph's operations, appends executable operations to an execution queue, logs information when all operations are executed, and sets `run_executed` flag to True. `get_final_thoughts` method retrieves final thoughts after execution of all operations by iterating through graph's leaves and getting thoughts from each operation. It raises AssertionError if the run method has not been executed yet. 
`output_graph` method serializes state and results of operations graph to a JSON file at specified path.", + "type": "comment" + }, + "380": { + "file_id": 19, + "content": " \"\"\"\n output = []\n for operation in self.graph.operations:\n operation_serialized = {\n \"operation\": operation.operation_type.name,\n \"thoughts\": [thought.state for thought in operation.get_thoughts()],\n }\n if any([thought.scored for thought in operation.get_thoughts()]):\n operation_serialized[\"scored\"] = [\n thought.scored for thought in operation.get_thoughts()\n ]\n operation_serialized[\"scores\"] = [\n thought.score for thought in operation.get_thoughts()\n ]\n if any([thought.validated for thought in operation.get_thoughts()]):\n operation_serialized[\"validated\"] = [\n thought.validated for thought in operation.get_thoughts()\n ]\n operation_serialized[\"validity\"] = [\n thought.valid for thought in operation.get_thoughts()\n ]\n if any(", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:107-128" + }, + "381": { + "file_id": 19, + "content": "This code iterates through the operations in a graph, serializes each operation with its thoughts, and adds extra information if any thoughts have been scored, validated, or are invalid. 
This is used for generating an output list of serialized operations and associated data.", + "type": "comment" + }, + "382": { + "file_id": 19, + "content": " [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n ):\n operation_serialized[\"compared_to_ground_truth\"] = [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n operation_serialized[\"problem_solved\"] = [\n thought.solved for thought in operation.get_thoughts()\n ]\n output.append(operation_serialized)\n output.append(\n {\n \"prompt_tokens\": self.lm.prompt_tokens,\n \"completion_tokens\": self.lm.completion_tokens,\n \"cost\": self.lm.cost,\n }\n )\n with open(path, \"w\") as file:\n file.write(json.dumps(output, indent=2))", + "type": "code", + "location": "/graph_of_thoughts/controller/controller.py:129-152" + }, + "383": { + "file_id": 19, + "content": "This code iterates over the thoughts in each operation, compares them to ground truth, and determines if they were solved. The data is serialized and appended to a list, which is then written to a JSON file along with prompt, completion tokens, and cost information.", + "type": "comment" + }, + "384": { + "file_id": 20, + "content": "/graph_of_thoughts/language_models/README.md", + "type": "filepath" + }, + "385": { + "file_id": 20, + "content": "The Language Models module supports GPT-4/GPT-3.5 and Llama-2, with functionality for instantiating LLMs, adding new ones, and using OpenAI API features like pricing and response_token_cost. 
It is implemented in a base class for building language models that allows for querying and retrieving response texts.", + "type": "summary" + }, + "386": { + "file_id": 20, + "content": "# Language Models\nThe Language Models module is responsible for managing the large language models (LLMs) used by the Controller.\nCurrently, the framework supports the following LLMs:\n- GPT-4 / GPT-3.5 (Remote - OpenAI API)\n- Llama-2 (Local - HuggingFace Transformers) \nThe following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework.\n## LLM Instantiation\n- Create a copy of `config_template.json` named `config.json`.\n- Fill configuration details based on the used model (below).\n### GPT-4 / GPT-3.5\n- Adjust predefined `chatgpt`, `chatgpt4` or create new configuration with an unique key.\n| Key | Value |", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:1-18" + }, + "387": { + "file_id": 20, + "content": "This code introduces the Language Models module and explains its purpose. It currently supports GPT-4/GPT-3.5 (Remote - OpenAI API) and Llama-2 (Local - HuggingFace Transformers). The following sections describe how to instantiate individual LLMs and add new ones to the framework. 
The LLM instantiation process involves creating a copy of `config_template.json`, filling in configuration details based on the used model, and adjusting predefined configurations or creating a new one with an unique key for GPT-4/GPT-3.5.", + "type": "comment" + }, + "388": { + "file_id": 20, + "content": "|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Model name based on [OpenAI model overview](https://platform.openai.com/docs/models/overview). |\n| prompt_token_cost | Price per 1000 prompt tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. ", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:19-21" + }, + "389": { + "file_id": 20, + "content": "This table maps model IDs to their respective OpenAI names and calculates prompt token costs based on OpenAI pricing, which is used for determining cumulative prices per language modeling (LLM) instance.", + "type": "comment" + }, + "390": { + "file_id": 20, + "content": " |\n| response_token_cost | Price per 1000 response tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. |\n| temperature | Parameter of OpenAI models that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 2.0, default is 1.0. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature). |\n| max_tokens | The maximum number of tokens to generate in the chat completion. 
Value ", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:21-24" + }, + "391": { + "file_id": 20, + "content": "The code defines 'response_token_cost', a variable representing the price per 1000 response tokens, which follows OpenAI's pricing. It also includes 'temperature', a parameter controlling randomness and creativity in responses. The value is between 0.0 and 2.0, defaulting to 1.0, with further details available in the OpenAI API reference. Lastly, 'max_tokens' sets the maximum number of tokens generated in chat completions.", + "type": "comment" + }, + "392": { + "file_id": 20, + "content": "depends on the maximum context size of the model specified in the [OpenAI model overview](https://platform.openai.com/docs/models/overview). More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens). |\n| stop | String or array of strings specifying sequence of characters which if detected, stops further generation of tokens. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-stop). |\n| organization | Organization to use for the API requests (may be empty). |", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:24-26" + }, + "393": { + "file_id": 20, + "content": "This code defines three input parameters for the OpenAI API's chat creation endpoint: \"model\", \"stop\", and \"organization\". The model parameter specifies the language model to use, with its maximum context size determined by the OpenAI model overview. The stop parameter identifies a sequence of characters that halt further token generation, referencing the OpenAI API reference for more information. 
Lastly, organization is an optional field used for API requests, which can be left empty.", + "type": "comment" + }, + "394": { + "file_id": 20, + "content": "| api_key | Personal API key that will be used to access OpenAI API. |\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.ChatGPT(\n \"path/to/config.json\", \n model_name=\n)\n```\n### Llama-2\n- Requires local hardware to run inference and a HuggingFace account.\n- Adjust predefined `llama7b-hf`, `llama13b-hf`, `llama70b-hf` or create a new configuration with an unique key.\n| Key | Value |\n|---------------------|----------------", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:27-42" + }, + "395": { + "file_id": 20, + "content": "The code snippet is initializing a language model controller using the ChatGPT class. It takes in the path to a configuration file and a model name corresponding to the selected configuration key. The model can be predefined (llama7b-hf, llama13b-hf, llama70b-hf) or custom with a unique key.", + "type": "comment" + }, + "396": { + "file_id": 20, + "content": "-----------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Specifies HuggingFace Llama 2 model identifier (`meta-llama/`). |\n| cache_dir | Local directory where model will be downloaded and accessed. |\n| prompt_token_cost | Price per 1000 prompt tokens (currently not used - local model = no cost). |\n| response_token_cost | Price per 1000 response tokens (currently not used - local model = no cost). 
|\n| temperature | Parameter ", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:42-47" + }, + "397": { + "file_id": 20, + "content": "This code block is defining the parameters for a language model, including the Llama 2 model identifier (`model_id`), the local directory where the model will be stored and accessed (`cache_dir`), the price per 1000 prompt tokens (`prompt_token_cost`), the price per 1000 response tokens (`response_token_cost`), and a parameter for temperature control. Note that currently, these costs are not used due to the local model being cost-free.", + "type": "comment" + }, + "398": { + "file_id": 20, + "content": "that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 1.0, default is 0.6. |\n| top_k | Top-K sampling method described in [Transformers tutorial](https://huggingface.co/blog/how-to-generate). Default value is set to 10. |\n| max_tokens | The maximum number of tokens to generate in the chat completion. More tokens require more memory. |\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.Llama2HF(\n \"path/to/config.json\", \n model_name=\n)\n```\n- Request access to Llama-2 via the [Meta form](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) using the same email address as for the HuggingFace account.\n- After the access is granted, go to [HuggingFace Llama-2 model ca", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:47-59" + }, + "399": { + "file_id": 20, + "content": "The code initializes a language model (Llama2HF) with a specified configuration key, which determines the randomness and creativity of responses. It also sets top-K sampling method from Transformers tutorial and maximum tokens to generate in chat completion. 
Access to Llama-2 is requested via Meta form using the same email as HuggingFace account, then access HuggingFace Llama-2 model page.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/4.json b/docs/data/4.json new file mode 100644 index 0000000..ff1ad66 --- /dev/null +++ b/docs/data/4.json @@ -0,0 +1,543 @@ +{ + "400": { + "file_id": 20, + "content": "rd](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (_\"You have been granted access to this model\"_ message should appear).\n- Generate HuggingFace access token.\n- Log in from CLI with: `huggingface-cli login --token `.\nNote: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library).\n## Adding LLMs\nMore LLMs can be added by following these steps:\n- Create new class as a subclass of `AbstractLanguageModel`.\n- Use the constructor for loading configuration and instantiating the language model (if needed). \n```\nclass CustomLanguageModel(AbstractLanguageModel):\n def __init__(\n self,\n config_path: str = \"\",\n model_name: str = \"llama7b-hf\",\n cache: bool = False\n ) -> None:", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:59-76" + }, + "401": { + "file_id": 20, + "content": "This code provides instructions for adding a new LLM (Language Language Model) to the existing model. To do so, create a subclass of `AbstractLanguageModel` and use the constructor to load configuration and instantiate the language model if needed. The model is downloaded from HuggingFace into the cache directory specified in the config.json. 
Running queries with larger models may require multiple GPUs, which will be automatically split by the Transformers library.", + "type": "comment" + }, + "402": { + "file_id": 20, + "content": " super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Load data from configuration into variables if needed\n # Instantiate LLM if needed\n```\n- Implement `query` abstract method that is used to get a list of responses from the LLM (call to remote API or local model inference).\n```\ndef query(self, query: str, num_responses: int = 1) -> Any:\n # Support caching \n # Call LLM and retrieve list of responses - based on num_responses \n # Return LLM response structure (not only raw strings) \n```\n- Implement `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`.\n```\ndef get_response_texts(self, query_response: Union[List[Dict], Dict]) -> List[str]:\n # Retrieve list of raw strings from the LLM response structure \n```", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:77-95" + }, + "403": { + "file_id": 20, + "content": "The code is a part of a class that serves as a base for building language models. It loads configuration and initializes the model. The `query` method calls the LLM to get responses based on a query, while `get_response_texts` retrieves raw texts from the response structure produced by `query`. 
These methods are abstract and need to be implemented in child classes.", + "type": "comment" + }, + "404": { + "file_id": 21, + "content": "/graph_of_thoughts/language_models/__init__.py", + "type": "filepath" + }, + "405": { + "file_id": 21, + "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", + "type": "summary" + }, + "406": { + "file_id": 21, + "content": "from .abstract_language_model import AbstractLanguageModel\nfrom .chatgpt import ChatGPT\nfrom .llamachat_hf import Llama2HF", + "type": "code", + "location": "/graph_of_thoughts/language_models/__init__.py:1-3" + }, + "407": { + "file_id": 21, + "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", + "type": "comment" + }, + "408": { + "file_id": 22, + "content": "/graph_of_thoughts/language_models/abstract_language_model.py", + "type": "filepath" + }, + "409": { + "file_id": 22, + "content": "This code defines an AbstractLanguageModel class with config file path, model name, and caching options for language models. 
It also includes two abstract methods: 'query' and 'get_response_texts', serving as placeholders for derived classes to implement their own functionality.", + "type": "summary" + }, + "410": { + "file_id": 22, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom abc import ABC, abstractmethod\nfrom typing import List, Dict, Union, Any\nimport json\nimport os\nimport logging\nclass AbstractLanguageModel(ABC):\n \"\"\"\n Abstract base class that defines the interface for all language models.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the AbstractLanguageModel instance with configuration, model details, and caching options.\n :param config_path: Path to the config file. Defaults to \"\".\n :type config_path: str\n :param model_name: Name of the language model. Defaults to \"\".\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.\n :type cache: bool\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__name__)", + "type": "code", + "location": "/graph_of_thoughts/language_models/abstract_language_model.py:1-34" + }, + "411": { + "file_id": 22, + "content": "This code snippet defines an abstract base class, AbstractLanguageModel, for language models with config file path, model name, and caching options in the initializer. 
It also initializes a logger for logging purposes.", + "type": "comment" + }, + "412": { + "file_id": 22, + "content": " self.config: Dict = None\n self.model_name: str = model_name\n self.cache = cache\n if self.cache:\n self.respone_cache: Dict[str, List[Any]] = {}\n self.load_config(config_path)\n self.prompt_tokens: int = 0\n self.completion_tokens: int = 0\n self.cost: float = 0.0\n def load_config(self, path: str) -> None:\n \"\"\"\n Load configuration from a specified path.\n :param path: Path to the config file. If an empty path provided,\n default is `config.json` in the current directory.\n :type path: str\n \"\"\"\n if path == \"\":\n current_dir = os.path.dirname(os.path.abspath(__file__))\n path = os.path.join(current_dir, \"config.json\")\n with open(path, \"r\") as f:\n self.config = json.load(f)\n self.logger.debug(f\"Loaded config from {path} for {self.model_name}\")\n def clear_cache(self) -> None:\n \"\"\"\n Clear the response cache.\n \"\"\"\n self.respone_cache.clear()", + "type": "code", + "location": "/graph_of_thoughts/language_models/abstract_language_model.py:35-66" + }, + "413": { + "file_id": 22, + "content": "This code initializes an abstract language model object with optional cache and loads its configuration from a specified file. 
It also provides methods to clear the response cache.", + "type": "comment" + }, + "414": { + "file_id": 22, + "content": " @abstractmethod\n def query(self, query: str, num_responses: int = 1) -> Any:\n \"\"\"\n Abstract method to query the language model.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: The number of desired responses.\n :type num_responses: int\n :return: The language model's response(s).\n :rtype: Any\n \"\"\"\n pass\n @abstractmethod\n def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]:\n \"\"\"\n Abstract method to extract response texts from the language model's response(s).\n :param query_responses: The responses returned from the language model.\n :type query_responses: Union[List[Any], Any]\n :return: List of textual responses.\n :rtype: List[str]\n \"\"\"\n pass", + "type": "code", + "location": "/graph_of_thoughts/language_models/abstract_language_model.py:68-92" + }, + "415": { + "file_id": 22, + "content": "This code defines two abstract methods for a language model. The 'query' method takes a query and the desired number of responses, but doesn't specify what it should do with them. The 'get_response_texts' method expects response(s) from the language model, but doesn't clarify how to extract textual data. 
It serves as a placeholder for derived classes to implement their own functionality.", + "type": "comment" + }, + "416": { + "file_id": 23, + "content": "/graph_of_thoughts/language_models/chatgpt.py", + "type": "filepath" + }, + "417": { + "file_id": 23, + "content": "The code creates a ChatGPT class that inherits from AbstractLanguageModel, initializes with configuration and model details, sets query parameters, supports multiple responses, uses OpenAI's chat API, incorporates backoff and caching for optimization, logs response texts and costs, and utilizes `get_response_texts` to extract response strings.", + "type": "summary" + }, + "418": { + "file_id": 23, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport backoff\nimport os\nimport random\nimport time\nfrom typing import List, Dict, Union\nfrom openai import OpenAI, OpenAIError\nfrom openai.types.chat.chat_completion import ChatCompletion\nfrom .abstract_language_model import AbstractLanguageModel\nclass ChatGPT(AbstractLanguageModel):\n \"\"\"\n The ChatGPT class handles interactions with the OpenAI models using the provided configuration.\n Inherits from the AbstractLanguageModel and implements its abstract methods.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"chatgpt\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the ChatGPT instance with configuration, model details, and caching options.\n :param config_path: Path to the configuration file. Defaults to \"\".\n :type config_path: str\n ", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:1-35" + }, + "419": { + "file_id": 23, + "content": "This code is the initialization of a class called ChatGPT. It inherits from AbstractLanguageModel and initializes with configuration, model details, and caching options. 
The config_path parameter is for the path to a configuration file and defaults to an empty string. The model_name parameter specifies the model to be used, defaulting to \"chatgpt\", and cache can be set to True or False for enabling or disabling caching respectively.", + "type": "comment" + }, + "420": { + "file_id": 23, + "content": " :param model_name: Name of the model, default is 'chatgpt'. Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.\n :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # The model_id is the id of the model that is used for chatgpt, i.e. gpt-4, gpt-3.5-turbo, etc.\n self.model_id: str = self.config[\"model_id\"]\n # The prompt_token_cost and response_token_cost are the costs for 1000 prompt tokens and 1000 response tokens respectively.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature of a model is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # The maximum number of tokens to generate in the chat completion.", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:35-49" + }, + "421": { + "file_id": 23, + "content": "The code initializes a model with a specified name and sets the cache flag. 
It retrieves the model ID, prompt token cost, response token cost, temperature, and maximum number of tokens for chat completion from the configuration file.", + "type": "comment" + }, + "422": { + "file_id": 23, + "content": " self.max_tokens: int = self.config[\"max_tokens\"]\n # The stop sequence is a sequence of tokens that the model will stop generating at (it will not generate the stop sequence).\n self.stop: Union[str, List[str]] = self.config[\"stop\"]\n # The account organization is the organization that is used for chatgpt.\n self.organization: str = self.config[\"organization\"]\n if self.organization == \"\":\n self.logger.warning(\"OPENAI_ORGANIZATION is not set\")\n self.api_key: str = os.getenv(\"OPENAI_API_KEY\", self.config[\"api_key\"])\n if self.api_key == \"\":\n raise ValueError(\"OPENAI_API_KEY is not set\")\n # Initialize the OpenAI Client\n self.client = OpenAI(api_key=self.api_key, organization=self.organization)\n def query(\n self, query: str, num_responses: int = 1\n ) -> Union[List[ChatCompletion], ChatCompletion]:\n \"\"\"\n Query the OpenAI model for responses.\n :param query: The query to be posed to the language model.", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:50-69" + }, + "423": { + "file_id": 23, + "content": "This code initializes an instance of a language model and sets parameters such as maximum tokens, stop sequence, organization, API key, and initializes the OpenAI client. 
It also includes a query method to ask the language model for responses.", + "type": "comment" + }, + "424": { + "file_id": 23, + "content": " :type query: str\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: Response(s) from the OpenAI model.\n :rtype: Dict\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n if num_responses == 1:\n response = self.chat([{\"role\": \"user\", \"content\": query}], num_responses)\n else:\n response = []\n next_try = num_responses\n total_num_attempts = num_responses\n while num_responses > 0 and total_num_attempts > 0:\n try:\n assert next_try > 0\n res = self.chat([{\"role\": \"user\", \"content\": query}], next_try)\n response.append(res)\n num_responses -= next_try\n next_try = min(num_responses, next_try)\n except Exception as e:\n next_try = (next_try + 1) // 2\n self.logger.warning(", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:70-94" + }, + "425": { + "file_id": 23, + "content": "The code defines a function that takes a query and the number of desired responses. If the query is in the cache, it returns the corresponding response(s). If not, it calls the OpenAI chat model to generate responses for the given query. It supports generating multiple responses by repeatedly calling the OpenAI model until the required number of responses are obtained or an exception occurs. 
The function also logs any warnings during the process.", + "type": "comment" + }, + "426": { + "file_id": 23, + "content": " f\"Error in chatgpt: {e}, trying again with {next_try} samples\"\n )\n time.sleep(random.randint(1, 3))\n total_num_attempts -= 1\n if self.cache:\n self.respone_cache[query] = response\n return response\n @backoff.on_exception(backoff.expo, OpenAIError, max_time=10, max_tries=6)\n def chat(self, messages: List[Dict], num_responses: int = 1) -> ChatCompletion:\n \"\"\"\n Send chat messages to the OpenAI model and retrieves the model's response.\n Implements backoff on OpenAI error.\n :param messages: A list of message dictionaries for the chat.\n :type messages: List[Dict]\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: The OpenAI model's response.\n :rtype: ChatCompletion\n \"\"\"\n response = self.client.chat.completions.create(\n model=self.model_id,\n messages=messages,", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:95-119" + }, + "427": { + "file_id": 23, + "content": "This code is defining a class with a chat method that sends messages to the OpenAI model and retrieves the response. The method implements backoff on OpenAI error, allowing for multiple attempts if an error occurs. 
It also includes caching functionality to improve performance by storing previous responses in a cache.", + "type": "comment" + }, + "428": { + "file_id": 23, + "content": " temperature=self.temperature,\n max_tokens=self.max_tokens,\n n=num_responses,\n stop=self.stop,\n )\n self.prompt_tokens += response.usage.prompt_tokens\n self.completion_tokens += response.usage.completion_tokens\n prompt_tokens_k = float(self.prompt_tokens) / 1000.0\n completion_tokens_k = float(self.completion_tokens) / 1000.0\n self.cost = (\n self.prompt_token_cost * prompt_tokens_k\n + self.response_token_cost * completion_tokens_k\n )\n self.logger.info(\n f\"This is the response from chatgpt: {response}\"\n f\"\\nThis is the cost of the response: {self.cost}\"\n )\n return response\n def get_response_texts(\n self, query_response: Union[List[ChatCompletion], ChatCompletion]\n ) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_response: The response dictionary (or list of dictionaries) from the OpenAI model.", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:120-146" + }, + "429": { + "file_id": 23, + "content": "This code interacts with an OpenAI model, specifically the ChatGPT API. It takes a query as input and generates multiple responses using the API. The code keeps track of usage costs in terms of prompt and completion tokens, and logs the response text along with the cost for each generated response. 
The `get_response_texts` method extracts the response texts from the query response dictionary or list of dictionaries returned by the OpenAI model.", + "type": "comment" + }, + "430": { + "file_id": 23, + "content": " :type query_response: Union[List[ChatCompletion], ChatCompletion]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n if not isinstance(query_response, List):\n query_response = [query_response]\n return [\n choice.message.content\n for response in query_response\n for choice in response.choices\n ]", + "type": "code", + "location": "/graph_of_thoughts/language_models/chatgpt.py:147-157" + }, + "431": { + "file_id": 23, + "content": "This function converts a single ChatCompletion or list of them into a list of response strings by iterating over the choices within each completion and extracting their content.", + "type": "comment" + }, + "432": { + "file_id": 24, + "content": "/graph_of_thoughts/language_models/config_template.json", + "type": "filepath" + }, + "433": { + "file_id": 24, + "content": "The code provides a generic language model configuration template, including parameters for model ID, prompt and response token costs, temperature, max tokens, stop words, cache directory (\"/llama\"), and optional values (top-k=10). 
This is a user-specific config without API key or organization.", + "type": "summary" + }, + "434": { + "file_id": 24, + "content": "{\n \"chatgpt\" : {\n \"model_id\": \"gpt-3.5-turbo\",\n \"prompt_token_cost\": 0.0015,\n \"response_token_cost\": 0.002,\n \"temperature\": 1.0,\n \"max_tokens\": 1536,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"chatgpt4\" : {\n \"model_id\": \"gpt-4\",\n \"prompt_token_cost\": 0.03,\n \"response_token_cost\": 0.06,\n \"temperature\": 1.0,\n \"max_tokens\": 4096,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"llama7b-hf\" : {\n \"model_id\": \"Llama-2-7b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama13b-hf\" : {\n \"model_id\": \"Llama-2-13b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama70b-hf\" : {\n \"model_id\": \"Llama-2-70b-chat-hf\",", + "type": "code", + "location": "/graph_of_thoughts/language_models/config_template.json:1-41" + }, + "435": { + "file_id": 24, + "content": "This code appears to be a configuration template for language models, with each model (such as \"chatgpt\", \"chatgpt4\", \"llama7b-hf\", etc.) defined by its own set of parameters including the model ID, prompt and response token costs, temperature, max tokens, and optional stop words. The \"cache_dir\" parameter is specific to Llama models, suggesting these models require local caching. 
The absence of an API key and organization suggests that this is a generic template for user-specific configurations.", + "type": "comment" + }, + "436": { + "file_id": 24, + "content": " \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n }\n}", + "type": "code", + "location": "/graph_of_thoughts/language_models/config_template.json:42-49" + }, + "437": { + "file_id": 24, + "content": "This code snippet contains a configuration template for a language model. It sets the cache directory path as \"/llama\", prompts and response tokens costs to 0, temperature at 0.6, top-k value as 10, and maximum generated token count as 4096.", + "type": "comment" + }, + "438": { + "file_id": 25, + "content": "/graph_of_thoughts/language_models/llamachat_hf.py", + "type": "filepath" + }, + "439": { + "file_id": 25, + "content": "The code initializes the LLaMA 2 model for text generation, sets up configurations and tokenizer, creates a pipeline, defines a method to generate responses by querying the model, formats responses into dictionaries, and extracts \"generated_text\" from multiple query response dictionaries.", + "type": "summary" + }, + "440": { + "file_id": 25, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport os\nimport torch\nfrom typing import List, Dict, Union\nfrom .abstract_language_model import AbstractLanguageModel\nclass Llama2HF(AbstractLanguageModel):\n \"\"\"\n An interface to use LLaMA 2 models through the HuggingFace library.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"llama7b-hf\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize an instance of the Llama2HF class with configuration, model details, and caching options.\n :param config_path: Path to the 
configuration file. Defaults to an empty string.\n :type config_path: str\n :param model_name: Specifies the name of the LLaMA model variant. Defaults to \"llama7b-hf\".\n Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.", + "type": "code", + "location": "/graph_of_thoughts/language_models/llamachat_hf.py:1-31" + }, + "441": { + "file_id": 25, + "content": "The code imports necessary libraries, defines a class Llama2HF as an interface for using LLaMA 2 models through HuggingFace library, and initializes the class with configuration, model name, and caching options.", + "type": "comment" + }, + "442": { + "file_id": 25, + "content": " :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Detailed id of the used model.\n self.model_id: str = self.config[\"model_id\"]\n # Costs for 1000 tokens.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # Top K sampling.\n self.top_k: int = self.config[\"top_k\"]\n # The maximum number of tokens to generate in the chat completion.\n self.max_tokens: int = self.config[\"max_tokens\"]\n # Important: must be done before importing transformers\n os.environ[\"TRANSFORMERS_CACHE\"] = self.config[\"cache_dir\"]\n import transformers\n hf_model_id = f\"meta-llama/{self.model_id}\"\n model_config = transformers.AutoConfig.from_pretrained(hf_model_id)", + "type": "code", + "location": "/graph_of_thoughts/language_models/llamachat_hf.py:32-53" + }, + "443": { + "file_id": 25, + "content": "The code initializes a class and sets various attributes such as model_id, prompt and response token costs, temperature, top K sampling, and maximum tokens. 
It also sets the Transformers library cache environment variable before importing it to avoid conflicts with other caches.", + "type": "comment" + }, + "444": { + "file_id": 25, + "content": " bnb_config = transformers.BitsAndBytesConfig(\n load_in_4bit=True,\n bnb_4bit_quant_type=\"nf4\",\n bnb_4bit_use_double_quant=True,\n bnb_4bit_compute_dtype=torch.bfloat16,\n )\n self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id)\n self.model = transformers.AutoModelForCausalLM.from_pretrained(\n hf_model_id,\n trust_remote_code=True,\n config=model_config,\n quantization_config=bnb_config,\n device_map=\"auto\",\n )\n self.model.eval()\n torch.no_grad()\n self.generate_text = transformers.pipeline(\n model=self.model, tokenizer=self.tokenizer, task=\"text-generation\"\n )\n def query(self, query: str, num_responses: int = 1) -> List[Dict]:\n \"\"\"\n Query the LLaMA 2 model for responses.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: Number of desired responses, default is 1.", + "type": "code", + "location": "/graph_of_thoughts/language_models/llamachat_hf.py:54-82" + }, + "445": { + "file_id": 25, + "content": "The code initializes an LLaMA model for text generation, loads the tokenizer and model configurations, and creates a text generation pipeline. It also provides a function to query the model with a given input query and can generate multiple responses depending on the provided number of desired responses.", + "type": "comment" + }, + "446": { + "file_id": 25, + "content": " :type num_responses: int\n :return: Response(s) from the LLaMA 2 model.\n :rtype: List[Dict]\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n sequences = []\n query = f\"<>You are a helpful assistant. 
Always follow the intstructions precisely and output the response exactly in the requested format.<>\\n\\n[INST] {query} [/INST]\"\n for _ in range(num_responses):\n sequences.extend(\n self.generate_text(\n query,\n do_sample=True,\n top_k=self.top_k,\n num_return_sequences=1,\n eos_token_id=self.tokenizer.eos_token_id,\n max_length=self.max_tokens,\n )\n )\n response = [\n {\"generated_text\": sequence[\"generated_text\"][len(query) :].strip()}\n for sequence in sequences\n ]\n if self.cache:\n self.respone_cache[query] = response", + "type": "code", + "location": "/graph_of_thoughts/language_models/llamachat_hf.py:83-107" + }, + "447": { + "file_id": 25, + "content": "This code defines a method that generates responses from the LLaMA 2 language model. It first checks if the response is cached, then creates a query with system instructions and input. It generates multiple responses using the `generate_text` function, stores them in a list, and formats them into a response dictionary. Finally, it caches the response if necessary.", + "type": "comment" + }, + "448": { + "file_id": 25, + "content": " return response\n def get_response_texts(self, query_responses: List[Dict]) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_responses: The response list of dictionaries generated from the `query` method.\n :type query_responses: List[Dict]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n return [query_response[\"generated_text\"] for query_response in query_responses]", + "type": "code", + "location": "/graph_of_thoughts/language_models/llamachat_hf.py:108-119" + }, + "449": { + "file_id": 25, + "content": "This function takes a list of query response dictionaries, extracts the \"generated_text\" key from each dictionary and returns a list of those extracted texts.", + "type": "comment" + }, + "450": { + "file_id": 26, + "content": "/graph_of_thoughts/operations/README.md", + "type": "filepath" + }, + "451": 
{ + "file_id": 26, + "content": "The Operations module manages thought manipulation with language models and helper classes, including 'ValidateAndImprove' and 'Generate' operations, as well as three additional operations: **KeepValid**, **Selector**, and **GroundTruth** for thought processing systems.", + "type": "summary" + }, + "452": { + "file_id": 26, + "content": "# Operations\nThe Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class. \nOperations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model. \nThe [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State.\n## Graph of Operations\nThe [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here’s how you can construct and work with the Graph of Operations:\n### Initialization\nCreating a new instance of GraphOfOperations:\n```python\nfrom graph_of_thoughts.operations import GraphOfOperations", + "type": "code", + "location": "/graph_of_thoughts/operations/README.md:1-14" + }, + "453": { + "file_id": 26, + "content": "This code snippet describes the Operations module, which contains operations for manipulating and processing thoughts represented by the Thought class. It uses a language model and helper classes like Prompter and Parser for communication and result extraction. 
The Graph of Operations is the main class that orchestrates operations and maintains thought graph state.", + "type": "comment" + }, + "454": { + "file_id": 26, + "content": "graph = GraphOfOperations()\n```\nUpon initialization, the graph will be empty with no operations, roots, or leaves.\n### Adding Operations\n**Append Operation:** You can append operations to the end of the graph using the append_operation method. This ensures that the operation becomes a successor to all current leaf operations in the graph.\n```python\nfrom graph_of_thoughts.operations import Generate\noperationA = Generate()\ngraph.append_operation(operationA)\n```\n**Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method.\n```python\noperationB = Generate()\noperationB.predecessors.append(operationA)\ngraph.add_operation(operationB)\n```\nRemember to set up the predecessors (and optionally successors) for your operation before adding it to the graph.\n## Available Operations\nThe following operations are available in the module:\n**Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function.", + "type": "code", + "location": "/graph_of_thoughts/operations/README.md:16-40" + }, + "455": { + "file_id": 26, + "content": "The code initializes a GraphOfOperations object, which starts empty and can be used to add operations with relationships. Operations can be appended at the end or added while specifying their predecessors. 
Available operations include the Score operation for scoring thoughts using LLM or custom scoring functions.", + "type": "comment" + }, + "456": { + "file_id": 26, + "content": "- num_samples (Optional): The number of samples to use for scoring, defaults to 1.\n- combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False.\n- scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought.\n**ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it. \n- num_samples (Optional): The number of samples to use for validation, defaults to 1.\n- improve (Optional): Whether to improve the thought if it is invalid, defaults to True.\n- num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3.\n- validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid.\n**Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py). ", + "type": "code", + "location": "/graph_of_thoughts/operations/README.md:41-51" + }, + "457": { + "file_id": 26, + "content": "This code describes several operations for a thought processing system. The 'ValidateAndImprove' operation validates each thought and attempts to improve it if invalid, while the 'Generate' operation generates new thoughts based on previous ones or initial input to the Controller. Optional parameters include number of samples, scoring function, validation function, and whether to improve or generate new thoughts.", + "type": "comment" + }, + "458": { + "file_id": 26, + "content": "- num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). 
Defaults to 1.\n- num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1.\n**Improve:** Improve the current thoughts. This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them. \n**Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought. \n- num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). Defaults to 1.\n**KeepBestN:** Keep the best N thoughts from the preceding thoughts. Assumes that the thoughts are already scored and throws an error if they are not.\n- n: The number of thoughts to keep in order of score.\n- higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True.", + "type": "code", + "location": "/graph_of_thoughts/operations/README.md:52-62" + }, + "459": { + "file_id": 26, + "content": "This code snippet provides details about the available operations and their respective parameters for generating, aggregating, or filtering thoughts. It allows users to generate multiple responses, combine them into a single thought, or keep the best N thoughts based on scores. The code also includes default values for optional parameters to ease usage.", + "type": "comment" + }, + "460": { + "file_id": 26, + "content": "**KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated, if not, it will be considered valid.\n**Selector:** Select a number of thoughts from the preceding thoughts using a selection function. 
This is useful if subsequent operations should only be applied to a subset of the preceding thoughts.\n- selector: A function that takes in a list of thoughts and returns a list of thoughts to select.\n**GroundTruth**: Evaluates if the preceding/current thoughts solve the problem and equal the ground truth. This operation is useful for terminating the graph and checking if the final thoughts solve the problem, but is only useful if the ground truth is known.\n- ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem.", + "type": "code", + "location": "/graph_of_thoughts/operations/README.md:64-70" + }, + "461": { + "file_id": 26, + "content": "This code defines three operations: **KeepValid** retains valid thoughts, **Selector** selects a subset of thoughts using a selection function, and **GroundTruth** checks if the preceding/current thoughts solve the problem (requires known ground truth).", + "type": "comment" + }, + "462": { + "file_id": 27, + "content": "/graph_of_thoughts/operations/__init__.py", + "type": "filepath" + }, + "463": { + "file_id": 27, + "content": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. 
It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", + "type": "summary" + }, + "464": { + "file_id": 27, + "content": "from .thought import Thought\nfrom .graph_of_operations import GraphOfOperations\nfrom .operations import (\n Operation,\n Score,\n ValidateAndImprove,\n Generate,\n Aggregate,\n KeepBestN,\n KeepValid,\n Selector,\n GroundTruth,\n Improve,\n)", + "type": "code", + "location": "/graph_of_thoughts/operations/__init__.py:1-14" + }, + "465": { + "file_id": 27, + "content": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", + "type": "comment" + }, + "466": { + "file_id": 28, + "content": "/graph_of_thoughts/operations/graph_of_operations.py", + "type": "filepath" + }, + "467": { + "file_id": 28, + "content": "The Graph of Operations class manages operation execution plans, initializing with empty lists and providing a method to append operations. 
It iterates through predecessors, removing leaves and appending operations without successors.", + "type": "summary" + }, + "468": { + "file_id": 28, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nfrom typing import List\nfrom graph_of_thoughts.operations.operations import Operation\nclass GraphOfOperations:\n \"\"\"\n Represents the Graph of Operations, which prescribes the execution plan of thought operations.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Graph of Operations instance with empty operations, roots, and leaves.\n The roots are the entry points in the graph with no predecessors.\n The leaves are the exit points in the graph with no successors.\n \"\"\"\n self.operations: List[Operation] = []\n self.roots: List[Operation] = []\n self.leaves: List[Operation] = []\n def append_operation(self, operation: Operation) -> None:\n \"\"\"\n Appends an operation to all leaves in the graph and updates the relationships.", + "type": "code", + "location": "/graph_of_thoughts/operations/graph_of_operations.py:1-32" + }, + "469": { + "file_id": 28, + "content": "This code represents the Graph of Operations class, which is responsible for managing the execution plan of thought operations. 
It initializes with empty lists for operations, roots, and leaves, and provides a method to append an operation to all leaves in the graph while updating relationships.", + "type": "comment" + }, + "470": { + "file_id": 28, + "content": " :param operation: The operation to append.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n else:\n for leave in self.leaves:\n leave.add_successor(operation)\n self.leaves = [operation]\n def add_operation(self, operation: Operation) -> None:\n \"\"\"\n Add an operation to the graph considering its predecessors and successors.\n Adjust roots and leaves based on the added operation's position within the graph.\n :param operation: The operation to add.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n self.leaves = [operation]\n assert (\n len(operation.predecessors) == 0\n ), \"First operation should have no predecessors\"\n else:\n if len(operation.predecessors) == 0:\n self.roots.append(operation)", + "type": "code", + "location": "/graph_of_thoughts/operations/graph_of_operations.py:34-64" + }, + "471": { + "file_id": 28, + "content": "This code appends an operation to the graph and adjusts roots and leaves accordingly. If there are no roots, it sets the added operation as both root and leaf with no predecessors. 
If the added operation has no predecessors, it adds it as a new root.", + "type": "comment" + }, + "472": { + "file_id": 28, + "content": " for predecessor in operation.predecessors:\n if predecessor in self.leaves:\n self.leaves.remove(predecessor)\n if len(operation.successors) == 0:\n self.leaves.append(operation)", + "type": "code", + "location": "/graph_of_thoughts/operations/graph_of_operations.py:65-69" + }, + "473": { + "file_id": 28, + "content": "Iterates through predecessors of an operation, removes leaves if they are also operation's predecessors, appends the operation to the leaves list if it has no successors.", + "type": "comment" + }, + "474": { + "file_id": 29, + "content": "/graph_of_thoughts/operations/operations.py", + "type": "filepath" + }, + "475": { + "file_id": 29, + "content": "The comments describe operations that preserve valid thoughts from predecessors, with Comment A introducing an abstract base class for Graph of Thoughts operations and Comment B focusing on the GroundTruth operation in a code context.", + "type": "summary" + }, + "476": { + "file_id": 29, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom enum import Enum\nfrom typing import List, Iterator, Dict, Callable, Union\nfrom abc import ABC, abstractmethod\nimport itertools\nfrom graph_of_thoughts.operations.thought import Thought\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass OperationType(Enum):\n \"\"\"\n Enum to represent different operation types that can be used as unique identifiers.\n \"\"\"\n score: int = 0\n validate_and_improve: int = 1\n generate: int = 2\n improve: int = 3\n aggregate: int = 4\n keep_best_n: int = 5\n keep_valid: 
int = 6\n ground_truth_evaluator: int = 7\n selector: int = 8\nclass Operation(ABC):\n \"\"\"\n Abstract base class that defines the interface for all operations.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:1-40" + }, + "477": { + "file_id": 29, + "content": "This code defines an abstract base class for operations in the Graph of Thoughts system. It includes an OperationType Enum representing unique operation identifiers and outlines the interface for all operations. This base class will be used to create concrete implementations of different types of operations within the system.", + "type": "comment" + }, + "478": { + "file_id": 29, + "content": " \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n operation_type: OperationType = None\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Operation instance with a unique id, and empty predecessors and successors.\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Operation._ids)\n self.predecessors: List[Operation] = []\n self.successors: List[Operation] = []\n self.executed: bool = False\n def can_be_executed(self) -> bool:\n \"\"\"\n Checks if the operation can be executed based on its predecessors.\n :return: True if all predecessors have been executed, False otherwise.\n :rtype: bool\n \"\"\"\n return all(predecessor.executed for predecessor in self.predecessors)\n def get_previous_thoughts(self) -> List[Thought]:\n \"\"\"\n Iterates over all predecessors and aggregates their thoughts.\n :return: A list of all thoughts from the predecessors.\n :rtype: List[Thought]", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:41-71" + }, + "479": { + "file_id": 29, + "content": "Initializes a new Operation instance with a unique ID and empty predecessors and successors. The operation can be executed if all its predecessors have been executed. 
Aggregates thoughts from predecessors to return all thoughts from them.", + "type": "comment" + }, + "480": { + "file_id": 29, + "content": " \"\"\"\n previous_thoughts: List[Thought] = [\n thought\n for predecessor in self.predecessors\n for thought in predecessor.get_thoughts()\n ]\n return previous_thoughts\n def add_predecessor(self, operation: Operation) -> None:\n \"\"\"\n Add a preceding operation and update the relationships.\n :param operation: The operation to be set as a predecessor.\n :type operation: Operation\n \"\"\"\n self.predecessors.append(operation)\n operation.successors.append(self)\n def add_successor(self, operation: Operation) -> None:\n \"\"\"\n Add a succeeding operation and update the relationships.\n :param operation: The operation to be set as a successor.\n :type operation: Operation\n \"\"\"\n self.successors.append(operation)\n operation.predecessors.append(self)\n def execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Execute the operation, assuring that all predecessors have been executed.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:72-105" + }, + "481": { + "file_id": 29, + "content": "This code defines an Operation class with methods to add predecessors and successors, ensuring proper relationships are updated. 
The execute method executes the operation after all predecessors have been executed.", + "type": "comment" + }, + "482": { + "file_id": 29, + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If not all predecessors have been executed.\n \"\"\"\n assert self.can_be_executed(), \"Not all predecessors have been executed\"\n self.logger.info(\n \"Executing operation %d of type %s\", self.id, self.operation_type\n )\n self._execute(lm, prompter, parser, **kwargs)\n self.logger.debug(\"Operation %d executed\", self.id)\n self.executed = True\n @abstractmethod\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Abstract method for the actual execution of the operation.\n This should be implemented in derived classes.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:107-130" + }, + "483": { + "file_id": 29, + "content": "The code defines a class with an abstract method for executing operations, requiring a language model (AbstractLanguageModel), prompter (Prompter), and parser (Parser). 
The class checks if all predecessors have been executed before execution, logs information during execution, marks itself as executed upon completion.", + "type": "comment" + }, + "484": { + "file_id": 29, + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n pass\n @abstractmethod\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Abstract method to retrieve the thoughts associated with the operation.\n This should be implemented in derived classes.\n :return: List of associated thoughts.\n :rtype: List[Thought]\n \"\"\"\n pass\nclass Score(Operation):\n \"\"\"\n Operation to score thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.score\n def __init__(\n self,\n num_samples: int = 1,\n combined_scoring: bool = False,\n scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = None,\n ) -> None:", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:132-168" + }, + "485": { + "file_id": 29, + "content": "This code defines an abstract class \"Operation\" with a method to get associated thoughts and a concrete class \"Score\" that inherits from it. The Score class takes parameters like num_samples, combined_scoring, and scoring_function for scoring thoughts. The get_thoughts method must be implemented in derived classes.", + "type": "comment" + }, + "486": { + "file_id": 29, + "content": " \"\"\"\n Initializes a new Score operation.\n :param num_samples: Number of samples to use for scoring. Defaults to 1.\n :type num_samples: int\n :param combined_scoring: Whether to score all thoughts together or individually. Defaults to False.\n :type combined_scoring: bool\n :param scoring_function: A function to score thoughts (if not using LM). 
Defaults to None.\n :type scoring_function: Takes a list of thought states or a single thought state and\n returns a list of scores or a single score.\n \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.combined_scoring: bool = combined_scoring\n self.thoughts: List[Thought] = []\n self.scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = scoring_function\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of scored thoughts.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:169-192" + }, + "487": { + "file_id": 29, + "content": "This code defines a class for a Score operation that takes a specified number of samples, whether to score thoughts individually or combined, and a scoring function (defaulting to None). It initializes the operation with these parameters and returns the associated scored thoughts.", + "type": "comment" + }, + "488": { + "file_id": 29, + "content": " :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the scoring operation by scoring the thoughts from the predecessors.\n If combined scoring is used, the thoughts are scored together, otherwise individually.\n If a scoring function is provided, it is used, otherwise the LM is prompted.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"Score operation needs at least one predecessor\"", + "type": 
"code", + "location": "/graph_of_thoughts/operations/operations.py:193-218" + }, + "489": { + "file_id": 29, + "content": "This code defines a method that executes a scoring operation on thoughts from predecessors. It first gets the previous thoughts and asserts that there is at least one predecessor. If combined scoring is used, it scores the thoughts together; otherwise, individually. The language model (LM) and prompter are used for prompting if a scoring function is not provided.", + "type": "comment" + }, + "490": { + "file_id": 29, + "content": " if self.combined_scoring:\n previous_thoughts_states = [thought.state for thought in previous_thoughts]\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score states\", self.scoring_function\n )\n scores = self.scoring_function(previous_thoughts_states)\n else:\n prompt = prompter.score_prompt(previous_thoughts_states)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n scores = parser.parse_score_answer(previous_thoughts_states, responses)\n for thought, score in zip(previous_thoughts, scores):\n new_thought = Thought.from_thought(thought)\n new_thought.score = score\n self.thoughts.append(new_thought)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:220-239" + }, + "491": { + "file_id": 29, + "content": "This code calculates scores for each previous thought using either a scoring function or by generating prompts from the thoughts and querying a language model. 
The scores are then assigned to the respective thoughts, and new Thought objects are created with the updated scores before being added to the thoughts list.", + "type": "comment" + }, + "492": { + "file_id": 29, + "content": " else:\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score state\",\n self.scoring_function,\n )\n score = self.scoring_function(thought.state)\n else:\n prompt = prompter.score_prompt([thought.state])\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n score = parser.parse_score_answer([thought.state], responses)[0]\n new_thought.score = score\n self.thoughts.append(new_thought)\n self.logger.info(\n \"Score operation %d scored %d thoughts\",", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:240-263" + }, + "493": { + "file_id": 29, + "content": "This code handles scoring thoughts based on whether a scoring function is defined or not. If the scoring function is not defined, it prompts a language model (LM) to generate responses for each thought state and uses a parser to calculate scores from the LM's responses. 
The new score is then assigned to the thought object, and the thought is appended to the thoughts list.", + "type": "comment" + }, + "494": { + "file_id": 29, + "content": " self.id,\n len(self.thoughts),\n )\nclass ValidateAndImprove(Operation):\n \"\"\"\n Operation to validate and improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.validate_and_improve\n def __init__(\n self,\n num_samples: int = 1,\n improve: bool = True,\n num_tries: int = 3,\n validate_function: Callable[[Dict], bool] = None,\n ) -> None:\n \"\"\"\n Initializes a new ValidateAndImprove operation.\n :param num_samples: Number of samples to use for validation. Defaults to 1.\n :type num_samples: int\n :param improve: Whether to improve the thought if it is not valid. Defaults to True.\n :type improve: bool\n :param num_tries: Number of tries to improve the thought before giving up. Defaults to 3.\n :type num_tries: int\n :param validate_function: A function to validate thoughts (if not using LM). Defaults to None.\n :type validate_function: Takes a thought state and returns a boolean.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:264-293" + }, + "495": { + "file_id": 29, + "content": "This code defines a class called `ValidateAndImprove` that extends the `Operation` class. It is designed to validate and improve thoughts, with parameters for number of samples, whether to improve if not valid, number of tries before giving up, and a function to validate thoughts (optional). 
The operation type is specified as \"validate_and_improve\".", + "type": "comment" + }, + "496": { + "file_id": 29, + "content": " \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.improve: bool = improve\n self.num_tries: int = num_tries\n self.validate_function: Callable[[Dict], bool] = validate_function\n self.thoughts: List[List[Thought]] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the list of final thoughts, after validation and improvement.\n :return: List of final validated and improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return [thought_list[-1] for thought_list in self.thoughts]\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the ValidateAndImprove operation by validating and improving the predecessors' thoughts.\n If a validation function is provided, it is used, otherwise the LM is prompted.\n If improvement is enabled, the LM is prompted to improve the thought, if it is not valid.\n :param lm: The language model to be used.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:294-319" + }, + "497": { + "file_id": 29, + "content": "This code defines a class called `ValidateAndImprove` with attributes for the number of samples, whether to validate and improve thoughts, the number of tries, and a function to validate the thoughts. 
It also has methods to get final validated and improved thoughts, and execute validation and improvement using a language model.", + "type": "comment" + }, + "498": { + "file_id": 29, + "content": " :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"ValidateAndImprove operation needs at least one predecessor\"\n for thought in previous_thoughts:\n thought_list = []\n current_thought = Thought.from_thought(thought)\n current_try = 0\n while True:\n if self.validate_function is not None:\n self.logger.debug(\n \"Using validate function %s to score states\",\n self.validate_function,\n )\n valid = self.validate_function(current_thought.state)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:320-344" + }, + "499": { + "file_id": 29, + "content": "This function gets the previous thoughts, checks that it has at least one predecessor, then iterates through the previous thoughts. 
It creates a new thought from each previous thought and enters a loop where it validates the current thought's state using a validate function.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/5.json b/docs/data/5.json new file mode 100644 index 0000000..93446ec --- /dev/null +++ b/docs/data/5.json @@ -0,0 +1,545 @@ +{ + "500": { + "file_id": 29, + "content": " else:\n prompt = prompter.validation_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n valid = parser.parse_validation_answer(\n current_thought.state, responses\n )\n current_thought.valid = valid\n thought_list.append(current_thought)\n if (\n not self.improve\n or current_thought.valid\n or current_try >= self.num_tries\n ):\n break\n improve_prompt = prompter.improve_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:345-366" + }, + "501": { + "file_id": 29, + "content": "Code block retrieves a prompt from prompter, then uses it to get responses from a language model (LM). It validates the response, updates the current thought's validation status and adds it to the thought list. 
If conditions met, breaks out of the loop.", + "type": "comment" + }, + "502": { + "file_id": 29, + "content": " lm.query(improve_prompt, num_responses=1)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(\n current_thought.state, responses\n )\n current_thought = Thought({**current_thought.state, **state_update})\n current_try += 1\n self.thoughts.append(thought_list)\n self.logger.info(\n \"Validate and improve operation %d created %d valid thoughts from %d previous thoughts\",\n self.id,\n len(\n [\n thought_list[-1]\n for thought_list in self.thoughts\n if thought_list[-1].valid\n ]\n ),\n len(previous_thoughts),\n )\nclass Generate(Operation):\n \"\"\"\n Operation to generate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.generate\n def __init__(\n self, num_branches_prompt: int = 1, num_branches_response: int = 1", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:367-399" + }, + "503": { + "file_id": 29, + "content": "This code defines an operation class \"Generate\" for generating thoughts using a language model (LM). It iteratively improves and validates each thought until it reaches the specified number of valid thoughts. Each thought is stored in the \"thoughts\" list. The \"Validate and improve\" operation creates new valid thoughts from previous invalid ones, appending them to the \"thoughts\" list.", + "type": "comment" + }, + "504": { + "file_id": 29, + "content": " ) -> None:\n \"\"\"\n Initializes a new Generate operation.\n :param num_branches_prompt: Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n :type num_branches_prompt: int\n :param num_branches_response: Number of responses the LM should generate for each prompt. 
Defaults to 1.\n :type num_branches_response: int\n \"\"\"\n super().__init__()\n self.num_branches_prompt: int = num_branches_prompt\n self.num_branches_response: int = num_branches_response\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of generated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Generate operation by generating thoughts from the predecessors.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:400-427" + }, + "505": { + "file_id": 29, + "content": "This code defines a class for generating thoughts, with parameters for the number of responses per prompt and the language model used. It initializes these parameters, stores generated thoughts in a list, and provides methods to retrieve them. 
The `_execute` method is responsible for generating thoughts using a language model, prompter, and parser.", + "type": "comment" + }, + "506": { + "file_id": 29, + "content": " The thoughts are generated by prompting the LM with the predecessors' thought states.\n If there are no predecessors, the kwargs are used as a base state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0 and len(self.predecessors) > 0:\n return\n if len(previous_thoughts) == 0:\n # no predecessors, use kwargs as base state\n previous_thoughts = [Thought(state=kwargs)]\n for thought in previous_thoughts:\n base_state = thought.state\n prompt = prompter.generate_prompt(self.num_branches_prompt, **base_state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:428-451" + }, + "507": { + "file_id": 29, + "content": "This function generates thoughts by using a language model (LM) with the predecessor's thought states as prompts. If there are no predecessors, it uses kwargs as a base state to generate thoughts. 
It then parses and logs the generated prompt for the LM.", + "type": "comment" + }, + "508": { + "file_id": 29, + "content": " responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_branches_response)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n for new_state in parser.parse_generate_answer(base_state, responses):\n new_state = {**base_state, **new_state}\n self.thoughts.append(Thought(new_state))\n self.logger.debug(\n \"New thought %d created with state %s\",\n self.thoughts[-1].id,\n self.thoughts[-1].state,\n )\n if (\n len(self.thoughts)\n > self.num_branches_prompt\n * self.num_branches_response\n * len(previous_thoughts)\n and self.num_branches_prompt > 0\n ):\n self.logger.warning(\n \"Generate operation %d created more thoughts than expected\",\n self.id,\n )\n self.logger.info(\n \"Generate operation %d created %d new thoughts\", self.id, len(self.thoughts)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:452-476" + }, + "509": { + "file_id": 29, + "content": "This code generates responses from a language model, parses them using a parser, and appends new thoughts to the thoughts list. 
If more thoughts are created than expected based on prompt and response numbers, a warning is logged.", + "type": "comment" + }, + "510": { + "file_id": 29, + "content": " )\nclass Improve(Operation):\n \"\"\"\n Operation to improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.improve\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Improve operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after improvement.\n :return: List of improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Improve operation by improving the predecessors' thoughts.\n The thoughts are improved by prompting the LM with the predecessors' thought states.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:477-513" + }, + "511": { + "file_id": 29, + "content": "The code defines a class \"Improve\" which represents an operation to enhance thoughts. It initializes a new Improve operation and gets the associated thoughts after improvement. 
The \"_execute\" method executes the operation by improving the predecessor's thoughts using language model (LM) prompts.", + "type": "comment" + }, + "512": { + "file_id": 29, + "content": " :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert len(self.predecessors) > 0, \"Needs at least one predecessor\"\n for thought in previous_thoughts:\n improve_prompt = prompter.improve_prompt(**thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(lm.query(improve_prompt, num_responses=1))\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(thought.state, responses)\n self.thoughts.append(Thought({**thought.state, **state_update}))\n self.logger.info(\n \"Improve operation %d improved %d thoughts\", self.id, len(self.thoughts)\n )\nclass Aggregate(Operation):\n \"\"\"", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:514-537" + }, + "513": { + "file_id": 29, + "content": "This code defines two classes: \"Improve\" and \"Aggregate\", which are subclasses of the \"Operation\" class. The \"Improve\" operation retrieves previous thoughts, improves their prompts using a prompter and language model (LM), gets response texts, parses the responses using a parser, and appends the updated thoughts to the list of thoughts for the current operation. 
The \"Aggregate\" operation also exists but has no implementation shown in this code snippet.", + "type": "comment" + }, + "514": { + "file_id": 29, + "content": " Operation to aggregate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.aggregate\n def __init__(self, num_responses: int = 1) -> None:\n \"\"\"\n Initializes a new Aggregate operation.\n :param num_responses: Number of responses to use for aggregation. Defaults to 1.\n :type num_responses: int\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n self.num_responses: int = num_responses\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after aggregation.\n :return: List of aggregated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Aggregate operation by aggregating the predecessors' thoughts.\n The thoughts are aggregated by prompting the LM with the predecessors' thought states.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:538-568" + }, + "515": { + "file_id": 29, + "content": "This code defines an Aggregate operation class that initializes a new Aggregate operation and gets the associated thoughts after aggregation. 
It also includes a method to execute the operation by prompting the language model with predecessors' thought states for aggregation.", + "type": "comment" + }, + "516": { + "file_id": 29, + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"Aggregate operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n return\n # applied in order of score\n base_state: Dict = {}\n for thought in sorted(previous_thoughts, key=lambda thought: thought.score):\n base_state = {**base_state, **thought.state}\n previous_thought_states = [thought.state for thought in previous_thoughts]\n prompt = prompter.aggregation_prompt(previous_thought_states)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:570-594" + }, + "517": { + "file_id": 29, + "content": "This code is a part of an operation class in Python. It checks if the operation has at least one predecessor and retrieves the previous thoughts from it. Then, it sorts the previous thoughts based on their score and constructs a prompt for aggregation using the prompter. 
Finally, it stores the states of the previous thoughts.", + "type": "comment" + }, + "518": { + "file_id": 29, + "content": " self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_responses)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n parsed = parser.parse_aggregation_answer(previous_thought_states, responses)\n if isinstance(parsed, dict):\n parsed = [parsed]\n for new_state in parsed:\n self.thoughts.append(Thought({**base_state, **new_state}))\nclass KeepBestN(Operation):\n \"\"\"\n Operation to keep the best N thoughts from predecessors based on their score.\n \"\"\"\n operation_type: OperationType = OperationType.keep_best_n\n def __init__(self, n: int, higher_is_better: bool = True) -> None:\n \"\"\"\n Initializes a new KeepBestN operation.\n :param n: Maximum number of thoughts to keep.\n :type n: int\n :param higher_is_better: Whether higher scores are better. Defaults to True.\n :type higher_is_better: bool\n :raises AssertionError: If `n` is not greater than zero.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:596-627" + }, + "519": { + "file_id": 29, + "content": "The code defines a class `KeepBestN` that represents an operation to keep the best N thoughts from predecessors based on their score. 
The `__init__` method initializes a new `KeepBestN` object with the maximum number of thoughts to keep and whether higher scores are better.", + "type": "comment" + }, + "520": { + "file_id": 29, + "content": " \"\"\"\n super().__init__()\n self.n: int = n\n assert self.n > 0, \"KeepBestN operation must keep at least one thought\"\n self.higher_is_better: bool = higher_is_better\n self.thoughts: List[Thought] = []\n def get_best_n(self) -> List[Thought]:\n \"\"\"\n Returns the best N thoughts from the predecessors based on their score.\n :return: List of best N thoughts.\n :rtype: List[Thought]\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert all(\n previous_thought.scored for previous_thought in previous_thoughts\n ), \"Not all thoughts have been scored\"\n try:\n return sorted(\n previous_thoughts,\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n except:", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:628-655" + }, + "521": { + "file_id": 29, + "content": "Class `KeepBestN` initializes its attributes and checks the minimum number of thoughts to keep, then provides a method `get_best_n()` that returns the top N thoughts based on their scores. 
It raises `AssertionError` if all predecessors haven't been executed or if not all thoughts have been scored.", + "type": "comment" + }, + "522": { + "file_id": 29, + "content": " self.logger.error(\"Error in KeepBestN operation\")\n self.logger.error(\n \"Previous operation: %s\", [op.id for op in self.predecessors]\n )\n self.logger.error(\"Previous thoughts: %s\", previous_thoughts)\n self.logger.error(\n \"Scores: %s\", [thought.score for thought in previous_thoughts]\n )\n return sorted(\n [i for i in previous_thoughts if isinstance(i.score, float)],\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepBestN operation by keeping the best N thoughts from the predecessors according to their score.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:656-683" + }, + "523": { + "file_id": 29, + "content": "This code defines a `KeepBestN` operation that keeps the top N thoughts from predecessors based on their scores. It logs an error message with previous operation details and previous thoughts' scores, and returns the sorted list of thoughts. 
The class has methods to access kept thoughts and execute the operation using given language model, prompter, and parser.", + "type": "comment" + }, + "524": { + "file_id": 29, + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepBestN operation must have at least one predecessor\"\n self.thoughts = [Thought.from_thought(thought) for thought in self.get_best_n()]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepBestN operation %d kept %d thoughts\", self.id, len(self.thoughts)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:685-708" + }, + "525": { + "file_id": 29, + "content": "The code defines a function for the KeepBestN operation, which requires at least one predecessor, and raises AssertionError if any conditions are not met. 
It retrieves thoughts from predecessors and logs information about the kept thoughts.", + "type": "comment" + }, + "526": { + "file_id": 29, + "content": " )\nclass KeepValid(Operation):\n \"\"\"\n Operation to keep valid thoughts from predecessors.\n \"\"\"\n operation_type: OperationType = OperationType.keep_valid\n def __init__(self) -> None:\n \"\"\"\n Initializes a new KeepValid operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepValid operation by keeping the valid thoughts from the predecessors.\n Keeps unvalidated thoughts as well.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:709-746" + }, + "527": { + "file_id": 29, + "content": "The `KeepValid` operation keeps valid thoughts from predecessors and returns them. It also preserves unvalidated thoughts. 
This class initializes a new KeepValid operation and provides methods for retrieving the kept thoughts and executing the operation using a language model, prompter, and parser.", + "type": "comment" + }, + "528": { + "file_id": 29, + "content": " :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepValid operation must have at least one predecessor\"\n self.thoughts: List[Thought] = [\n Thought.from_thought(thought)\n for thought in self.get_previous_thoughts()\n if not thought.validated or thought.valid\n ]\n if any(not thought.validated for thought in self.thoughts):\n self.logger.warning(\n \"KeepValid operation %d has unvalidated thoughts\", self.id\n )\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepValid operation %d kept %d thoughts\", self.id, len(self.thoughts)\n )\nclass GroundTruth(Operation):\n \"\"\"\n Operation to evaluate if thoughts correctly solve the problem, using a ground truth evaluator", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:747-778" + }, + "529": { + "file_id": 29, + "content": "The code defines two classes: \"KeepValid\" and \"GroundTruth\". The KeepValid class is an operation that requires at least one predecessor. It collects thoughts from previous operations (excluding those that are not valid or already valid) into a list called \"self.thoughts\". If there are any unvalidated thoughts, it logs a warning. Then, it logs debug and info messages for each thought in the list, including its ID and state, as well as the total number of thoughts kept. 
The GroundTruth class is an operation that uses a ground truth evaluator to assess if thoughts correctly solve the problem.", + "type": "comment" + }, + "530": { + "file_id": 29, + "content": " \"\"\"\n operation_type: OperationType = OperationType.ground_truth_evaluator\n def __init__(self, ground_truth_evaluator: Callable[[Dict], bool]) -> None:\n \"\"\"\n Initializes a new GroundTruth operation.\n :param ground_truth_evaluator: A function to evaluate if a thought solves the problem.\n :type ground_truth_evaluator: A function that takes a thought state and returns a boolean.\n \"\"\"\n super().__init__()\n self.ground_truth_evaluator: Callable[[Dict], bool] = ground_truth_evaluator\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of evaluated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the GroundTruth operation by evaluating the predecessors' thoughts using the ground truth evaluator function.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:779-807" + }, + "531": { + "file_id": 29, + "content": "This code defines a class for the GroundTruth operation, which initializes with a ground truth evaluator function. The operation evaluates predecessors' thoughts using this function and stores them in a list of thoughts. 
The get_thoughts method returns these evaluated thoughts.", + "type": "comment" + }, + "532": { + "file_id": 29, + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessor.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"GroundTruth operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n try:\n new_thought.solved = self.ground_truth_evaluator(new_thought.state)\n except:\n new_thought.solved = False\n self.thoughts.append(new_thought)\n self.logger.info(\n \"GroundTruth operation %d evaluated %d thoughts and %d solved the problem\",", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:809-833" + }, + "533": { + "file_id": 29, + "content": "This code is part of a class that implements the GroundTruth operation. It ensures that the operation has at least one predecessor and evaluates the thoughts generated by the previous operations. The evaluated thoughts are then added to the current operation's thoughts list, and their solved status is determined using the ground_truth_evaluator method. If any exceptions occur during the evaluation process, the solved status is set to False. 
Finally, an info message is logged indicating how many thoughts were evaluated and how many of them solved the problem.", + "type": "comment" + }, + "534": { + "file_id": 29, + "content": " self.id,\n len(self.thoughts),\n len([thought for thought in self.thoughts if thought.solved]),\n )\nclass Selector(Operation):\n \"\"\"\n Operation to select thoughts from predecessors.\n Useful for separating thoughts to perform different, subsequent operations on them.\n \"\"\"\n operation_type: OperationType = OperationType.selector\n def __init__(self, selector: Callable[[List[Thought]], List[Thought]]) -> None:\n \"\"\"\n Initializes a new Selector operation.\n :param selector: A function to select thoughts from the predecessors' thoughts.\n :type selector: A function that takes a list of thoughts and returns a list of thoughts.\n \"\"\"\n super().__init__()\n self.selector: Callable[[List[Thought]], List[Thought]] = selector\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts selected by the operation.\n :return: List of selected thoughts.\n :rtype: List[Thought]", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:834-864" + }, + "535": { + "file_id": 29, + "content": "This code defines a Selector operation for the Graph of Thoughts, which selects thoughts from predecessors to be used in subsequent operations. The constructor takes a selector function that accepts a list of thoughts and returns a list of selected thoughts. 
The get_thoughts method returns the thoughts selected by the operation.", + "type": "comment" + }, + "536": { + "file_id": 29, + "content": " \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Selector operation by selecting thoughts from the predecessors using the selector function.\n If the Selector has no predecessors, the selector function is called with a thought containing the kwargs as state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n previous_thoughts = [Thought(kwargs)]\n self.thoughts = [\n Thought.from_thought(thought)\n for thought in self.selector(previous_thoughts)", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:865-890" + }, + "537": { + "file_id": 29, + "content": "This code defines a Selector operation, which selects thoughts from predecessors using a provided selector function. If there are no predecessors, the function calls the selector with a thought containing the provided kwargs as state. The selected thoughts are then returned.", + "type": "comment" + }, + "538": { + "file_id": 29, + "content": " ]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s selected\", thought.id, thought.state\n )\n self.logger.info(\n \"Selector operation %d selected %d thoughts\", self.id, len(self.thoughts)\n )", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:891-900" + }, + "539": { + "file_id": 29, + "content": "This code segment is logging the selection of thoughts by a selector operation. 
It iterates over each thought in the self.thoughts list, and logs their ID and state. Finally, it logs the total number of thoughts selected by this operation.", + "type": "comment" + }, + "540": { + "file_id": 30, + "content": "/graph_of_thoughts/operations/thought.py", + "type": "filepath" + }, + "541": { + "file_id": 30, + "content": "The Thought class represents an LLM thought with attributes including state, score, validity flag, and solution flag. It includes methods for initializing new instances and cloning existing thoughts, as well as properties for validity, score, and solved flag management.", + "type": "summary" + }, + "542": { + "file_id": 30, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom typing import Iterator, Dict, Optional\nimport itertools\nclass Thought:\n \"\"\"\n Represents an LLM thought with its state, constructed by the parser, and various flags.\n \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n def __init__(self, state: Optional[Dict] = None) -> None:\n \"\"\"\n Initializes a new Thought instance with a state and various default flags.\n :param state: The state of the thought. Defaults to None.\n :type state: Optional[Dict]\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Thought._ids)\n self.state: Dict = state\n self._score: float = 0.0\n self._valid: bool = False\n self._solved: bool = False\n self.scored: bool = False", + "type": "code", + "location": "/graph_of_thoughts/operations/thought.py:1-35" + }, + "543": { + "file_id": 30, + "content": "This code defines a `Thought` class that represents an LLM thought with its state, constructed by the parser, and various flags. 
The class has instance attributes including a logger, unique ID, state, score, validity flag, solution flag, and a method to initialize a new Thought instance with a state and default flags if none provided.", + "type": "comment" + }, + "544": { + "file_id": 30, + "content": " self.validated: bool = False\n self.compared_to_ground_truth: bool = False\n @staticmethod\n def from_thought(thought: Thought) -> Thought:\n \"\"\"\n Creates a new thought from an existing one.\n :param thought: An instance of a Thought to clone.\n :return: A new Thought instance with properties copied from the input thought.\n \"\"\"\n new_thought = Thought(thought.state)\n new_thought.score = thought.score\n new_thought.valid = thought.valid\n new_thought.solved = thought.solved\n new_thought.scored = thought.scored\n new_thought.validated = thought.validated\n new_thought.compared_to_ground_truth = thought.compared_to_ground_truth\n return new_thought\n @property\n def valid(self) -> bool:\n \"\"\"\n Returns the validity of the thought.\n :return: The validity of the thought.\n :rtype: bool\n \"\"\"\n return self._valid\n @valid.setter\n def valid(self, valid: bool) -> None:\n \"\"\"", + "type": "code", + "location": "/graph_of_thoughts/operations/thought.py:36-68" + }, + "545": { + "file_id": 30, + "content": "This code defines a Thought class with properties like state, score, validity, solved status, scoring information, and comparison to ground truth. The class also has a static method `from_thought` to create a new thought from an existing one by cloning its properties. 
The `valid` property is a boolean representing the validity of the thought, which can be accessed using the `@property` decorator and modified with the `@valid.setter` decorator.", + "type": "comment" + }, + "546": { + "file_id": 30, + "content": " Sets the validity of the thought and the validated flag.\n :param valid: The validity of the thought.\n :type valid: bool\n \"\"\"\n self.validated = True\n self._valid = valid\n @property\n def score(self) -> float:\n \"\"\"\n Returns the score of the thought.\n :return: The score of the thought.\n :rtype: float\n \"\"\"\n return self._score\n @score.setter\n def score(self, new_score: float) -> None:\n \"\"\"\n Sets the score of the thought and the scored flag.\n :param new_score: The score of the thought.\n :type new_score: float\n \"\"\"\n self.scored = True\n self._score = new_score\n @property\n def solved(self) -> bool:\n \"\"\"\n Returns the solved flag of the thought.\n :return: The solved flag of the thought.\n :rtype: bool\n \"\"\"\n return self._solved\n @solved.setter\n def solved(self, solved: bool) -> None:\n \"\"\"\n Sets the solved flag of the thought and the compared_to_ground_truth flag.", + "type": "code", + "location": "/graph_of_thoughts/operations/thought.py:69-111" + }, + "547": { + "file_id": 30, + "content": "This code defines a Thought class with properties for validity, score, and solved flag. The valid property can be set and gets the validity of the thought. The score property returns and sets the score of the thought, marking it as scored when updated. 
The solved property returns and sets the solved flag of the thought, also marking it as compared_to_ground_truth when updated.", + "type": "comment" + }, + "548": { + "file_id": 30, + "content": " :param solved: Whether the thought contains a solution to the problem.\n :type solved: bool\n \"\"\"\n self.compared_to_ground_truth = True\n self._solved = solved", + "type": "code", + "location": "/graph_of_thoughts/operations/thought.py:113-117" + }, + "549": { + "file_id": 30, + "content": "Method defining a Thought object with a boolean parameter \"solved\" indicating if it contains a solution to the problem. The method sets self.compared_to_ground_truth to True and assigns the value of solved to self._solved.", + "type": "comment" + }, + "550": { + "file_id": 31, + "content": "/graph_of_thoughts/parser/__init__.py", + "type": "filepath" + }, + "551": { + "file_id": 31, + "content": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", + "type": "summary" + }, + "552": { + "file_id": 31, + "content": "from .parser import Parser", + "type": "code", + "location": "/graph_of_thoughts/parser/__init__.py:1-1" + }, + "553": { + "file_id": 31, + "content": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", + "type": "comment" + }, + "554": { + "file_id": 32, + "content": "/graph_of_thoughts/parser/parser.py", + "type": "filepath" + }, + "555": { + "file_id": 32, + "content": "The code includes an abstract class that defines three methods for parsing different language model responses, utilizing thought states and text inputs. 
The 'pass' statement serves as a placeholder or temporary measure.", + "type": "summary" + }, + "556": { + "file_id": 32, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List, Union\nclass Parser(ABC):\n \"\"\"\n Abstract base class that defines the interface for all parsers.\n Parsers are used to parse the responses from the language models.\n \"\"\"\n @abstractmethod\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for a aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model.", + "type": "code", + "location": "/graph_of_thoughts/parser/parser.py:1-31" + }, + "557": { + "file_id": 32, + "content": "Parser abstract class for language model responses parsing. Defines an interface to be implemented by subclasses. 
Used for aggregation prompts and takes thought states and language model responses as input, returning updated thought states after parsing the response.", + "type": "comment" + }, + "558": { + "file_id": 32, + "content": " :rtype: Union[Dict, List[Dict]]\n \"\"\"\n pass\n @abstractmethod\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought state after parsing the response from the language model.\n :rtype: Dict\n \"\"\"\n pass\n @abstractmethod\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model.", + "type": "code", + "location": "/graph_of_thoughts/parser/parser.py:32-59" + }, + "559": { + "file_id": 32, + "content": "This code defines three abstract methods in a class: `parse_improve_answer`, `parse_generate_answer`, and `parse`. These methods are responsible for parsing responses from a language model given a thought state and text inputs. 
The return types vary depending on the method, with `parse` returning either a dictionary or a list of dictionaries.", + "type": "comment" + }, + "560": { + "file_id": 32, + "content": " :rtype: List[Dict]\n \"\"\"\n pass\n @abstractmethod\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\n @abstractmethod\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n \"\"\"", + "type": "code", + "location": "/graph_of_thoughts/parser/parser.py:60-89" + }, + "561": { + "file_id": 32, + "content": "This code defines an abstract base class with three methods for parsing different types of language model responses. The `parse_thought_answer` method takes a thought state and text response to determine if the thought is valid. The `parse_validation_answer` method takes a list of thought states and text responses to determine if each thought's state is valid. 
Finally, the `parse_score_answer` method takes a list of thought states and text responses and returns the scores for each thought state.", + "type": "comment" + }, + "562": { + "file_id": 32, + "content": " pass", + "type": "code", + "location": "/graph_of_thoughts/parser/parser.py:90-90" + }, + "563": { + "file_id": 32, + "content": "The code contains a 'pass' statement, which is used as a placeholder and does nothing. It could be used for future implementation or as a temporary measure during development.", + "type": "comment" + }, + "564": { + "file_id": 33, + "content": "/graph_of_thoughts/prompter/__init__.py", + "type": "filepath" + }, + "565": { + "file_id": 33, + "content": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", + "type": "summary" + }, + "566": { + "file_id": 33, + "content": "from .prompter import Prompter", + "type": "code", + "location": "/graph_of_thoughts/prompter/__init__.py:1-1" + }, + "567": { + "file_id": 33, + "content": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", + "type": "comment" + }, + "568": { + "file_id": 34, + "content": "/graph_of_thoughts/prompter/prompter.py", + "type": "filepath" + }, + "569": { + "file_id": 34, + "content": "The code presents an abstract base class, Prompter, that generates language model prompts through two methods: `aggregation_prompt()` and `improve_prompt()`. 
It also includes optional parameters and keyword arguments for subclass customization.", + "type": "summary" + }, + "570": { + "file_id": 34, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List\nclass Prompter(ABC):\n \"\"\"\n Abstract base class that defines the interface for all prompters.\n Prompters are used to generate the prompts for the language models.\n \"\"\"\n @abstractmethod\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model.", + "type": "code", + "location": "/graph_of_thoughts/prompter/prompter.py:1-36" + }, + "571": { + "file_id": 34, + "content": "This code is an abstract base class called Prompter, which defines interfaces for all prompters. It helps generate prompts for language models in the form of aggregation and improve prompts. 
The class has two abstract methods: `aggregation_prompt()` and `improve_prompt()`, both with their own parameters and return types.", + "type": "comment" + }, + "572": { + "file_id": 34, + "content": " The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def generate_prompt(self, num_branches: int, **kwargs) -> str:\n \"\"\"\n Generate a generate prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments", + "type": "code", + "location": "/graph_of_thoughts/prompter/prompter.py:37-65" + }, + "573": { + "file_id": 34, + "content": "This code defines a base class for generating prompts and validation prompts for language models. The `generate_prompt` and `validation_prompt` methods are abstract, indicating that concrete implementations should override them. The methods accept an optional parameter `num_branches`, and additional keyword arguments (`kwargs`) to allow for customization in subclasses. 
The thought state is unpacked to enable explicit specification of required arguments.", + "type": "comment" + }, + "574": { + "file_id": 34, + "content": " and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n \"\"\"\n pass", + "type": "code", + "location": "/graph_of_thoughts/prompter/prompter.py:66-86" + }, + "575": { + "file_id": 34, + "content": "This code defines an abstract class with two methods: `generate_prompt()` and `score_prompt()`. The first method generates a validation prompt, and the second method generates a score prompt. Both methods accept additional keyword arguments. State dictionaries are used as input for the `score_prompt()` method to generate prompts for multiple thought states simultaneously.", + "type": "comment" + }, + "576": { + "file_id": 35, + "content": "/paper/README.md", + "type": "filepath" + }, + "577": { + "file_id": 35, + "content": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", + "type": "summary" + }, + "578": { + "file_id": 35, + "content": "## Plot Data\nThe data used to create the figure of the arXiv preprint article can be\nfound in the `final_results_gpt35.tar.bz2` archive. 
Unpack the archive\nand run the file `plots.py`.", + "type": "code", + "location": "/paper/README.md:1-5" + }, + "579": { + "file_id": 35, + "content": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", + "type": "comment" + }, + "580": { + "file_id": 36, + "content": "/paper/plots.py", + "type": "filepath" + }, + "581": { + "file_id": 36, + "content": "Both scripts utilize Python to process data from JSON files, generate boxplots, and customize visualizations with various settings such as titles, colors, y-axis limits, cost thresholds, and display options.", + "type": "summary" + }, + "582": { + "file_id": 36, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", + "type": "code", + "location": "/paper/plots.py:1-29" + }, + "583": { + "file_id": 36, + "content": "This Python script reads data from a directory of JSON files, organizes it by folders, and stores the results in a dictionary. 
It uses the matplotlib library for plotting, but the code provided focuses on reading and organizing data, not plotting itself. The script is likely part of a larger program that utilizes this data for further analysis or visualization.", + "type": "comment" + }, + "584": { + "file_id": 36, + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", + "type": "code", + "location": "/paper/plots.py:30-58" + }, + "585": { + "file_id": 36, + "content": "This code snippet sorts the results and then calculates final scores for different methods based on metrics like score, solution status, prompt and completion tokens, and cost.", + "type": "comment" + }, + "586": { + "file_id": 36, + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_final_scores_doc_merge(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and 
op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )", + "type": "code", + "location": "/paper/plots.py:59-87" + }, + "587": { + "file_id": 36, + "content": "This function calculates the final scores for each method in the results_complete dictionary, considering factors like cost, prompt and completion tokens, and operation scores. It sorts the scores in ascending order before returning them.", + "type": "comment" + }, + "588": { + "file_id": 36, + "content": " scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory, score_method):\n results_complete = get_complete_results(base_directory)\n scores = score_method(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n name,\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n y_upper=16,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n if name == \"set_intersection\":", + "type": "code", + "location": "/paper/plots.py:88-123" + }, + "589": { + "file_id": 36, + "content": "The code defines a function `get_plotting_data` that takes a base directory and a score method as input, returns plotting data for different methods by extracting scores, solved counts, and costs from the complete results. 
Another function, `plot_results`, is defined which takes various parameters such as name, results, methods order, etc., and plots the results based on the specified parameters. The code also includes checks to ensure that only valid methods are considered for plotting.", + "type": "comment" + }, + "590": { + "file_id": 36, + "content": " scores_ordered = [\n [min(score, length) for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order\n ]\n elif name == \"sorting\":\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"keyword_counting\":\n scores_ordered = [\n [\n score\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"document_merging\":\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n if name == \"keyword_counting\" or name == \"document_merging\":", + "type": "code", + "location": "/paper/plots.py:124-153" + }, + "591": { + "file_id": 36, + "content": "The code is filtering scores and costs based on specific conditions for different tasks (e.g., sorting, keyword counting, document merging) and creating a figure with axes. 
For each task, it generates a list of filtered scores and total costs.", + "type": "comment" + }, + "592": { + "file_id": 36, + "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n else:\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n if name == \"keyword_counting\":\n ax.set_xticklabels(methods_labels, fontsize=10)\n else:\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n if name == \"document_merging\":\n ax.set_ylim(y_lower, 12 if display_solved else 9.75)\n else:\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n if name == \"sorting\" or name == \"set_intersection\":\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:", + "type": "code", + "location": "/paper/plots.py:154-184" + }, + "593": { + "file_id": 36, + "content": "Creates boxplots for sorted scores based on methods order, sets x-tick labels and y-limits according to the current name (method), adjusts y-ticks depending on length and displays left ylabel if needed.", + "type": "comment" + }, + "594": { + "file_id": 36, + "content": " if name == \"keyword_counting\":\n ax.set_ylabel(\n f\"Number of errors; the lower the better\", fontsize=fig_fontsize\n )\n elif name == \"document_merging\":\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n else:\n ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n if name == \"sorting\" or name == \"set_intersection\":\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", 
label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:", + "type": "code", + "location": "/paper/plots.py:185-212" + }, + "595": { + "file_id": 36, + "content": "If \"keyword_counting\", set ylabel as \"Number of errors; the lower the better\". If \"document_merging\", set ylabel as \"Score (out of 10); the higher the better\". Otherwise, set ylabel as \"#incorrect elements; the lower the better\". If \"sorting\" or \"set_intersection\", set title as length of elements. Add a blue bar chart for total cost using ax2. Set tick colors and ylim on ax2. Customize ytick positions for ax2 using provided interval.", + "type": "comment" + }, + "596": { + "file_id": 36, + "content": " ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n if name == \"keyword_counting\" or name == \"document_merging\":\n fig.savefig(f\"{name}_{model}.pdf\", bbox_inches=\"tight\")\n else:\n fig.savefig(f\"{name}_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_032\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],", + "type": "code", + "location": "/paper/plots.py:213-246" + }, + "597": { + "file_id": 36, + "content": 
"Setting the y-label for a plot, displaying the number of solved methods, and saving the figure with appropriate filename based on the method name and model.", + "type": "comment" + }, + "598": { + "file_id": 36, + "content": " length=32,\n y_upper=19,\n cost_upper=2,\n display_solved=True,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_064\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=64,\n y_upper=32,\n cost_upper=5.4,\n display_solved=True,\n annotation_offset=0.2,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_128\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=128,\n y_upper=94,\n cost_upper=12,\n display_solved=True,\n annotation_offset=-3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_032\", get_final_scores),\n length=32,\n display_solved=False,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True,", + "type": "code", + "location": "/paper/plots.py:247-289" + }, + "599": { + "file_id": 36, + "content": "The code snippet is defining functions and parameters for plotting data from various models. It uses the 'plot_results' function with different arguments to visualize the results of operations such as set intersection and sorting. The plots have various settings like length, upper limit, cost, display options, etc. 
to customize the visual representation of the data.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/data/6.json b/docs/data/6.json new file mode 100644 index 0000000..4bff850 --- /dev/null +++ b/docs/data/6.json @@ -0,0 +1,24 @@ +{ + "600": { + "file_id": 36, + "content": ")\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_064\", get_final_scores),\n length=64,\n y_upper=64,\n cost_upper=5.1,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_128\", get_final_scores),\n length=128,\n y_upper=128,\n cost_upper=17,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"keyword_counting\",\n get_plotting_data(\"keyword_counting_gpt35\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"gsp4\", \"gsp8\", \"gspx\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"],\n y_upper=35,\n cost_upper=9,\n display_solved=True,\n annotation_offset=-0.3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"document_merging\",\n get_plotting_data(\"document_merging_gpt35_16k\", get_final_scores_doc_merge),\n methods_order=[\"io\", \"cot\", \"tot\", \"gsp\", \"gsp2\"],", + "type": "code", + "location": "/paper/plots.py:290-330" + }, + "601": { + "file_id": 36, + "content": "Code snippet contains multiple function calls to plot results using different sets of data and parameters. It plots data for tasks \"sorting\" and \"keyword_counting\", and \"document_merging\". 
Each call specifies the task, data, methods order, labels, limits, display options, and other settings.", + "type": "comment" + }, + "602": { + "file_id": 36, + "content": " methods_labels=[\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"],\n y_upper=10,\n cost_upper=15,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", + "type": "code", + "location": "/paper/plots.py:331-337" + }, + "603": { + "file_id": 36, + "content": "Parameters for plotting methods labels, upper limit of y-axis, cost threshold, and display options.", + "type": "comment" + } +} \ No newline at end of file diff --git a/docs/doc/0cea9af3-6d7e-464b-9c98-67d890a84704.json b/docs/doc/0cea9af3-6d7e-464b-9c98-67d890a84704.json new file mode 100644 index 0000000..0e9301b --- /dev/null +++ b/docs/doc/0cea9af3-6d7e-464b-9c98-67d890a84704.json @@ -0,0 +1,10 @@ +{ + "summary": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", + "details": [ + { + "comment": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/README.md\":0-6", + "content": "# Examples\nThis directory contains scripts for running various examples using the Graph of Thoughts package. 
Each script is a standalone Python program that sets up and runs a particular example.\nWe further include prompt files for each example that can be used to test prompts manually in a console.\nPlease refer to the individual example directories for more information on the specific example." + } + ] +} \ No newline at end of file diff --git a/docs/doc/210f2677-be0b-44ed-911c-75384dfccc5e.json b/docs/doc/210f2677-be0b-44ed-911c-75384dfccc5e.json new file mode 100644 index 0000000..2d1820b --- /dev/null +++ b/docs/doc/210f2677-be0b-44ed-911c-75384dfccc5e.json @@ -0,0 +1,360 @@ +{ + "summary": "Both comments discuss company-supplier agreements, covering aspects such as NDAs, IT system maintenance, late delivery penalties, termination provisions, confidentiality clauses, and governing laws.", + "details": [ + { + "comment": "This code appears to contain two example Non-Disclosure Agreements (NDA) in JSON format, with fields such as parties involved, purpose of information sharing, definition of Confidential Information, obligations and penalties for breaching the agreement, applicable laws, and signatures.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":0-2", + "content": "[\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\n1. Agreement between [Your Company Name] and [Recipient Name] on [Date].\\n2. Information sharing for the purpose of [specific project or purpose].\\n3. \\\"Confidential Information\\\" includes all potentially commercially valuable information, specifically software development tactics, processes, and in-house research results.\\n4. Receiving party is obligated to protect the Confidential Information, use it solely for the disclosed purpose, and not disclose it without consent.\\n5. Breach penalties include injunctive relief, other remedies, and a $200,000 fee per breach.\\n6. The Agreement applies to the Parties and their successors and assigns. 
It contains all related agreements and lack of enforcement doesn't imply waiver.\\n7. The Agreement is under the laws of [State].\\n8. Signed by [Your Company Name] and [Recipient Name] at the above date.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective from [Effective Date], this NDA involves [Your Company Name]" + }, + { + "comment": "The code is a confidentiality agreement between a Disclosing Party and a Recipient (Receiving Party) involving the exchange of confidential information related to a specific topic for a stated purpose. It outlines obligations for the Receiving Party, exclusions, and non-competition clauses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":2-2", + "content": " (\\\"Disclosing Party\\\"), and [Recipient Name] (\\\"Receiving Party\\\").\\n\\n1. Purpose: The Disclosing Party will disclose confidential information related to [Topic of Research] to the Receiving Party for [Purpose].\\n\\n2. Confidential Information: Defined as all non-public reports, data, designs, and other materials provided by the Disclosing Party to the Receiving Party.\\n\\n3. Receiving Party's Obligations:\\n a. Use, reproduce, or distribute the confidential information only for the agreed purpose.\\n b. Restrict access to the information to necessary parties, ensuring they abide by strict confidentiality.\\n c. Return or destroy all confidential information upon request or at the end of the agreement.\\n\\n4. Exclusions: Information will not be classified as confidential if it is already known to the Receiving Party, publicly known, or independently developed by the Receiving Party.\\n\\n5. 
Non-Competition: The Receiving Party will not engage in any competing business against the Disclo" + }, + { + "comment": "The code defines a confidentiality and non-disclosure agreement between AquaBlue Innovations and PineTree Solutions, with provisions for protecting classified information during potential technological alliance discussions.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":2-3", + "content": "sing Party during the agreement and one year after its termination.\\n\\n6. Term and Termination: The agreement is valid for [e.g., \\\"two years\\\"], unless terminated earlier with [e.g., \\\"30 days\\\"] written notice. The Receiving Party's non-disclosure and non-competition obligations persist post-termination.\\n\\n7. General Provisions:\\n a. Governing Law: [Your State]'s laws apply.\\n b. Amendments: Only valid if written and signed by both parties.\\n c. Entire Agreement: This contract overrules previous related agreements.\\n\\nSigned as of the Effective Date by [Your Company Name] - Disclosing Party [Recipient Name] - Receiving Party.\",\n \"CONFIDENTIALITY & NON-DISCLOSURE AGREEMENT\\n\\n Entities Involved:\\n Effective [Date], between [AquaBlue Innovations], established in [State], and [PineTree Solutions], a registered entity.\\n\\n Objective:\\n To safeguard classified data during talks of a potential technological alliance.\\n\\n Specification of Protected Information:\\n " + }, + { + "comment": "Code snippet describes a confidentiality agreement between AquaBlue Innovations and PineTree Solutions, detailing non-disclosure obligations, violation consequences, and legal reference.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":3-4", + "content": "Particularly:\\n\\na. System designs and architectural schematics.\\nb. Proprietary computational algorithms.\\n\\n Receiver's Obligations:\\n a. 
Maintain strict non-disclosure using best practices.\\n b. Employ solely for the aforementioned aim.\\n c. No unveiling without explicit authorization.\\n\\n Violation Ramifications:\\n A charge of $280,000 for every infringement, plus possible legal proceedings.\\n\\n General Terms:\\n Binding for both parties and any successors. This encapsulates the entire accord.\\n\\n Legal Reference:\\n Governed as per [State]'s legal framework.\\n\\n Attestation:\\n Duly signed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"SECRECY & DISCLOSURE AGREEMENT\\n\\n Contracting Parties:\\n Dated [Date], drawn between [AquaBlue Innovations], a [State]-based corporation, and [PineTree Solutions], a licensed organization.\\n\\n Aim:\\n To protect exclusive insights amidst dialogues for a technological partnership.\\n\\n C" + }, + { + "comment": "The code provides the details of a non-disclosure and non-competition agreement between AquaBlue Innovations and PineTree Solutions. It outlines the categorization of sensitive data, commitments of the recipient, repercussions for non-compliance, overall provisions, jurisdictional standpoint, and ratification details.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":4-5", + "content": "ategorization of Sensitive Data:\\n Includes:\\n\\na. Internal software blueprints.\\nb. Intellectual property awaiting patents.\\n\\n Commitments of Recipient:\\n a. Uphold confidentiality, ensuring data integrity.\\n b. Utilize strictly for collaborative ventures.\\n c. No exposure without prior consensus.\\n\\n Repercussions for Non-Compliance:\\n $295,000 fine for each transgression, and the option for legal recourse.\\n\\n Overall Provisions:\\n Legally enforceable for signatories and successors. 
Complete and sole agreement.\\n\\n Juridical Standpoint:\\n Under the auspices of [State] laws.\\n\\n Ratification:\\n Confirmed and endorsed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"This Non-Disclosure and Non-Competition Agreement is made between [Your Company Name] and [Contractor Name/Company].\\n\\n1. Confidentiality: The Contractor acknowledges access to the Company's confidential information during their relationship.\\n\\n2. Non-Disclosure: The" + }, + { + "comment": "This code contains the terms of a contract between a company and a contractor, outlining confidentiality obligations, non-competition agreement, return of information, legal remedies, governing law, and acknowledgment by both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":5-5", + "content": " Contractor agrees not to disclose, use, reproduce, or distribute this confidential information unless necessary for their obligations.\\n\\n3. Non-Competition: The Contractor agrees not to compete with the company or assist others in doing so for one year after the termination of their relationship. They also agree not to solicit the company's clients or customers for the benefit of a competitor for one year.\\n\\n4. Return of Confidential Information: At the end of the relationship or upon the company's request, the Contractor will return all confidential information and copies thereof.\\n\\n5. Remedies: For any breach, the Company may seek specific performance and injunctive relief, in addition to other remedies.\\n\\n6. Governing Law: The Agreement is governed by the laws of [Your State].\\n\\n7. 
Entire Agreement: This document replaces all previous agreements and understandings on the subject.\\n\\nBoth parties acknowledge understanding and voluntarily accepting the Agreement.\\n\\nSignatures required from [Your Company Name] and [Contractor Name/Company].\"," + }, + { + "comment": "This is a loyalty agreement between a company and an employee, outlining the specialized training provided to the employee at no cost in exchange for their commitment to work for the company for a specified period. The employee may need to pay compensation if they leave within two years after completing training, subject to certain exceptions. The agreement is governed by the laws of [State/Country] and is binding to all involved parties and their successors.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":6-7", + "content": " \"This Loyalty Agreement is between [Company Name] and [Employee Full Name], where the company agrees to provide specialized training at no cost to the employee, who in turn commits to work for the company for a specified period. If the employee leaves the company within two years after completing training, they must pay $50,000 as compensation for training costs, payable within 30 days of termination. Exceptions to this repayment include termination without cause, resignation due to breach of agreement by the company, or other agreed upon circumstances. Any changes to this agreement must be in writing and signed by both parties, and the agreement will be governed by the laws of [State/Country]. This agreement is binding to all involved parties and their successors. 
Both the company and the employee sign to attest to these terms.\",\n \"EMPLOYEE LOYALTY AGREEMENT\\n\\nThis agreement is entered into by [Company Name] and [Employee Name] to protect the company's business interests, goodw" + }, + { + "comment": "This code outlines the terms and conditions of an employment agreement, including non-disclosure, non-competition, non-solicitation clauses, property return policy, legal remedies, severability, governing law, and amendment procedures.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":7-7", + "content": "ill, and confidential information, and affirm employee's loyalty. \\n\\n1. Non-disclosure: Employee agrees to not disclose or use company's confidential information, during or post-employment. \\n\\n2. Non-competition: Employee will not work for or establish a competitor within [e.g., \\\"50\\\"] miles from the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n3. Non-solicitation: Employee will not solicit clients or employees of the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n4. Return of Property: Employee will return all company property upon termination.\\n\\n5. Remedies: Company can seek injunction for a breach or potential breach of this agreement.\\n\\n6. Severability: If any provision of this agreement is held invalid, the remainder of the Agreement will continue.\\n\\n7. Governing Law: This agreement will be governed by the laws of [State, e.g., \\\"California\\\"].\\n\\n8. Agreement: This is the entire agreement and supersedes prior negotiations.\\n\\n9. Amendments: Any changes must " + }, + { + "comment": "Code snippet represents a loyalty agreement template between a company and a contractor. It includes confidentiality, non-compete and non-solicitation clauses, return of property upon termination, legal remedies in case of breach, and adherence to specified state laws. 
The agreement can only be amended with both parties' written signature.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":7-9", + "content": "be in writing and signed by both parties.\\n\\nSignatures of both parties indicate agreement to these terms.\\n\\n[Company Name] - Authorized Signatory [Employee Name]\",\n \"This Loyalty Agreement is between [Company Name] and [Contractor Company Name]. The Agreement ensures the Contractor's loyalty and confidentiality towards the Company during and post engagement. Contractor agrees not to use or disclose the Company's confidential information, or engage in competing business or solicitation for a period of [e.g., \\\"12\\\"] months post termination. Contractor must return all Company property upon termination. In case of breach, Company can seek legal remedies including injunction. The Agreement remains valid even if a provision is held invalid. The Agreement follows [State, e.g., \\\"California\\\"] laws and replaces all previous understandings. It can be amended only in writing with both parties' signature.\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______" + }, + { + "comment": "This code is a contractual agreement between Company Name and Contractor Company Name, outlining confidentiality, non-competition, non-solicitation, property return, and penalty for breach clauses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":9-9", + "content": ", 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. CONFIDENTIALITY\\n\\nContractor agrees not to disclose, use, or allow the use of the Company's confidential information during or after the relationship, except as required for their services to the Company.\\n\\n2. 
NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n3. NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n4. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property and data.\\n\\n5. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contra" + }, + { + "comment": "This code represents a B2B contractor loyalty agreement between Company Name and Contractor Company Name, effective on a specific date. The contract outlines terms including the definition of confidential information, non-disclosure obligations, payment terms, governing law, and agreement superseding prior discussions or agreements. Both parties must sign below to agree to these terms.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":9-10", + "content": "ctor shall pay the Company a penalty of $50,000.\\n\\n6. GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"California\\\"] laws.\\n\\n7. ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\\nDate: _______________________ Date: _______________________\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______, 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. 
DEFINITION OF CONFIDENTIAL INFORMATION\\n\\nFor the purposes of this Agreement, \\\"confidential information\\\" shall refer to research results, software created, devices produced by the Company, and any other information deemed proprietary or not generally known to the public.\\n\\n2. CONFIDENTIALITY\\n\\nContractor agrees not to " + }, + { + "comment": "Contract specifying confidentiality, non-compete, non-solicitation, property return, breach penalty, and governing law for a relationship between a company and contractor.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":10-10", + "content": "disclose, use, or allow the use of the Company's confidential information, as defined herein, during or after the relationship, except as required for their services to the Company.\\n\\n3. NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n4. NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n5. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property, including all items containing or pertaining to confidential information.\\n\\n6. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contractor shall pay the Company a penalty of $50,000.\\n\\n7. 
GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"Ca" + }, + { + "comment": "The code contains the text of a Non-Disclosure Agreement (NDA) between Company and Supplier, including terms such as purpose of sharing confidential information, non-disclosure obligations for Supplier, exceptions, penalties, and agreement validity.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":10-11", + "content": "lifornia\\\"] laws.\\n\\n8. ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\",\n \"The Non-Disclosure Agreement (NDA) dated [Date] is between [Company], based in [Country/State], and [Supplier], also incorporated in [Country/State]. The Company intends to disclose confidential information to the Supplier for [purpose]. This confidential data can include business strategies, financial data, customer information, and product designs. The Supplier agrees to refrain from sharing this information, barring any legal requirements. Exceptions to this confidentiality are in cases where the information becomes public or was already known by the Supplier before the Company's disclosure. If the Supplier breaches this agreement, they face a financial penalty of [$]. The NDA is valid for [X years], unless the Company provides writte" + }, + { + "comment": "This code contains a non-disclosure and non-compete agreement between a company and a supplier, with details including the effective date, parties involved, confidentiality obligations, non-competition restrictions, termination procedures, and governing laws.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":11-12", + "content": "n termination. 
Upon the Company's request, the Supplier must return or destroy all copies of Confidential Information. This agreement supersedes previous agreements and can only be altered by a written document approved by both parties. The NDA is governed by the laws of [specific country/state].\",\n \"NON-DISCLOSURE AND NON-COMPETE AGREEMENT\\n\\nEffective Date: [Date]\\n\\nPARTIES:\\n\\n Company: [Full Legal Name of Company], located at [Company Address].\\n Supplier: [Full Legal Name of Supplier], located at [Supplier Address].\\n\\n1. CONFIDENTIALITY:\\n\\nSupplier shall not disclose Company's confidential information, which includes business strategies, financial data, and customer details, to any third party. This confidentiality obligation lasts for [X years, e.g., \\\"5 years\\\"] from the date of disclosure.\\n\\n2. NON-COMPETITION:\\n\\nFor [X years, e.g., \\\"3 years\\\"] following the termination of their business relationship, Supplier agrees not to engage in or start any business that dir" + }, + { + "comment": "This code represents a contract or agreement between a company and a supplier, outlining the terms of their business relationship. It includes provisions such as competition limits, penalty for breach, return of information, governing law, and acknowledgement by both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":12-13", + "content": "ectly competes with Company within a [X mile/km radius, e.g., \\\"50-mile radius\\\"] of Company's primary business location.\\n\\n3. PENALTY FOR BREACH:\\n\\nShould Supplier breach this Agreement, they shall pay Company a penalty of [specific amount, e.g., \\\"$50,000\\\"], in addition to any other legal remedies available to Company.\\n\\n4. RETURN OF INFORMATION:\\n\\nUpon request, Supplier shall return or destroy all of Company's confidential information and confirm its deletion in writing.\\n\\n5. 
GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\nAGREEMENT ACKNOWLEDGEMENT:\\n\\n__________ [Company] __________ [Supplier]\",\n \"DATA ANALYSIS EMPLOYEE AGREEMENT\\n\\nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as " + }, + { + "comment": "This code defines an employment agreement for a Data Analyst, specifying the employee's duties, term, compensation, and confidentiality obligations.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":13-13", + "content": "the \\\"Company,\\\" and [Employee Name], an individual residing at [Employee Address], herein referred to as the \\\"Employee.\\\"\\n\\n Position and Duties:\\n a. The Company hereby employs Employee in the capacity of Data Analyst.\\n b. 
The Employee's primary duties will be to [specific data analysis tasks, e.g., \\\"analyze sales data, forecast trends, and produce reports for managerial review\\\"].\\n\\n Term: The Employee's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Employee under this Agreement, the Company will pay Employee a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Employee agrees not to disclose or use, either during or after the term of employment, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of performin" + }, + { + "comment": "Code is an agreement between a company and an employee, defining intellectual property ownership, termination terms, governing law, amendments process, and more. It outlines the legal relationship and responsibilities for both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":13-14", + "content": "g their duties for the Company.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Employee in the course of this employment related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. 
Upon termination, Employee agrees to return all company property and data.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreement may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\\n\\n[Company Name or Authorized [Employee Name]\\nRepresentative Name, Title]\",\n \"DATA ANALYSIS SERVICE AGREEMENT\\n\\" + }, + { + "comment": "This code snippet is the beginning of a legal agreement between a company and a contractor, outlining the scope of work, term, and compensation for data analysis services.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":14-14", + "content": "nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as the \\\"Company,\\\" and [Contractor Business Name], a [legal structure, e.g., \\\"limited liability company\\\"] organized under the laws of [State/Country], with its principal place of business at [Contractor Business Address], herein referred to as the \\\"Contractor.\\\"\\n\\n Scope of Work:\\n a. The Contractor agrees to provide data analysis services to the Company.\\n b. 
The specific services will include [specific data analysis tasks, e.g., \\\"analyzing sales data, forecasting trends, and producing reports for managerial review\\\"].\\n\\n Term: The Contractor's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Contractor under this Agreeme" + }, + { + "comment": "This code is a contract between the Company and Contractor, outlining payment terms, confidentiality agreements, intellectual property ownership, termination conditions, governing law, and amendments to the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":14-14", + "content": "nt, the Company will pay the Contractor a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Contractor agrees not to disclose or use, either during or after the term of this Agreement, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of providing the services.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Contractor in the course of providing the services related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. Upon termination, Contractor agrees to return all company data and any other proprietary materials.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreemen" + }, + { + "comment": "This code appears to be a non-disclosure agreement (NDA) between a researcher and a university. The NDA outlines the terms of confidentiality, purpose, obligations, exceptions, and duration for sharing research-related materials. 
It also mentions the possibility of amending the agreement in writing and signed by both parties, and that this agreement contains the entire understanding between them, superseding any prior agreements.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":14-15", + "content": "t may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nDate: [Insert Date]\\n\\nParties: [University Name], [University Address] (\\\"Disclosing Party\\\") and [Researcher's Full Name], [Researcher's Address] (\\\"Receiving Party\\\").\\n\\n1. Purpose: For the research of [Briefly Describe the Research or Project].\\n\\n2. Confidential Information: Includes data, studies, reports, patents, and other valuable business-related material.\\n\\n3. Obligations:\\na. Confidential Information must remain secret.\\nb. It's for the specified purpose only.\\nc. No third-party disclosure without consent.\\n\\n4. Exceptions: Public knowledge, third-party shared info, or independently developed.\\n\\n5. Duration: " + }, + { + "comment": "This code represents a legal agreement between a business and a university, outlining the terms of their cooperation for research, innovation, and facilitation. The agreement includes provisions regarding confidentiality, return/destruction of information, transfer restrictions, governing law, amendment process, and signing parties' agreement to the terms.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":15-16", + "content": "Confidentiality lasts [X years, e.g., 2 years] from disclosure.\\n\\n6. 
Return: All Confidential Information must be returned or destroyed upon request.\\n\\n7. No Transfer: This doesn't grant property rights or licenses.\\n\\n8. Law: Governed by [State/Country] laws.\\n\\n9. Amendments: Only in writing and signed.\\n\\nAgreement: By signing, parties agree to the terms.\",\n \"UNIVERSITY-BUSINESS COOPERATION AGREEMENT\\n\\nThis Cooperation Agreement (the \\\"Agreement\\\") is made and entered into on [Insert Date], by and between [Business Name], a [business type, e.g., \\\"corporation\\\"] located at [Business Address], hereinafter referred to as the \\\"Business', and [University Name], a higher education institution located at [University Address], hereinafter referred to as the \\\"University\\\".\\n\\n1. Objective:\\n\\nThe purpose of this Agreement is to define the terms under which the Business and the University will cooperate to [Objective e.g., \\\"jointly conduct research, promote innovation, and facilitate" + }, + { + "comment": "This code is outlining the terms and conditions for a research collaboration between two organizations, including areas of cooperation, intellectual property sharing, funding contributions, confidentiality, and agreement duration.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":16-16", + "content": " the exchange of knowledge in the field of _________\\\"].\\n\\n2. Scope of Cooperation:\\n\\na. Research Collaborations: Joint research initiatives, sharing of resources, and publications.\\n\\nb. Internships and Placements: Facilitation of student internships, projects, and job placements.\\n\\nc. Seminars and Workshops: Organizing joint seminars, conferences, and workshops.\\n\\nd. Facilities and Resource Sharing: Providing access to labs, equipment, libraries, etc.\\n\\n3. 
Intellectual Property:\\n\\nAll intellectual property developed jointly will be shared, and a separate agreement will detail the rights, ownership, and any revenue distribution.\\n\\n4. Funding and Resources:\\n\\nBoth parties agree to jointly contribute [Specify Amount or Percentage], and additional fund sourcing details will be determined on a project-by-project basis.\\n\\n5. Confidentiality:\\n\\nBoth parties agree to maintain the confidentiality of shared proprietary information.\\n\\n6. Duration and Termination:\\n\\nThis Agreement wi" + }, + { + "comment": "This is a sample Supply Agreement for a university laboratory between the Supplier and the University. The agreement outlines terms, dispute resolution, amendments, liability, and governing law.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":16-17", + "content": "ll remain in effect for [e.g., \\\"three years\\\"] from the date of signing, unless terminated earlier by either party with [e.g., \\\"30 days\\\"] written notice.\\n\\n7. Dispute Resolution:\\n\\nAny disputes arising from this Agreement will first attempt resolution through mediation. If unresolved, disputes will be subject to the jurisdiction of [State/Country].\\n\\n8. Amendments:\\n\\nChanges to this Agreement must be in writing and signed by both parties.\\n\\n9. Liability:\\n\\nEach party assumes responsibility for its actions and is not liable for the actions of the other party.\\n\\n10. 
Governing Law:\\n\\nThis Agreement is governed by the laws of [State/Country].\\n\\nIN WITNESS WHEREOF, both parties have executed this Agreement as of the date first mentioned above.\",\n \"SUPPLY AGREEMENT FOR UNIVERSITY LABORATORY\\n\\nThis Supply Agreement (the \\\"Agreement\\\"), made as of [Insert Date], is entered into by and between [Supplier Name], a [business entity type, e.g., \\\"corporation\\\"] having its principal o" + }, + { + "comment": "The code outlines an agreement between a Supplier and a University for the provision of specific products/materials/equipment to be used in the University's laboratory, with details outlined in Annex A. It covers terms of supply, payment terms, and warranty information.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":17-17", + "content": "ffice at [Supplier Address], hereinafter referred to as the \\\"Supplier', and [University Name], a higher education institution located at [University Address], acting through its [specific department or laboratory, e.g., \\\"Department of Chemistry\\\"], hereinafter referred to as the \\\"University\\\".\\n\\n1. Purpose:\\n\\nThe Supplier agrees to provide specific products/materials/equipment, as detailed in Annex A, to the University for use in its laboratory.\\n\\n2. Terms of Supply:\\n\\na. Description of Goods: The goods to be supplied are detailed in Annex A attached herewith.\\n\\nb. Delivery: Goods will be delivered to [University Address or specific lab address] within [specific timeframe].\\n\\nc. Pricing: The price for the goods is set out in Annex A and includes all packaging, transportation, and delivery costs unless otherwise specified.\\n\\n3. Payment Terms:\\n\\nPayments will be made by the University within [e.g., \\\"30 days\\\"] of receiving the invoice from the Supplier.\\n\\n4. 
Warranty:\\n\\nThe" + }, + { + "comment": "This code provides a template for an agreement between a supplier and the University, outlining warranties on goods, confidentiality terms, termination conditions, governing law, and amendment procedures.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":17-17", + "content": " Supplier warrants that all goods supplied under this Agreement will be free from defects for a period of [specific duration, e.g., \\\"12 months\\\"] from the date of delivery.\\n\\n5. No Disclosure Clause:\\n\\na. The University agrees not to disclose, reproduce, or distribute any proprietary information, trade secrets, or other confidential details related to the products/materials/equipment provided by the Supplier without the Supplier's prior written consent.\\n\\nb. This clause remains effective for a period of [e.g., \\\"5 years\\\"] from the date of the last delivery of the goods under this Agreement.\\n\\n6. Termination:\\n\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term of this Agreement and fails to remedy such breach within the notice period.\\n\\n7. Governing Law:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [State/Country].\\n\\n8. Amendments:\\n\\nModifications to this Agreement mus" + }, + { + "comment": "This code represents a template for a Laboratory Supply Agreement, including sections on purpose, delivery, payment, warranty, non-disclosure, and termination. 
It provides a framework for suppliers and universities to establish contractual agreements regarding the supply of goods to laboratories.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":17-18", + "content": "t be in writing and signed by both parties.\\n\\nIN WITNESS WHEREOF, the parties hereto have executed this Supply Agreement as of the date first above written.\",\n \"LABORATORY SUPPLY AGREEMENT\\n\\nDate: [Insert Date]\\n\\nParties:\\n\\n Supplier: [Supplier Name], [Supplier Address]\\n University: [University Name], [University Address]\\n\\n1. Purpose: Supplier will provide goods as listed in Annex A to the University's laboratory.\\n\\n2. Delivery: Within [specific timeframe, e.g., \\\"30 days\\\"] to [specific lab address].\\n\\n3. Payment: University will pay within [e.g., \\\"30 days\\\"] of invoice receipt.\\n\\n4. Warranty: Goods are defect-free for [e.g., \\\"12 months\\\"] from delivery.\\n\\n5. Non-disclosure: University will not disclose Supplier\\u2019s proprietary details for [e.g., \\\"5 years\\\"]. Breach will result in a penalty of [specific amount or formula, e.g., \\\"$5,000 per incident or actual damages, whichever is greater\\\"].\\n\\n6. Termination: [e.g., \\\"30 days\\\"] notice for breaches unresolve" + }, + { + "comment": "This code represents a sample freelance agreement document with various sections such as services provided, payment terms, deadline, confidentiality, and termination. It is governed by the laws of a specific state/country and allows for amendments only through written changes signed by both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":18-19", + "content": "d within said period.\\n\\n7. Law: Governed by [State/Country] laws.\\n\\n8. 
Amendments: Both parties must sign written changes.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freelancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n5. TERMINATION:\\n\\nEither party may terminate" + }, + { + "comment": "This is a contractual agreement between a client and a freelancer, detailing the services provided by the freelancer, payment terms, termination notice period, independent contractor status, governing law, and amendment requirements.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":19-20", + "content": " this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n6. INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n7. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n8. 
AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freel" + }, + { + "comment": "This code is a template for a legal agreement between a client and freelancer, outlining payment terms, deadlines, penalties for late delivery or confidentiality breach, confidentiality agreement, termination clause, and the status of the freelancer as an independent contractor.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":20-20", + "content": "ancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. PENALTIES:\\n\\na. Late Delivery: If Freelancer fails to deliver the completed service by the specified deadline, a penalty of [specific amount, e.g., \\\"$50\\\"] per day will be deducted from the final payment until the service is delivered.\\n\\nb. Confidentiality Breach: Breaching the confidentiality clause will result in a penalty of [specific amount, e.g., \\\"$2,000\\\"].\\n\\n5. CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n6. TERMINATION:\\n\\nEither party may terminate this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n7. 
INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and " + }, + { + "comment": "This code outlines an agreement between Company A and Company B for a joint research project, detailing duties, financial contributions, confidentiality, intellectual property ownership, termination terms, risks and liabilities, amendment requirements, and the duration of the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":20-21", + "content": "not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n8. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n9. AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"This document outlines the terms of cooperation between Company A and Company B for a joint research project. The duties of each company are designated, with a detailed financial contribution outlined in Appendix A. Confidentiality is strictly enforced, and any intellectual property created will be jointly owned. All published findings will be reviewed by both parties for protection of proprietary information. Termination of this agreement requires 30 days' written notice, and each party assumes any risks or liabilities during this collaboration. Amendments must be in writing and signed by both parties. The duration of the agreement lasts from the s" + }, + { + "comment": "Code snippet represents the start of a business agreement document, with fields for effective date and names and addresses of the businesses involved. 
It outlines the purpose of collaboration, terms of service, payment terms, and confidentiality commitments made by both parties upon signing.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":21-22", + "content": "tart date to the end date, unless extended. By signing, both parties acknowledge and agree to these terms.\",\n \"BUSINESS AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement outlines the terms of the collaboration/project/service between Business A and Business B regarding [Brief Description of the Collaboration/Project/Service].\\n\\n2. TERMS OF SERVICE:\\n\\n Business A agrees to: [Specific tasks/responsibilities, e.g., \\\"Supply 500 units of Product X monthly.\\\"].\\n Business B agrees to: [Specific tasks/responsibilities, e.g., \\\"Pay $50 per unit of Product X within 30 days of delivery.\\\"].\\n\\n3. PAYMENT TERMS:\\n\\nPayments shall be made as follows: [Payment structure, e.g., \\\"Payment due within 30 days of invoice.\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties commit to maintaining confidentia" + }, + { + "comment": "This code is a Business Agreement template containing sections for confidentiality, termination, governing law, amendments, and acknowledgement. The agreement states that all proprietary information exchanged must be kept confidential, and the agreement can be terminated with X days notice. It's governed by specific country/state laws and any modifications must be in writing signed by both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":22-23", + "content": "lity regarding all proprietary information exchanged during this agreement.\\n\\n5. 
TERMINATION:\\n\\nEither party may terminate this Agreement with [X days, e.g., \\\"30 days\\\"] written notice. If breached, the aggrieved party may seek remedies as per governing laws.\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Business Agreement.\",\n \"CONFIDENTIALITY:\\n\\n4.1. Confidential Information: For the purposes of this Agreement, \\\"Confidential Information\\\" refers to any data or information, regardless of its form, proprietary to or maintained as confidential by either party, which is not publicly known and which is disclosed during the term of this Agreement or in relation to the collaboration/project/service.\\n\\n4." + }, + { + "comment": "This code is from a legal agreement, specifically addressing the protection and non-disclosure of confidential information between two parties. It outlines the exclusions for what constitutes as confidential information and stipulates that upon agreement termination or request, all copies must be returned.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":23-23", + "content": "2. Protection and Non-Disclosure: Both parties agree to use the Confidential Information solely for the purposes of the Agreement and will exert reasonable efforts to prevent the unauthorized disclosure or use of the Confidential Information. Neither party shall disclose, reproduce, or distribute any portion of the Confidential Information without the disclosing party's prior written consent.\\n\\n4.3. 
Exclusions: Confidential Information shall not include any data or information which:\\n\\n Is or becomes publicly known through no wrongful act of the receiving party;\\n Is independently developed by the receiving party without the use of the Confidential Information;\\n Is rightfully received from a third party without any obligation of confidentiality;\\n Is disclosed under legal requirement or order.\\n\\n4.4. Return or Destruction: Upon the termination of this Agreement, or at the request of the disclosing party, the receiving party shall return all copies of the Confidential In" + }, + { + "comment": "This code contains a confidentiality agreement and a loyalty agreement, including details such as effective dates, parties involved, duration of obligations, non-poaching clauses, and survival of obligations after the termination of the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":23-24", + "content": "formation to the disclosing party or certify in writing that it has destroyed all such copies.\\n\\n4.5. Duration: The obligations set forth in this Confidentiality section shall survive the termination or expiration of this Agreement for a period of [specific time, e.g., \\\"five years\\\"].\",\n \"LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Party A: [Full Legal Name of Party A], located at [Party A Address].\\n Party B: [Full Legal Name of Party B], located at [Party B Address].\\n\\n1. LOYALTY COMMITMENT:\\n\\nBoth parties acknowledge the mutual value of their business relationship. They commit to work in good faith, ensuring a collaborative environment that prioritizes trust, loyalty, and shared objectives.\\n\\n2. 
NON-POACHING OF EMPLOYEES:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 12 months following its termination\\\"], neither Party A nor Party B shall, without the prior written consent of the other party:\\n\\na. Directly or i" + }, + { + "comment": "Non-compete and loyalty agreement between Business A and another party, outlining restrictions on soliciting employees, hiring former employees, breach consequences, governing law, amendment rules, and acknowledgment of understanding.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":24-25", + "content": "ndirectly solicit, induce, or encourage any employees of the other party to terminate their employment or to engage in employment or other services elsewhere.\\nb. Hire, employ, or contract the services of any employee of the other party who has been employed by the said party within the last 12 months.\\n\\n3. BREACH:\\n\\nAny violation of the clauses in this Agreement will be deemed a material breach and may result in legal action or other remedies as available by law.\\n\\n4. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n5. 
AMENDMENTS:\\n\\nAny modifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Loyalty Agreement.\",\n \"NON-COMPETE AND LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Busines" + }, + { + "comment": "This code is a legal agreement ensuring loyalty and preventing competition between two parties during and after collaboration or engagement, with sections for non-compete, non-poaching, and loyalty.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":25-25", + "content": "s B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement is designed to protect the proprietary and business interests of both parties by ensuring loyalty and preventing competition during and after the period of collaboration or engagement.\\n\\n2. NON-COMPETE:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 24 months following its termination\\\"], neither party shall:\\n\\na. Engage in or support any venture that directly competes with the core business of the other party within [specific geographical region, e.g., \\\"the State of California\\\"].\\nb. Invest in, partner with, or advise any business entity that competes directly with the other party.\\n\\n3. LOYALTY AND NON-POACHING:\\n\\nBoth parties pledge their commitment to a loyal business relationship. Specifically:\\n\\na. Neither party will, without the prior written consent of the other, solicit, induce, or encourage any employees or contractors of the o" + }, + { + "comment": "This code snippet represents a legal agreement between two parties, outlining the terms and conditions of their engagement. 
It includes clauses on termination, confidentiality, breach remedies, governing law, and amendments.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":25-25", + "content": "ther party to terminate their engagement or to join another business.\\nb. Neither party shall disparage or encourage others to disparage the other party, its products, services, or its employees.\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties agree to maintain confidentiality regarding any proprietary or business-sensitive information exchanged during the course of this Agreement, ensuring that such information isn't disclosed without the explicit consent of the party owning that information.\\n\\n5. BREACH AND REMEDIES:\\n\\nA violation of any provision in this Agreement will be deemed a significant breach. The aggrieved party shall be entitled to seek injunctive relief, damages, or any other remedies available under the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications or amend" + }, + { + "comment": "This code represents an amendment to a Non-Compete and Loyalty Agreement between two businesses, specifically changing the length of engagement. It includes the updated agreement details, such as dates, business names, and locations. 
The amendment is made on a specific date and modifies the original agreement's section related to the duration of engagement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":25-26", + "content": "ments to this Agreement must be in writing and duly signed by authorized representatives of both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, representatives from both businesses affirm their understanding and acceptance of this Non-Compete and Loyalty Agreement.\",\n \"AMENDMENT TO CONTRACT: LENGTH OF ENGAGEMENT\\n\\nThis Amendment is made on [Date], and amends the Non-Compete and Loyalty Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Non-Compete and Loyalty Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"2\\\"] - Length of Engagement\\n\\nThe period of engagement between Business A and Business B as stipulated in the original Agreement is hereby extended/shortened/set to commence from [New Start Date] and conclude on [New End Date].\\n\\nGENERAL " + }, + { + "comment": "This code is for creating an amendment to a contract, specifically for adjusting the fees for late delivery in case of delayed product/service delivery by either Business A or B. The original agreement and this amendment together represent the entire understanding between the parties involved. 
Both parties acknowledge and approve the amendment with their signatures.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":26-27", + "content": "PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT: FEES FOR LATE DELIVERY\\n\\nThis Amendment is made on [Date], and amends the Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"3\\\"] - Fees for Late Delivery\\n\\na. If Business A/B fails to deliver the products/services by the agreed-upon deadline, a late fee of [Specific Amount or Percentage," + }, + { + "comment": "This code defines an amendment to a contract, specifying late fees for delayed payments, maximum limits, deduction methods, and preserves original agreement terms. It also includes acknowledgement from both parties upon signing.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":27-28", + "content": " e.g., \\\"$100\\\" or \\\"5% of the total contract value\\\"] shall be applied for each [time period, e.g., \\\"day\\\"] of delay.\\n\\nb. The total late fees shall not exceed [Specific Maximum Amount or Percentage, e.g., \\\"$1,000\\\" or \\\"20% of the total contract value\\\"].\\n\\nc. 
The fees will be deducted from the final payment or invoiced separately, as deemed appropriate by the non-defaulting party.\\n\\nGENERAL PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT\\n\\nThis Amendment (the \\\"Amendment\\\") is entered into on [Date], between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred" + }, + { + "comment": "The code is a legal document amendment, referring to the Original Contract, adding additional responsibilities for maintaining and ensuring the smooth functioning of existing IT systems.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":28-28", + "content": " to as the \\\"Second Party'', collectively referred to as the \\\"Parties\\\".\\n\\nWHEREAS, the Parties entered into a contract dated [Original Contract Date], hereinafter referred to as the \\\"Original Contract', for [Brief Description of the Original Contract, e.g., \\\"provision of IT services to First Party\\\"];\\n\\nWHEREAS, the Parties now wish to amend the Original Contract to add additional responsibilities pertaining to the maintenance of existing IT systems;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants contained herein and for other good and valuable consideration, the receipt and sufficiency of which is hereby acknowledged, the Parties agree as follows:\\n\\n Additional Responsibility:\\n\\n The Second Party shall assume the responsibility of maintaining and ensuring the smooth functioning of the existing IT systems of the First Party. 
This responsibility includes, but is not limited to:\\n\\n a. Regular monitoring of the IT systems for any anomalies or issues.\\n\\n " + }, + { + "comment": "This code snippet outlines amendment details for a contract. It includes revised responsibilities such as prompt troubleshooting, routine updates and patches, additional tasks, new compensation terms, duration and termination clauses remaining the same unless agreed upon in writing, and existing terms not affected unless in conflict with the amendment.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":28-28", + "content": "b. Prompt troubleshooting and rectification of any issues identified.\\n\\n c. Routine updates and patches to ensure the systems are secure and up-to-date.\\n\\n d. Any other related tasks as deemed necessary by the First Party.\\n\\n Compensation:\\n\\n As a result of this additional responsibility, the Parties agree to a revised compensation of [New Compensation Details, e.g., \\\"$XXX per month\\\"]. All other payment terms as outlined in the Original Contract shall remain unchanged.\\n\\n Duration and Termination:\\n\\n The duration and termination clauses from the Original Contract shall remain applicable to this Amendment unless otherwise agreed upon in writing by the Parties.\\n\\n Miscellaneous:\\n\\n All other terms and conditions of the Original Contract, which are not specifically amended by this Amendment, shall remain in full force and effect. 
In the event of a conflict between this Amendment and the Original Contract, the terms of this Amendment shall prevail.\\n\\n " + }, + { + "comment": "This code section contains legal terms and conditions, specifically an amendment to a contract with governing law and milestones for software development.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":28-29", + "content": "Entire Agreement:\\n\\n This Amendment, along with the Original Contract, constitutes the entire agreement between the Parties and supersedes any prior understandings, written or oral, relating to the subject matter of this Amendment.\\n\\n Governing Law:\\n\\n This Amendment shall be governed by the laws of [Jurisdiction, e.g., \\\"State of New York\\\"].\",\n \"This appendix, part of the Contract between Party One and Party Two, sets milestones and deadlines for Party Two. \\n\\nMilestone 1 involves tasks such as gathering requirements, designing user interface etc. with the objective of developing initial prototype of a software application. Delivery Deadline: September 15, 2023. \\n\\nMilestone 2 involves tasks like incorporating feedback and conducting beta testing, aiming at finalizing and testing the software application. Delivery Deadline: October 15, 2023. \\n\\nEach milestone's completion will be reviewed by Party One; if requirements aren't met, Party Two may correct and resubmit. P" + }, + { + "comment": "This code defines an appendix to a contract that modifies the delivery time due to mutual agreement between parties. The original and revised delivery dates are provided, along with a brief explanation for the change.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":29-30", + "content": "ayment terms and penalties are outlined in the main Contract. 
This appendix is executed adhering to main Contract's terms and conditions.\",\n \"APPENDIX B \\u2013 CHANGE IN TIME OF DELIVERY\\n\\nThis Appendix is an addendum to the contract (the \\\"Contract\\\") dated [Original Contract Date], entered into between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred to as the \\\"Second Party\\\". The purpose of this Appendix is to amend and modify the delivery time as specified in the original Contract.\\n\\n Original Delivery Time: As per the terms of the original Contract, the delivery time was set for [Original Delivery Date, e.g., \\\"September 15, 2023\\\"].\\n\\n Revised Delivery Time: The Parties, through mutual agreement, have now decided to amend the delivery time. The new delivery date shall be [Revised Delivery Date, e.g., \\\"October 10, 2023\\\"].\\n\\n Reason for Change: [Provide a brief explanation for the change in delivery time, " + }, + { + "comment": "The code defines consequences and prevailing terms for a contract's delayed delivery, with the Appendix taking precedence in case of inconsistencies.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":30-30", + "content": "e.g., \\\"Due to unforeseen challenges in the production process, additional time is required to ensure that the deliverables meet the agreed-upon quality standards.\\\"]\\n\\n Consequences of Delay: Unless otherwise stated in the main body of the Contract:\\n\\n a. If the Second Party fails to meet the revised delivery time, penalties or consequences as outlined in the original Contract for late delivery will apply from the revised delivery date.\\n\\n b. 
All other terms related to late delivery, including but not limited to penalties, refunds, or rights to terminate, remain effective and unchanged by this Appendix.\\n\\n Prevailing Terms: All other terms and conditions of the original Contract not specifically amended by this Appendix shall remain in full force and effect. In the event of any inconsistency or conflict between the original Contract and this Appendix, the terms of this Appendix shall prevail with respect to the change in the delivery time.\\n\\n Acknowledgment: By sig" + }, + { + "comment": "This code appears to be extracting text chunks from a file named \"pure_documents.json\" at lines 30-31, possibly representing various appendices or amendments within an existing contract. These sections outline revised delivery times and additional confidential information categories beyond the original scope of the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":30-31", + "content": "ning this Appendix, the Parties acknowledge and agree to the revised delivery time and any associated consequences of delays.\\n\\nThis Appendix is executed as an acknowledgment and agreement to the revised delivery time and shall be considered an integral part of the original Contract.\",\n \"APPENDIX C \\u2013 ADDITIONAL CONFIDENTIAL INFORMATION\\n\\nThis Appendix is an extension of the contract (the \\\"Contract\\\") dated [Original Contract Date], between [Party One Name] (\\\"First Party\\\") and [Party Two Name] (\\\"Second Party\\\"). It outlines additional categories of confidential information beyond those detailed in the Contract.\\n\\n Additional Confidential Information Includes:\\n\\n a. Non-public financial data.\\n\\n b. Unpublished marketing strategies and materials.\\n\\n c. Upcoming product or service details.\\n\\n d. Proprietary software codes and processes.\\n\\n e. Personnel records.\\n\\n f. 
Any data labeled as \\\"Confidential\\\" or \\\"Proprietary\\\" after the Contract\\u2019s e" + }, + { + "comment": "This code defines a legal document containing confidentiality provisions, exclusions, duration, and precedence in case of conflict with the Contract.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":31-32", + "content": "xecution.\\n\\n Protection & Exclusions:\\n\\nBoth Parties shall extend the same protection to this Additional Confidential Information as previously agreed upon in the Contract. Information that becomes public, is received rightfully from a third party, is independently developed, or gets written release authorization is excluded from confidentiality obligations.\\n\\n Duration:\\n\\nThe confidentiality obligations for this Appendix shall persist as defined in the Contract or, if unspecified, for [e.g., \\\"five years\\\"] from the disclosure date.\\n\\n Prevailing Terms:\\n\\nIf there\\u2019s any conflict between this Appendix and the Contract concerning confidentiality, this Appendix takes precedence concerning Additional Confidential Information.\\n\\nExecuted as an integral part of the Contract.\",\n \"AMENDMENT TO NON-DISCLOSURE AGREEMENT\\n\\nThis Amendment (the \\u201cAmendment\\u201d) is made and entered into as of [Amendment Date], by and between [Party A Name], having an address at [Party " + }, + { + "comment": "This code represents a legal document outlining the terms of an amendment to an existing Non-Disclosure Agreement (NDA) between Party A and Party B. 
The amendment extends the duration of certain time restrictions within the original agreement, with new time specified in Section [X].", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":32-32", + "content": "A Address] (\\u201cParty A\\u201d), and [Party B Name], having an address at [Party B Address] (\\u201cParty B\\u201d), collectively referred to as the \\u201cParties.\\u201d\\n\\nRECITALS\\n\\nWHEREAS, the Parties entered into a Non-Disclosure Agreement dated [Original NDA Date] (the \\u201cOriginal Agreement\\u201d);\\n\\nWHEREAS, the Parties desire to amend the Original Agreement to extend the duration of certain restrictions therein;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants and promises made by the Parties hereto, the Parties agree as follows:\\n\\n Extension of Time Restrictions: The time restriction set forth in Section [X] of the Original Agreement, currently stating a period of [Original Time, e.g., \\\"two (2) years\\\"], is hereby amended and extended to [New Time, e.g., \\\"five (5) years\\\"] from the date of disclosure of the Confidential Information.\\n\\n Full Force and Effect: Except as expressly modified by this Amendment, all terms, conditions, and provisions of the Or" + }, + { + "comment": "This code represents an amendment to an original business agreement. It includes provisions for conflict resolution, counterparts execution, and governing law. The agreement is between two businesses - Business A and Business B, with a specified effective date.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":32-33", + "content": "iginal Agreement shall remain in full force and effect. 
In the event of any conflict between the terms of this Amendment and the Original Agreement, the terms of this Amendment shall govern.\\n\\n Counterparts: This Amendment may be executed in counterparts, each of which shall be deemed an original and all of which together shall constitute one and the same instrument.\\n\\n Governing Law: This Amendment shall be governed by and construed in accordance with the laws of [Governing State or Country, e.g., \\\"the State of California\\\"], without regard to its conflict of laws principles.\\n\\nIN WITNESS WHEREOF, the Parties hereto have executed this Amendment as of the date first above written.\",\n \"BUSINESS COOPERATION AGREEMENT\\n\\nThis Agreement is between [Business A Name], at [Business A Address] (\\\"Business A\\\"), and [Business B Name], at [Business B Address] (\\\"Business B\\\"), effective [Day, Month, Year].\\n\\n1. Purpose:\\nBoth businesses will cooperate in [brief description, e.g., \\\"" + }, + { + "comment": "This code represents a business cooperation agreement between Business A and Business B, outlining their joint marketing responsibilities, term, confidentiality, governing law, and amendment processes. An appendix is also included to extend the confidentiality conditions of the original agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":33-34", + "content": "joint marketing\\\"].\\n\\n2. Responsibilities:\\n\\n Business A will: [Key obligation, e.g., \\\"Promote Business B in newsletters.\\\"]\\n Business B will: [Key obligation, e.g., \\\"Display Business A products.\\\"]\\n\\n3. Term:\\nEffective from the above date for [e.g., \\\"12 months\\\"]. Either party can terminate with [e.g., \\\"30 days\\\"] notice.\\n\\n4. Confidentiality:\\nConfidential information remains private, during and post-agreement.\\n\\n5. Governing Law:\\nGoverning laws of [State/Country, e.g., \\\"California\\\"].\\n\\n6. 
Amendments:\\nChanges must be written and signed by both parties.\",\n \"APPENDIX TO BUSINESS COOPERATION AGREEMENT\\n\\nEXTENSION OF CONFIDENTIALITY CONDITIONS\\n\\nThis Appendix is made as of [Day, Month, Year], and is appended to the Business Cooperation Agreement dated [Original Agreement Date] (\\\"Original Agreement\\\") between [Business A Name], located at [Business A Address] (\\\"Business A\\\") and [Business B Name], located at [Business B Address] (\\\"Business B\\\").\\n\\n1. Extensio" + }, + { + "comment": "Section of code describes an amendment to a confidentiality agreement, extending the confidentiality period and keeping other obligations unchanged. It also outlines the governing law for the amendment.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":34-35", + "content": "n of Confidentiality Period:\\nThe confidentiality period stipulated in Section 4 (or the appropriate section number) of the Original Agreement is hereby extended. Previously set to expire [Original Expiry Date], it will now extend to [New Expiry Date].\\n\\n2. Continued Obligations:\\nAll other confidentiality obligations and conditions outlined in the Original Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the Original Agreement, constitutes the entire agreement between the parties regarding the subject matter herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nIN WITNESS WHEREOF, both parties hereto have executed this Appendix as of the date first above written.\",\n \"APPENDIX: LOYALTY CLAUSE\\n\\nEffective [Day, Month, Year], attached to the Agreement dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. 
Loya" + }, + { + "comment": "This code snippet represents a legal document with sections for commitment, consequences of breach, and governing law. The commitment section outlines loyalty pledge and restraints from competitive activities. The consequences section describes potential agreement termination and legal actions for breaches. Lastly, the governing law section states the applicable state or country's laws to govern the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":35-36", + "content": "lty Commitment:\\nFor one year from the Effective Date, both parties pledge loyalty by refraining from activities harmful or competitive to the other within the context of the Agreement.\\n\\n2. Consequences:\\nBreaches may result in Agreement termination and legal action as per the original terms.\\n\\n3. Governing Law:\\nGoverned by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is appended to the B2B Contractor Agreement (\\\"Agreement\\\") dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name], hereinafter referred to as \\\"Company', and [Contractor Name], hereinafter referred to as \\\"Contractor\\\".\\n\\n1. Confidentiality:\\n\\n1.1 Both Company and Contractor acknowledge that they may have access to or receive information during the term of the Agreement which is confidential to the disclosing party (\\\"Confidential Information\\\").\\n\\n1.2 Confidential Information shall not include information that:\\n\\n is or b" + }, + { + "comment": "This code snippet contains a confidentiality agreement clause, which outlines the rules for handling and protecting sensitive information. 
The receiving party is required to use the Confidential Information only for performing under the Agreement, take precautions to prevent unauthorized disclosure, and obtain written consent before reproducing or distributing it. The obligations of this clause continue for a specified duration (e.g., two years) after the expiration or termination of the Agreement, and the receiving party must return or destroy all copies upon request or termination.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":36-36", + "content": "ecomes public knowledge without breach of this clause;\\n was known by the receiving party before receipt from the disclosing party;\\n is received from a third party without breach of any obligation of confidentiality.\\n\\n1.3 The receiving party shall:\\n\\n use the Confidential Information only for performing under the Agreement;\\n take all reasonable precautions to prevent any unauthorized disclosure of the Confidential Information;\\n not disclose, reproduce, or distribute Confidential Information without the written consent of the disclosing party.\\n\\n2. Duration:\\n\\nThe obligations set forth in this Appendix shall continue for a period of [e.g., \\\"two years\\\"] from the date of termination or expiration of the Agreement.\\n\\n3. Return or Destruction:\\n\\nUpon the expiration or termination of the Agreement, or upon the disclosing party's request, the receiving party shall return or, if directed by the disclosing party, destroy all copies of the Confidential Information.\\n\\n" + }, + { + "comment": "This code snippet represents a Confidentiality Agreement between two parties, defining the scope of confidential information, obligations to protect it, and its duration post-agreement termination or expiration.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":36-37", + "content": "4. 
Governing Law:\\n\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the Agreement.\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is part of the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name] (\\\"Company\\\") and [Contractor Name] (\\\"Contractor\\\").\\n\\n1. Confidential Information:\\nBoth parties may access or receive the other's confidential information (\\\"Confidential Information\\\") during the Agreement term. Confidential Information excludes publicly known details, data known prior, or information obtained from third parties without confidentiality obligations.\\n\\n2. Obligations:\\nThe recipient shall:\\n\\n Use the Confidential Information solely for the Agreement's purpose.\\n Prevent unauthorized disclosures.\\n Not disclose without prior written consent.\\n\\n3. Duration:\\nObligations persist for [e.g., \\\"two years\\\"] post Agreement termination or expiration.\\n\\n4. Return/Des" + }, + { + "comment": "Non-Disclosure Agreement (NDA) between a tech company and a contractor, effective on [Effective Date], defining Confidential Information, its use, protection, and obligation duration.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":37-38", + "content": "truction:\\nUpon Agreement conclusion, or on request, all Confidential Information copies should be returned or destroyed.\\n\\n5. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Tech Company Name], located at [Tech Company Address], (\\\"Company\\\") and [Contractor's Full Name], located at [Contractor Address], (\\\"Contractor\\\").\\n\\nPurpose:\\nContractor will access Company's confidential information during their engagement.\\n\\n1. 
Definition:\\n\\\"Confidential Information\\\" means proprietary data related to the Company\\u2019s business, excluding publicly known details, prior known information, or data from third parties without confidentiality bounds.\\n\\n2. Obligation:\\nContractor shall:\\n\\n Use Confidential Information solely for engagement purposes.\\n Prevent unauthorized disclosure.\\n\\n3. Duration:\\nObligations persist for [e.g., \\\"two years\\\"] from disclosure" + }, + { + "comment": "Section describes terms for confidential information handling, remedies for breach, and governing law. Appendix extends contract duration by two years while keeping other terms unchanged.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":38-39", + "content": " date.\\n\\n4. Return:\\nContractor shall return all Confidential Information items upon engagement completion or Company's request, retaining no copies.\\n\\n5. Remedies:\\nBreach may result in legal actions, damages, and costs.\\n\\n6. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: EXTENSION OF CONTRACT DURATION\\n\\nThis Appendix is a part of the Agreement initially dated [Original Agreement Date, e.g., \\\"August 15, 2021\\\"], between [Party A Name], located at [Party A Address] (\\\"Party A\\\") and [Party B Name], located at [Party B Address] (\\\"Party B\\\").\\n\\n1. Duration Extension:\\nThe duration of the Agreement referenced above is hereby extended for an additional two (2) years from the original expiration date. With this extension, the new expiration date of the Agreement will be [New Expiration Date, e.g., \\\"August 15, 2025\\\"].\\n\\n2. 
All Other Terms Remain Unchanged:\\nExcept for the extension of the contract duration as described herein, all other term" + }, + { + "comment": "This code snippet represents a confidentiality agreement between Company and Supplier, with details such as governing law, effective date, obligations of the Supplier regarding Confidential Information, and superseding previous understandings.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":39-40", + "content": "s and conditions of the Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the original Agreement, constitutes the entire agreement between Party A and Party B. Any previous understandings, written or oral, relating to the subject of this Appendix are superseded by the terms herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"CONFIDENTIALITY AGREEMENT\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Supplier Name], located at [Supplier Address] (\\\"Supplier\\\").\\n\\n1. Definition:\\n\\\"Confidential Information\\\" means proprietary data of the Company, excluding:\\n\\n Pre-disclosed or publicly known data.\\n Info from third parties without confidentiality bounds.\\n\\n2. Obligations:\\nSupplier will:\\n\\n Use Confidential Information solely for b" + }, + { + "comment": "This code is part of a legal document agreement. It specifies the obligations, duration, remedies for breaches, and governing law in case of any violations. 
It also includes details about notifying suspected breaches, rectification timelines, fees for breaches, and potential legal actions if necessary.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":40-41", + "content": "usiness purposes with the Company.\\n Protect its secrecy and prevent unauthorized disclosure.\\n Return or destroy all Confidential Information upon request or business completion.\\n\\n3. Duration:\\nObligations last for [e.g., \\\"two years\\\"] from disclosure date.\\n\\n4. Remedies:\\nBreaches may result in legal actions, damages, and costs by the Company.\\n\\n5. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: BREACH CONSEQUENCES\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Notification:\\nSuspected breaches must be reported in writing by the non-breaching party.\\n\\n2. Rectification:\\nThe breaching party has [e.g., \\\"14 days\\\"] from notification to rectify, unless irreparable.\\n\\n3. Fees:\\nBreaches incur a penalty of [e.g., \\\"$10,000\\\"], aside from claimed damages.\\n\\n4. Legal Actions:\\nUnresolved or damaging breaches may lead to lega" + }, + { + "comment": "This code defines contract termination clauses, including termination for breach, termination by notice, and obligations upon termination. It also specifies that the Agreement is governed by specific state or country laws.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":41-42", + "content": "l actions, including injunctive relief, damages, and legal fees.\\n\\n5. Termination:\\nRepeated or severe breaches can cause Agreement termination by the non-breaching party.\\n\\n6. 
Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: TERMS OF CONTRACT TERMINATION\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Termination for Breach:\\nIf either party breaches any conditions of the Agreement, the non-breaching party may terminate the Agreement immediately upon written notice to the breaching party.\\n\\n2. Termination by Notice:\\nEither party may terminate the Agreement for any reason by providing a written notice to the other party. The termination will become effective 30 days after the receipt of such notice.\\n\\n3. Obligations Upon Termination:\\nUpon termination, all rights and obligations under the Agreement will cease, except for those which by thei" + }, + { + "comment": "This code represents an Appendix titled \"Obligations Upon Termination\" in a legal agreement between Party A and Party B. It outlines the responsibilities of both parties, such as returning property, maintaining confidentiality, and settling outstanding payments upon termination of the agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":42-43", + "content": "r nature should survive termination (e.g., confidentiality, liability for prior breaches, etc.).\\n\\n4. Governing Law:\\nThis Appendix, and any disputes arising from it, will be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"APPENDIX: OBLIGATIONS UPON TERMINATION\\n\\nPertaining to the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. 
Return of Property:\\nUpon termination, each party shall promptly return to the other all property, materials, and assets belonging to the other party, unless otherwise specified in the Agreement.\\n\\n2. Confidential Information:\\nBoth parties shall continue to abide by any confidentiality obligations set forth in the Agreement. Any confidential information must be returned or destroyed, as instructed by the owning party.\\n\\n3. Outstanding Payments:\\nAll due payments must be settled within [e.g., \\\"14 day" + }, + { + "comment": "Non-Disclosure Agreement (NDA) with termination, non-disparagement, survival of provisions, notifications, transition assistance, and governing law clauses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":43-44", + "content": "s\\\"] of termination, as per the terms of the original Agreement.\\n\\n4. Non-Disparagement:\\nBoth parties agree not to make any derogatory or disparaging statements about the other party post-termination.\\n\\n5. Survival of Provisions:\\nAny provisions in the Agreement that, by their nature, should persist beyond termination (e.g., indemnity, liability, confidentiality) will continue to be in effect.\\n\\n6. Notifications:\\nEach party must inform their respective stakeholders, if necessary, about the termination in a manner that maintains the goodwill and reputation of both parties.\\n\\n7. Transition Assistance:\\nTo ensure a smooth transition, both parties agree to cooperate, as reasonably requested by the other, for a period of [e.g., \\\"30 days\\\"] after termination.\\n\\n8. Governing Law:\\nThis Appendix is governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Date, e.g., \\\"August 15, 2023\\\"], be" + }, + { + "comment": "The code is for a confidentiality agreement between a client and a business. 
It outlines the purpose, terms of confidentiality, definition of confidential information, duration of obligations, return/destruction process, legal remedies, and governing laws.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":44-45", + "content": "tween [Client Name], (\\\"Client\\\") and [Business Name], (\\\"Business\\\").\\n\\nPurpose:\\nProtection of confidential information exchanged due to potential collaboration.\\n\\n1. Confidentiality:\\nBusiness agrees to keep secret all Confidential Information shared by Client.\\n\\n2. Definition:\\n\\\"Confidential Information\\\" is non-public data shared by either party, excluding info that's publicly available, already known, or received without confidentiality constraints.\\n\\n3. Duration:\\nObligations last [e.g., \\\"two years\\\"] from the date of disclosure.\\n\\n4. Return/Destruction:\\nUpon Client's request, Business will return or destroy all Confidential Information.\\n\\n5. Remedies:\\nUnauthorized disclosures may lead to legal action by Client, including damages.\\n\\n6. 
Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"IT SERVICES AGREEMENT\\n\\nEffective Date: [Date, e.g., \\\"August 15, 2023\\\"]\\n\\nParties:\\n\\n [Client Name], located at [Client Address] (\\\"Client\\\")\\n [Service Prov" + }, + { + "comment": "This code is defining the basic structure and content of a service agreement between a client and a service provider, including scope of work, payment terms, duration, termination clauses, confidentiality, limitation of liability, governing law, and stating that this constitutes the full agreement between both parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":45-45", + "content": "ider Name], located at [Service Provider Address] (\\\"Provider\\\")\\n\\nScope of Work:\\nProvider agrees to offer IT services, including [e.g., \\\"network setup, software installation, and routine maintenance\\\"], as detailed in Attachment A.\\n\\nPayment:\\nClient shall pay Provider [e.g., \\\"$1,000\\\"] per month. Invoices will be sent monthly and are due within [e.g., \\\"30 days\\\"].\\n\\nDuration:\\nThis Agreement starts on [Start Date] and ends on [End Date], unless terminated earlier.\\n\\nTermination:\\nEither party may terminate with [e.g., \\\"30 days\\\"] written notice. 
Upon termination, any unpaid fees for services rendered become immediately due.\\n\\nConfidentiality:\\nBoth parties agree to keep all business and technical information confidential.\\n\\nLimitation of Liability:\\nProvider's liability is limited to the amount paid by the Client for the specific service causing damage.\\n\\nGoverning Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nEntire Agreement:\\nThis constitutes the full agreement between both parties.\"," + }, + { + "comment": "This code contains different types of legal documents including a Confidentiality Amendment to NDA and a Loyalty Agreement, which outline the terms between two parties.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":46-47", + "content": " \"CONFIDENTIALITY AMENDMENT TO NDA\\n\\nThis Amendment, effective [Date, e.g., \\\"August 15, 2023\\\"], modifies the NDA dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Responsibilities:\\n\\na) Protection: Parties must safeguard Confidential Information at least as they do their own.\\n\\nb) Access: Access is limited to those needing it who are also bound by confidentiality.\\n\\nc) Breach Notification: Parties must immediately inform the other of any breaches.\\n\\nd) Return/Destruction: Upon request or agreement end, parties must return or certify the destruction of Confidential Information.\\n\\ne) No Reverse Engineering: Receiving party shall not reverse engineer any provided items.\\n\\n2. Remedies:\\nUnauthorized disclosures permit injunctive relief and other legal remedies.\\n\\n3. 
Original Agreement:\\nExcept for this Amendment, the NDA remains unchanged.\\n\\nGoverning Law:\\nAs per the NDA.\",\n \"LOYALTY AGREEMENT\\n\\nThis Agreement (\\\"" + }, + { + "comment": "This code defines a contractual agreement between two parties (Party A and Party B) for collaboration, loyalty commitment, and non-solicitation. The agreement is made as of a specific date and aims to establish a loyal relationship in their joint business endeavors.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":47-47", + "content": "Agreement\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Party A Name], with its principal office at [Party A Address] (\\\"Party A\\\"),\\n [Party B Name], with its principal office at [Party B Address] (\\\"Party B\\\").\\n\\nPurpose:\\nThe parties wish to collaborate and establish a loyal relationship in their joint business endeavors.\\n\\n1. Loyalty Commitment:\\n\\na) Both parties commit to act in good faith and refrain from engaging in any activity or partnership that might conflict with the interests of the other party during the term of this Agreement.\\n\\nb) Neither party shall assist, collaborate, or engage with third parties that may cause harm or disrepute to the other party.\\n\\nc) Each party shall prioritize the other's interests in situations where opportunities arise from their collaboration.\\n\\n2. Non-Solicitation:\\nDuring the term of this Agreement, and for [e.g., \\\"one year\\\"] thereafter, neither party shall solicit or attempt to entice away any clients, cust" + }, + { + "comment": "This is a business consulting contract between Client and Consultant. 
It includes clauses for scope of work, payment terms, duration, termination, confidentiality, governing law, and entire agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":47-48", + "content": "omers, or employees of the other party.\\n\\n3. Duration:\\nThis Agreement will begin on the Effective Date and remain in effect for [e.g., \\\"two years\\\"] unless terminated earlier by mutual consent.\\n\\n4. Termination:\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term herein.\\n\\n5. Confidentiality:\\nBoth parties agree to maintain the confidentiality of all proprietary or non-public information obtained during the collaboration.\\n\\n6. Governing Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n7. Entire Agreement:\\nThis document constitutes the full understanding between both parties, superseding all prior discussions, agreements, or understandings.\",\n \"BUSINESS CONSULTING CONTRACT\\n\\nThis Consulting Contract (\\\"Contract\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Client Name], with its principal office at [Client Address] (\\\"Client\\\"),\\n [Consultant N" + }, + { + "comment": "Consulting contract between Client and Consultant, outlines services provided, compensation terms, duration, termination conditions, and confidentiality agreement.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":48-48", + "content": "ame], with its principal office at [Consultant Address] (\\\"Consultant\\\").\\n\\nPurpose:\\nThe Consultant will provide professional consulting services to the Client as described below.\\n\\n1. 
Scope of Services:\\nConsultant agrees to offer services including, but not limited to:\\na) Business strategy development\\nb) Market analysis\\nc) [Other services as needed]\\nAny additional services will require an amendment to this Contract.\\n\\n2. Compensation:\\nFor services rendered, the Client shall pay the Consultant [e.g., \\\"$100\\\"] per hour. Invoices will be issued [e.g., \\\"monthly\\\"] and are due within [e.g., \\\"30 days\\\"] of receipt.\\n\\n3. Duration:\\nThis Contract begins on [Start Date] and ends on [End Date], unless extended by mutual agreement or terminated earlier.\\n\\n4. Termination:\\nEither party can terminate this Contract with [e.g., \\\"30 days\\\"] written notice. In case of termination, the Client will pay for services rendered up to the notice date.\\n\\n5. Confidentiality:\\nThe Consultant sh" + }, + { + "comment": "This code snippet represents an agreement between two parties, Party A and Party B, with sections covering confidentiality, non-compete clauses, independent contractor status, governing law, and entire agreement. It also references Appendix A regarding confidentiality breach fees.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":48-49", + "content": "all maintain the confidentiality of all proprietary information received during the engagement, unless obligated by law to disclose.\\n\\n6. Non-compete:\\nFor [e.g., \\\"six months\\\"] after Contract termination, the Consultant agrees not to provide similar services to any direct competitor of the Client within [e.g., \\\"50 miles\\\"] of the Client's primary location.\\n\\n7. Independent Contractor:\\nThe Consultant is an independent contractor and not an employee of the Client.\\n\\n8. Governing Law:\\nThis Contract shall be governed by and interpreted under the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n9. 
Entire Agreement:\\nThis Contract represents the entire understanding between both parties, superseding all prior negotiations, discussions, or agreements.\",\n \"APPENDIX A: CONFIDENTIALITY BREACH FEES\\n\\nThis Appendix is attached to and made part of the Contract (\\\"Original Contract\\\") dated [Original Contract Date], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n" + }, + { + "comment": "This code defines the fees and penalties for breaching confidentiality in the Original Contract, including immediate penalty amounts, reimbursement for direct financial losses, and coverage of legal fees and costs. Payment terms are outlined as well, with disputes to be resolved according to the dispute resolution stipulations in the contract.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":49-49", + "content": "\\n1. Purpose:\\nThis Appendix defines the fees and penalties associated with any breach of confidentiality as stipulated in the Original Contract.\\n\\n2. Confidentiality Breach Fee:\\nIn the event of a breach of the confidentiality provisions in the Original Contract by either party:\\n\\na) The breaching party will be liable for an immediate penalty of [specific amount, e.g., \\\"$10,000\\\"].\\n\\nb) If the breach results in any direct financial loss to the non-breaching party, the breaching party shall additionally reimburse the non-breaching party for the full amount of such loss.\\n\\nc) The breaching party will also bear all costs, including legal fees, that the non-breaching party incurs while addressing or remedying the breach.\\n\\n3. Payment Terms:\\nPayment of any penalty or reimbursement as defined above shall be made within [e.g., \\\"30 days\\\"] of written notification of the breach.\\n\\n4. 
Disputes:\\nAny disputes related to this Appendix shall be resolved as stipulated in the dispute resolu" + }, + { + "comment": "This code represents a legal document, specifically an appendix to a contract, which outlines strict confidentiality breach penalties. The appendix is attached to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name]. It states the immediate penalty upon breaching confidentiality, potential additional fees for reputational harm or business loss, full reimbursement for direct financial losses caused by the breach, all associated legal and remedy costs to be borne by the breaching party, and that the breaching party must swiftly rectify the breach. Payment is due within a certain period.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":49-50", + "content": "tion clause of the Original Contract.\\n\\n5. Continuation of Original Contract:\\nExcept as modified by this Appendix, the Original Contract remains in full force and effect.\\n\\n6. Governing Law:\\nThis Appendix, consistent with the Original Contract, is governed by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX A: STRICT CONFIDENTIALITY BREACH PENALTIES\\n\\nThis Appendix is annexed to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name].\\n\\n1. Breach Fees:\\nIf a party breaches confidentiality:\\n\\na) Immediate penalty: [e.g., \\\"$50,000\\\"].\\n\\nb) For reputational harm or business loss: Additional [e.g., \\\"$100,000\\\"].\\n\\nc) Full reimbursement for direct financial losses caused by the breach.\\n\\nd) All associated legal and remedy costs borne by the breaching party.\\n\\n2. Remedial Actions:\\nThe breaching party must swiftly rectify the breach, potentially including public apologies or recalling disclosed information.\\n\\n3. 
Payment:\\nDue withi" + }, + { + "comment": "Code snippet outlines contractual terms for a breach notification, termination clause, and governing law.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/pure_documents.json\":50-51", + "content": "n [e.g., \\\"15 days\\\"] of breach notification.\\n\\n4. Termination:\\nNon-breaching party can immediately terminate the main contract upon a breach.\\n\\n5. Governing Law:\\nThis Appendix adheres to [State/Country, e.g., \\\"California\\\"] laws.\"\n]" + } + ] +} \ No newline at end of file diff --git a/docs/doc/211f5959-9ce5-4512-bcce-0c8afe4961a9.json b/docs/doc/211f5959-9ce5-4512-bcce-0c8afe4961a9.json new file mode 100644 index 0000000..4bacf58 --- /dev/null +++ b/docs/doc/211f5959-9ce5-4512-bcce-0c8afe4961a9.json @@ -0,0 +1,60 @@ +{ + "summary": "Both scripts utilize Python to process data from JSON files, generate boxplots, and customize visualizations with various settings such as titles, colors, y-axis limits, cost thresholds, and display options.", + "details": [ + { + "comment": "This Python script reads data from a directory of JSON files, organizes it by folders, and stores the results in a dictionary. It uses the matplotlib library for plotting, but the code provided focuses on reading and organizing data, not plotting itself. 
The script is likely part of a larger program that utilizes this data for further analysis or visualization.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":0-28", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():" + }, + { + "comment": "This code snippet sorts the results and then calculates final scores for different methods based on metrics like score, solution status, prompt and completion tokens, and cost.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":29-57", + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = 
op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]" + }, + { + "comment": "This function calculates the final scores for each method in the results_complete dictionary, considering factors like cost, prompt and completion tokens, and operation scores. It sorts the scores in ascending order before returning them.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":58-86", + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_final_scores_doc_merge(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )" + }, + { + "comment": "The code defines a function `get_plotting_data` that takes a base directory and a score method as input, returns plotting data for different methods by extracting scores, solved counts, and costs from the complete results. Another function, `plot_results`, is defined which takes various parameters such as name, results, methods order, etc., and plots the results based on the specified parameters. 
The code also includes checks to ensure that only valid methods are considered for plotting.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":87-122", + "content": " scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory, score_method):\n results_complete = get_complete_results(base_directory)\n scores = score_method(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n name,\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n y_upper=16,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n if name == \"set_intersection\":" + }, + { + "comment": "The code is filtering scores and costs based on specific conditions for different tasks (e.g., sorting, keyword counting, document merging) and creating a figure with axes. 
For each task, it generates a list of filtered scores and total costs.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":123-152", + "content": " scores_ordered = [\n [min(score, length) for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order\n ]\n elif name == \"sorting\":\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"keyword_counting\":\n scores_ordered = [\n [\n score\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"document_merging\":\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n if name == \"keyword_counting\" or name == \"document_merging\":" + }, + { + "comment": "Creates boxplots for sorted scores based on methods order, sets x-tick labels and y-limits according to the current name (method), adjusts y-ticks depending on length and displays left ylabel if needed.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":153-183", + "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n else:\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n if name == \"keyword_counting\":\n ax.set_xticklabels(methods_labels, fontsize=10)\n else:\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n if name == \"document_merging\":\n ax.set_ylim(y_lower, 12 if 
display_solved else 9.75)\n else:\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n if name == \"sorting\" or name == \"set_intersection\":\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:" + }, + { + "comment": "If \"keyword_counting\", set ylabel as \"Number of errors; the lower the better\". If \"document_merging\", set ylabel as \"Score (out of 10); the higher the better\". Otherwise, set ylabel as \"#incorrect elements; the lower the better\". If \"sorting\" or \"set_intersection\", set title as length of elements. Add a blue bar chart for total cost using ax2. Set tick colors and ylim on ax2. Customize ytick positions for ax2 using provided interval.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":184-211", + "content": " if name == \"keyword_counting\":\n ax.set_ylabel(\n f\"Number of errors; the lower the better\", fontsize=fig_fontsize\n )\n elif name == \"document_merging\":\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n else:\n ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n if name == \"sorting\" or name == \"set_intersection\":\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:" + }, + { + "comment": "Setting the y-label for a plot, displaying the number of solved methods, and saving the figure with appropriate filename based on the method name and 
model.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":212-245", + "content": " ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n if name == \"keyword_counting\" or name == \"document_merging\":\n fig.savefig(f\"{name}_{model}.pdf\", bbox_inches=\"tight\")\n else:\n fig.savefig(f\"{name}_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_032\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"]," + }, + { + "comment": "The code snippet is defining functions and parameters for plotting data from various models. It uses the 'plot_results' function with different arguments to visualize the results of operations such as set intersection and sorting. The plots have various settings like length, upper limit, cost, display options, etc. 
to customize the visual representation of the data.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":246-288", + "content": " length=32,\n y_upper=19,\n cost_upper=2,\n display_solved=True,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_064\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=64,\n y_upper=32,\n cost_upper=5.4,\n display_solved=True,\n annotation_offset=0.2,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_128\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=128,\n y_upper=94,\n cost_upper=12,\n display_solved=True,\n annotation_offset=-3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_032\", get_final_scores),\n length=32,\n display_solved=False,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True," + }, + { + "comment": "Code snippet contains multiple function calls to plot results using different sets of data and parameters. It plots data for tasks \"sorting\" and \"keyword_counting\", and \"document_merging\". 
Each call specifies the task, data, methods order, labels, limits, display options, and other settings.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":289-329", + "content": ")\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_064\", get_final_scores),\n length=64,\n y_upper=64,\n cost_upper=5.1,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_128\", get_final_scores),\n length=128,\n y_upper=128,\n cost_upper=17,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"keyword_counting\",\n get_plotting_data(\"keyword_counting_gpt35\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"gsp4\", \"gsp8\", \"gspx\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"],\n y_upper=35,\n cost_upper=9,\n display_solved=True,\n annotation_offset=-0.3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"document_merging\",\n get_plotting_data(\"document_merging_gpt35_16k\", get_final_scores_doc_merge),\n methods_order=[\"io\", \"cot\", \"tot\", \"gsp\", \"gsp2\"]," + }, + { + "comment": "Parameters for plotting methods labels, upper limit of y-axis, cost threshold, and display options.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/plots.py\":330-336", + "content": " methods_labels=[\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"],\n y_upper=10,\n cost_upper=15,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/33c839ec-9562-4993-8007-5179237aa1f5.json b/docs/doc/33c839ec-9562-4993-8007-5179237aa1f5.json new file mode 100644 index 0000000..56d6dbb --- /dev/null +++ b/docs/doc/33c839ec-9562-4993-8007-5179237aa1f5.json @@ -0,0 +1,50 @@ +{ + "summary": "The 
Language Models module supports GPT-4/GPT-3.5 and Llama-2, with functionality for instantiating LLMs, adding new ones, and using OpenAI API features like pricing and response_token_cost. It is implemented in a base class for building language models that allows for querying and retrieving response texts.", + "details": [ + { + "comment": "This code introduces the Language Models module and explains its purpose. It currently supports GPT-4/GPT-3.5 (Remote - OpenAI API) and Llama-2 (Local - HuggingFace Transformers). The following sections describe how to instantiate individual LLMs and add new ones to the framework. The LLM instantiation process involves creating a copy of `config_template.json`, filling in configuration details based on the used model, and adjusting predefined configurations or creating a new one with an unique key for GPT-4/GPT-3.5.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":0-17", + "content": "# Language Models\nThe Language Models module is responsible for managing the large language models (LLMs) used by the Controller.\nCurrently, the framework supports the following LLMs:\n- GPT-4 / GPT-3.5 (Remote - OpenAI API)\n- Llama-2 (Local - HuggingFace Transformers) \nThe following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework.\n## LLM Instantiation\n- Create a copy of `config_template.json` named `config.json`.\n- Fill configuration details based on the used model (below).\n### GPT-4 / GPT-3.5\n- Adjust predefined `chatgpt`, `chatgpt4` or create new configuration with an unique key.\n| Key | Value |" + }, + { + "comment": "This table maps model IDs to their respective OpenAI names and calculates prompt token costs based on OpenAI pricing, which is used for determining cumulative prices per language modeling (LLM) instance.", + "location": "\"/media/root/Toshiba 
XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":18-20", + "content": "|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Model name based on [OpenAI model overview](https://platform.openai.com/docs/models/overview). |\n| prompt_token_cost | Price per 1000 prompt tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. " + }, + { + "comment": "The code defines 'response_token_cost', a variable representing the price per 1000 response tokens, which follows OpenAI's pricing. It also includes 'temperature', a parameter controlling randomness and creativity in responses. The value is between 0.0 and 2.0, defaulting to 1.0, with further details available in the OpenAI API reference. Lastly, 'max_tokens' sets the maximum number of tokens generated in chat completions.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":20-23", + "content": " |\n| response_token_cost | Price per 1000 response tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. |\n| temperature | Parameter of OpenAI models that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 2.0, default is 1.0. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature). |\n| max_tokens | The maximum number of tokens to generate in the chat completion. 
Value " + }, + { + "comment": "This code defines three input parameters for the OpenAI API's chat creation endpoint: \"model\", \"stop\", and \"organization\". The model parameter specifies the language model to use, with its maximum context size determined by the OpenAI model overview. The stop parameter identifies a sequence of characters that halt further token generation, referencing the OpenAI API reference for more information. Lastly, organization is an optional field used for API requests, which can be left empty.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":23-25", + "content": "depends on the maximum context size of the model specified in the [OpenAI model overview](https://platform.openai.com/docs/models/overview). More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens). |\n| stop | String or array of strings specifying sequence of characters which if detected, stops further generation of tokens. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-stop). |\n| organization | Organization to use for the API requests (may be empty). |" + }, + { + "comment": "The code snippet is initializing a language model controller using the ChatGPT class. It takes in the path to a configuration file and a model name corresponding to the selected configuration key. The model can be predefined (llama7b-hf, llama13b-hf, llama70b-hf) or custom with a unique key.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":26-41", + "content": "| api_key | Personal API key that will be used to access OpenAI API. 
|\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.ChatGPT(\n \"path/to/config.json\", \n model_name=\n)\n```\n### Llama-2\n- Requires local hardware to run inference and a HuggingFace account.\n- Adjust predefined `llama7b-hf`, `llama13b-hf`, `llama70b-hf` or create a new configuration with an unique key.\n| Key | Value |\n|---------------------|----------------" + }, + { + "comment": "This code block is defining the parameters for a language model, including the Llama 2 model identifier (`model_id`), the local directory where the model will be stored and accessed (`cache_dir`), the price per 1000 prompt tokens (`prompt_token_cost`), the price per 1000 response tokens (`response_token_cost`), and a parameter for temperature control. Note that currently, these costs are not used due to the local model being cost-free.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":41-46", + "content": "-----------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Specifies HuggingFace Llama 2 model identifier (`meta-llama/`). |\n| cache_dir | Local directory where model will be downloaded and accessed. |\n| prompt_token_cost | Price per 1000 prompt tokens (currently not used - local model = no cost). |\n| response_token_cost | Price per 1000 response tokens (currently not used - local model = no cost). |\n| temperature | Parameter " + }, + { + "comment": "The code initializes a language model (Llama2HF) with a specified configuration key, which determines the randomness and creativity of responses. It also sets top-K sampling method from Transformers tutorial and maximum tokens to generate in chat completion. 
Access to Llama-2 is requested via Meta form using the same email as HuggingFace account, then access HuggingFace Llama-2 model page.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":46-58", + "content": "that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 1.0, default is 0.6. |\n| top_k | Top-K sampling method described in [Transformers tutorial](https://huggingface.co/blog/how-to-generate). Default value is set to 10. |\n| max_tokens | The maximum number of tokens to generate in the chat completion. More tokens require more memory. |\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.Llama2HF(\n \"path/to/config.json\", \n model_name=\n)\n```\n- Request access to Llama-2 via the [Meta form](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) using the same email address as for the HuggingFace account.\n- After the access is granted, go to [HuggingFace Llama-2 model ca" + }, + { + "comment": "This code provides instructions for adding a new LLM (Language Language Model) to the existing model. To do so, create a subclass of `AbstractLanguageModel` and use the constructor to load configuration and instantiate the language model if needed. The model is downloaded from HuggingFace into the cache directory specified in the config.json. 
Running queries with larger models may require multiple GPUs, which will be automatically split by the Transformers library.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":58-75", + "content": "rd](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (_\"You have been granted access to this model\"_ message should appear).\n- Generate HuggingFace access token.\n- Log in from CLI with: `huggingface-cli login --token `.\nNote: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library).\n## Adding LLMs\nMore LLMs can be added by following these steps:\n- Create new class as a subclass of `AbstractLanguageModel`.\n- Use the constructor for loading configuration and instantiating the language model (if needed). \n```\nclass CustomLanguageModel(AbstractLanguageModel):\n def __init__(\n self,\n config_path: str = \"\",\n model_name: str = \"llama7b-hf\",\n cache: bool = False\n ) -> None:" + }, + { + "comment": "The code is a part of a class that serves as a base for building language models. It loads configuration and initializes the model. The `query` method calls the LLM to get responses based on a query, while `get_response_texts` retrieves raw texts from the response structure produced by `query`. 
These methods are abstract and need to be implemented in child classes.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/README.md\":76-94", + "content": " super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Load data from configuration into variables if needed\n # Instantiate LLM if needed\n```\n- Implement `query` abstract method that is used to get a list of responses from the LLM (call to remote API or local model inference).\n```\ndef query(self, query: str, num_responses: int = 1) -> Any:\n # Support caching \n # Call LLM and retrieve list of responses - based on num_responses \n # Return LLM response structure (not only raw strings) \n```\n- Implement `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`.\n```\ndef get_response_texts(self, query_response: Union[List[Dict], Dict]) -> List[str]:\n # Retrieve list of raw strings from the LLM response structure \n```" + } + ] +} \ No newline at end of file diff --git a/docs/doc/3a316a1b-8eab-437e-a0af-d0bb26acbfbb.json b/docs/doc/3a316a1b-8eab-437e-a0af-d0bb26acbfbb.json new file mode 100644 index 0000000..e494ba3 --- /dev/null +++ b/docs/doc/3a316a1b-8eab-437e-a0af-d0bb26acbfbb.json @@ -0,0 +1,20 @@ +{ + "summary": "The Graph of Operations class manages operation execution plans, initializing with empty lists and providing a method to append operations. It iterates through predecessors, removing leaves and appending operations without successors.", + "details": [ + { + "comment": "This code represents the Graph of Operations class, which is responsible for managing the execution plan of thought operations. 
It initializes with empty lists for operations, roots, and leaves, and provides a method to append an operation to all leaves in the graph while updating relationships.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/graph_of_operations.py\":0-31", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nfrom typing import List\nfrom graph_of_thoughts.operations.operations import Operation\nclass GraphOfOperations:\n \"\"\"\n Represents the Graph of Operations, which prescribes the execution plan of thought operations.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Graph of Operations instance with empty operations, roots, and leaves.\n The roots are the entry points in the graph with no predecessors.\n The leaves are the exit points in the graph with no successors.\n \"\"\"\n self.operations: List[Operation] = []\n self.roots: List[Operation] = []\n self.leaves: List[Operation] = []\n def append_operation(self, operation: Operation) -> None:\n \"\"\"\n Appends an operation to all leaves in the graph and updates the relationships." + }, + { + "comment": "This code appends an operation to the graph and adjusts roots and leaves accordingly. If there are no roots, it sets the added operation as both root and leaf with no predecessors. 
If the added operation has no predecessors, it adds it as a new root.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/graph_of_operations.py\":33-63", + "content": " :param operation: The operation to append.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n else:\n for leave in self.leaves:\n leave.add_successor(operation)\n self.leaves = [operation]\n def add_operation(self, operation: Operation) -> None:\n \"\"\"\n Add an operation to the graph considering its predecessors and successors.\n Adjust roots and leaves based on the added operation's position within the graph.\n :param operation: The operation to add.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n self.leaves = [operation]\n assert (\n len(operation.predecessors) == 0\n ), \"First operation should have no predecessors\"\n else:\n if len(operation.predecessors) == 0:\n self.roots.append(operation)" + }, + { + "comment": "Iterates through predecessors of an operation, removes leaves if they are also operation's predecessors, appends the operation to the leaves list if it has no successors.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/graph_of_operations.py\":64-68", + "content": " for predecessor in operation.predecessors:\n if predecessor in self.leaves:\n self.leaves.remove(predecessor)\n if len(operation.successors) == 0:\n self.leaves.append(operation)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/3b0530b9-09df-4bf9-92e1-a25262e77d4c.json b/docs/doc/3b0530b9-09df-4bf9-92e1-a25262e77d4c.json new file mode 100644 index 0000000..8d8d90a --- /dev/null +++ b/docs/doc/3b0530b9-09df-4bf9-92e1-a25262e77d4c.json @@ -0,0 +1,20 @@ +{ + "summary": "The code snippet provides license terms for redistributing 
\"graph-of-thoughts\" project, mentions authors of the research paper \"Graph of Thoughts,\" and gives its arXiv preprint identifier.", + "details": [ + { + "comment": "This code snippet is a license agreement for the \"graph-of-thoughts\" project. It outlines the terms and conditions for redistribution of source and binary forms, ensuring proper copyright notices and prohibiting unauthorized endorsements or promotions.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/LICENSE\":0-22", + "content": "Copyright (c) 2023 ETH Zurich.\n All rights reserved.\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n- Redistributions of source code must retain the above copyright\n notice, this list of conditions and the following disclaimer.\n- Redistributions in binary form must reproduce the above copyright\n notice, this list of conditions and the following disclaimer listed\n in this license in the documentation and/or other materials\n provided with the distribution.\n- Neither the name of the copyright holders nor the names of its\n contributors may be used to endorse or promote products derived from\n this software without specific prior written permission.\nThe copyright holders provide no reassurances that the source code\nprovided does not infringe any patent, copyright, or any other\nintellectual property rights of third parties. The copyright holders\ndisclaim any liability to any recipient for claims brought against" + }, + { + "comment": "This code snippet contains the license terms for a software, specifying that it is provided \"as is\" without warranties and disclaiming liability for any damages arising from its use. 
Additionally, the code includes instructions to cite the software in any published work that uses it.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/LICENSE\":23-45", + "content": "recipient by any third party for infringement of that parties\nintellectual property rights.\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\nCitation\n========\nAny published work which uses this software should include the\nfollowing citation:\n----------------------------------------------------------------------" + }, + { + "comment": "This code snippet lists the authors of a research paper titled \"Graph of Thoughts: Solving Elaborate Problems with Large Language Models\" and provides its arXiv preprint identifier, which is arXiv:2308.09687.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/LICENSE\":46-51", + "content": "Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Lukas\nGianinazzi, Joanna Gajda, Tomasz Lehmann, Micha\u0142 Podstawski, Hubert\nNiewiadomski, Piotr Nyczyk, Torsten Hoefler: Graph of Thoughts: Solving\nElaborate Problems with Large Language Models. 
In: arXiv preprint\narXiv:2308.09687\n----------------------------------------------------------------------" + } + ] +} \ No newline at end of file diff --git a/docs/doc/3e9ea6a0-67fc-4b96-a38e-eb83e5cb25bd.json b/docs/doc/3e9ea6a0-67fc-4b96-a38e-eb83e5cb25bd.json new file mode 100644 index 0000000..ed9f7b0 --- /dev/null +++ b/docs/doc/3e9ea6a0-67fc-4b96-a38e-eb83e5cb25bd.json @@ -0,0 +1,15 @@ +{ + "summary": "The code showcases a document merging approach using various methods like IO, CoT, ToT, and GoT. It takes 50 sample documents from `documents.csv`, applies chosen techniques, and outputs results in an LLM-named directory with debug logs and separate JSON files for each approach.", + "details": [ + { + "comment": "This code demonstrates a document merging use case using different approaches, including IO, Chain-of-Thought (CoT), Tree of Thought (ToT), and Graph of Thoughts (GoT). It uses 50 sample documents from `documents.csv`, executes the selected samples with chosen approaches, and saves results in a directory named by the LLM, approaches, day, and start time.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/README.md\":0-27", + "content": "# Document Merging\nThe use case in this directory generates new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents. \nWe provide implementations of five different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT)\n- Graph of Thoughts (GoT):\n - GoT: aggregation of fully merged NDAs\n - GoT2: aggregation of partially merged NDAs\n## Data\nWe provide an input file with 50 samples: `documents.csv`.\n## Execution\nThe file to execute the use case is called\n`doc_merge.py`. In the main body, one can\nselect the specific samples to be run (variable samples) and the\napproaches (variable approaches). 
It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are" + }, + { + "comment": "This code generates a configuration file named `config.json` that contains input data, selected approaches, LLM name, and budget information. Additionally, it logs prompts, responses, and debug data in `log.log`. Each approach directory holds separate JSON files for every sample with the Graph Reasoning State (GRS) included. To plot the data, change the results directory at line 158 of `plot.py` and run `python3 plot.py`.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/README.md\":28-37", + "content": "created. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 158 of `plot.py` and run `python3\nplot.py` to plot your data." 
+ } + ] +} \ No newline at end of file diff --git a/docs/doc/42b586af-f715-430a-8eaa-489513178ae3.json b/docs/doc/42b586af-f715-430a-8eaa-489513178ae3.json new file mode 100644 index 0000000..09d9fe8 --- /dev/null +++ b/docs/doc/42b586af-f715-430a-8eaa-489513178ae3.json @@ -0,0 +1,10 @@ +{ + "summary": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", + "details": [ + { + "comment": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/__init__.py\":0-2", + "content": "from .abstract_language_model import AbstractLanguageModel\nfrom .chatgpt import ChatGPT\nfrom .llamachat_hf import Llama2HF" + } + ] +} \ No newline at end of file diff --git a/docs/doc/442814a1-6938-498f-a984-d0c2c323e6a4.json b/docs/doc/442814a1-6938-498f-a984-d0c2c323e6a4.json new file mode 100644 index 0000000..f718fc6 --- /dev/null +++ b/docs/doc/442814a1-6938-498f-a984-d0c2c323e6a4.json @@ -0,0 +1,40 @@ +{ + "summary": "The code creates a ChatGPT class that inherits from AbstractLanguageModel, initializes with configuration and model details, sets query parameters, supports multiple responses, uses OpenAI's chat API, incorporates backoff and caching for optimization, logs response texts and costs, and utilizes `get_response_texts` to extract response strings.", + "details": [ + { + "comment": "This code is the initialization of a class called ChatGPT. It inherits from AbstractLanguageModel and initializes with configuration, model details, and caching options. The config_path parameter is for the path to a configuration file and defaults to an empty string. 
The model_name parameter specifies the model to be used, defaulting to \"chatgpt\", and cache can be set to True or False for enabling or disabling caching respectively.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":0-34", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport backoff\nimport os\nimport random\nimport time\nfrom typing import List, Dict, Union\nfrom openai import OpenAI, OpenAIError\nfrom openai.types.chat.chat_completion import ChatCompletion\nfrom .abstract_language_model import AbstractLanguageModel\nclass ChatGPT(AbstractLanguageModel):\n \"\"\"\n The ChatGPT class handles interactions with the OpenAI models using the provided configuration.\n Inherits from the AbstractLanguageModel and implements its abstract methods.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"chatgpt\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the ChatGPT instance with configuration, model details, and caching options.\n :param config_path: Path to the configuration file. Defaults to \"\".\n :type config_path: str\n " + }, + { + "comment": "The code initializes a model with a specified name and sets the cache flag. It retrieves the model ID, prompt token cost, response token cost, temperature, and maximum number of tokens for chat completion from the configuration file.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":34-48", + "content": " :param model_name: Name of the model, default is 'chatgpt'. Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. 
Defaults to False.\n :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # The model_id is the id of the model that is used for chatgpt, i.e. gpt-4, gpt-3.5-turbo, etc.\n self.model_id: str = self.config[\"model_id\"]\n # The prompt_token_cost and response_token_cost are the costs for 1000 prompt tokens and 1000 response tokens respectively.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature of a model is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # The maximum number of tokens to generate in the chat completion." + }, + { + "comment": "This code initializes an instance of a language model and sets parameters such as maximum tokens, stop sequence, organization, API key, and initializes the OpenAI client. It also includes a query method to ask the language model for responses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":49-68", + "content": " self.max_tokens: int = self.config[\"max_tokens\"]\n # The stop sequence is a sequence of tokens that the model will stop generating at (it will not generate the stop sequence).\n self.stop: Union[str, List[str]] = self.config[\"stop\"]\n # The account organization is the organization that is used for chatgpt.\n self.organization: str = self.config[\"organization\"]\n if self.organization == \"\":\n self.logger.warning(\"OPENAI_ORGANIZATION is not set\")\n self.api_key: str = os.getenv(\"OPENAI_API_KEY\", self.config[\"api_key\"])\n if self.api_key == \"\":\n raise ValueError(\"OPENAI_API_KEY is not set\")\n # Initialize the OpenAI Client\n self.client = OpenAI(api_key=self.api_key, organization=self.organization)\n def query(\n self, query: str, num_responses: int = 1\n ) -> Union[List[ChatCompletion], 
ChatCompletion]:\n \"\"\"\n Query the OpenAI model for responses.\n :param query: The query to be posed to the language model." + }, + { + "comment": "The code defines a function that takes a query and the number of desired responses. If the query is in the cache, it returns the corresponding response(s). If not, it calls the OpenAI chat model to generate responses for the given query. It supports generating multiple responses by repeatedly calling the OpenAI model until the required number of responses are obtained or an exception occurs. The function also logs any warnings during the process.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":69-93", + "content": " :type query: str\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: Response(s) from the OpenAI model.\n :rtype: Dict\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n if num_responses == 1:\n response = self.chat([{\"role\": \"user\", \"content\": query}], num_responses)\n else:\n response = []\n next_try = num_responses\n total_num_attempts = num_responses\n while num_responses > 0 and total_num_attempts > 0:\n try:\n assert next_try > 0\n res = self.chat([{\"role\": \"user\", \"content\": query}], next_try)\n response.append(res)\n num_responses -= next_try\n next_try = min(num_responses, next_try)\n except Exception as e:\n next_try = (next_try + 1) // 2\n self.logger.warning(" + }, + { + "comment": "This code is defining a class with a chat method that sends messages to the OpenAI model and retrieves the response. The method implements backoff on OpenAI error, allowing for multiple attempts if an error occurs. 
It also includes caching functionality to improve performance by storing previous responses in a cache.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":94-118", + "content": " f\"Error in chatgpt: {e}, trying again with {next_try} samples\"\n )\n time.sleep(random.randint(1, 3))\n total_num_attempts -= 1\n if self.cache:\n self.respone_cache[query] = response\n return response\n @backoff.on_exception(backoff.expo, OpenAIError, max_time=10, max_tries=6)\n def chat(self, messages: List[Dict], num_responses: int = 1) -> ChatCompletion:\n \"\"\"\n Send chat messages to the OpenAI model and retrieves the model's response.\n Implements backoff on OpenAI error.\n :param messages: A list of message dictionaries for the chat.\n :type messages: List[Dict]\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: The OpenAI model's response.\n :rtype: ChatCompletion\n \"\"\"\n response = self.client.chat.completions.create(\n model=self.model_id,\n messages=messages," + }, + { + "comment": "This code interacts with an OpenAI model, specifically the ChatGPT API. It takes a query as input and generates multiple responses using the API. The code keeps track of usage costs in terms of prompt and completion tokens, and logs the response text along with the cost for each generated response. 
The `get_response_texts` method extracts the response texts from the query response dictionary or list of dictionaries returned by the OpenAI model.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":119-145", + "content": " temperature=self.temperature,\n max_tokens=self.max_tokens,\n n=num_responses,\n stop=self.stop,\n )\n self.prompt_tokens += response.usage.prompt_tokens\n self.completion_tokens += response.usage.completion_tokens\n prompt_tokens_k = float(self.prompt_tokens) / 1000.0\n completion_tokens_k = float(self.completion_tokens) / 1000.0\n self.cost = (\n self.prompt_token_cost * prompt_tokens_k\n + self.response_token_cost * completion_tokens_k\n )\n self.logger.info(\n f\"This is the response from chatgpt: {response}\"\n f\"\\nThis is the cost of the response: {self.cost}\"\n )\n return response\n def get_response_texts(\n self, query_response: Union[List[ChatCompletion], ChatCompletion]\n ) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_response: The response dictionary (or list of dictionaries) from the OpenAI model." 
+ }, + { + "comment": "This function converts a single ChatCompletion or list of them into a list of response strings by iterating over the choices within each completion and extracting their content.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/chatgpt.py\":146-156", + "content": " :type query_response: Union[List[ChatCompletion], ChatCompletion]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n if not isinstance(query_response, List):\n query_response = [query_response]\n return [\n choice.message.content\n for response in query_response\n for choice in response.choices\n ]" + } + ] +} \ No newline at end of file diff --git a/docs/doc/4ad0e7eb-9359-46ee-91a5-5765b4dbec15.json b/docs/doc/4ad0e7eb-9359-46ee-91a5-5765b4dbec15.json new file mode 100644 index 0000000..b308b28 --- /dev/null +++ b/docs/doc/4ad0e7eb-9359-46ee-91a5-5765b4dbec15.json @@ -0,0 +1,30 @@ +{ + "summary": "The Operations module manages thought manipulation with language models and helper classes, including 'ValidateAndImprove' and 'Generate' operations, as well as three additional operations: **KeepValid**, **Selector**, and **GroundTruth** for thought processing systems.", + "details": [ + { + "comment": "This code snippet describes the Operations module, which contains operations for manipulating and processing thoughts represented by the Thought class. It uses a language model and helper classes like Prompter and Parser for communication and result extraction. The Graph of Operations is the main class that orchestrates operations and maintains thought graph state.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/README.md\":0-13", + "content": "# Operations\nThe Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class. 
\nOperations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model. \nThe [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State.\n## Graph of Operations\nThe [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here\u2019s how you can construct and work with the Graph of Operations:\n### Initialization\nCreating a new instance of GraphOfOperations:\n```python\nfrom graph_of_thoughts.operations import GraphOfOperations" + }, + { + "comment": "The code initializes a GraphOfOperations object, which starts empty and can be used to add operations with relationships. Operations can be appended at the end or added while specifying their predecessors. Available operations include the Score operation for scoring thoughts using LLM or custom scoring functions.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/README.md\":15-39", + "content": "graph = GraphOfOperations()\n```\nUpon initialization, the graph will be empty with no operations, roots, or leaves.\n### Adding Operations\n**Append Operation:** You can append operations to the end of the graph using the append_operation method. 
This ensures that the operation becomes a successor to all current leaf operations in the graph.\n```python\nfrom graph_of_thoughts.operations import Generate\noperationA = Generate()\ngraph.append_operation(operationA)\n```\n**Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method.\n```python\noperationB = Generate()\noperationB.predecessors.append(operationA)\ngraph.add_operation(operationB)\n```\nRemember to set up the predecessors (and optionally successors) for your operation before adding it to the graph.\n## Available Operations\nThe following operations are available in the module:\n**Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function." + }, + { + "comment": "This code describes several operations for a thought processing system. The 'ValidateAndImprove' operation validates each thought and attempts to improve it if invalid, while the 'Generate' operation generates new thoughts based on previous ones or initial input to the Controller. Optional parameters include number of samples, scoring function, validation function, and whether to improve or generate new thoughts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/README.md\":40-50", + "content": "- num_samples (Optional): The number of samples to use for scoring, defaults to 1.\n- combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False.\n- scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought.\n**ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it. 
\n- num_samples (Optional): The number of samples to use for validation, defaults to 1.\n- improve (Optional): Whether to improve the thought if it is invalid, defaults to True.\n- num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3.\n- validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid.\n**Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py). " + }, + { + "comment": "This code snippet provides details about the available operations and their respective parameters for generating, aggregating, or filtering thoughts. It allows users to generate multiple responses, combine them into a single thought, or keep the best N thoughts based on scores. The code also includes default values for optional parameters to ease usage.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/README.md\":51-61", + "content": "- num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n- num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1.\n**Improve:** Improve the current thoughts. This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them. \n**Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought. \n- num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). Defaults to 1.\n**KeepBestN:** Keep the best N thoughts from the preceding thoughts. 
Assumes that the thoughts are already scored and throws an error if they are not.\n- n: The number of thoughts to keep in order of score.\n- higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True." + }, + { + "comment": "This code defines three operations: **KeepValid** retains valid thoughts, **Selector** selects a subset of thoughts using a selection function, and **GroundTruth** checks if the preceding/current thoughts solve the problem (requires known ground truth).", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/README.md\":63-69", + "content": "**KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated, if not, it will be considered valid.\n**Selector:** Select a number of thoughts from the preceding thoughts using a selection function. This is useful if subsequent operations should only be applied to a subset of the preceding thoughts.\n- selector: A function that takes in a list of thoughts and returns a list of thoughts to select.\n**GroundTruth**: Evaluates if the preceding/current thoughts solve the problem and equal the ground truth. This operation is useful for terminating the graph and checking if the final thoughts solve the problem, but is only useful if the ground truth is known.\n- ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem." + } + ] +} \ No newline at end of file diff --git a/docs/doc/4f89764f-01c4-48c9-aaae-47f193ac7c6c.json b/docs/doc/4f89764f-01c4-48c9-aaae-47f193ac7c6c.json new file mode 100644 index 0000000..a41a520 --- /dev/null +++ b/docs/doc/4f89764f-01c4-48c9-aaae-47f193ac7c6c.json @@ -0,0 +1,35 @@ +{ + "summary": "The code collects and processes results from various AI methods, storing them in dictionaries for analysis or visualization. 
It generates boxplots to display the final scores of different methods with customizable y-axis settings and font size. The code also sets labels, plots a bar graph, adds annotations, adjustments, and text, saves as PDF, replaces characters in model names, and calls another function.", + "details": [ + { + "comment": "This code retrieves complete results from a given base directory. It iterates through each folder in the directory, loads JSON files within each folder, and stores the key-value pairs as dictionaries within lists under each folder's name in a results dictionary. The code also checks if directories are not empty folders.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":0-28", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():" + }, + { + "comment": "This code organizes and processes results from various AI methods, extracting scores, solved status, prompt/completion tokens, and cost for each method. 
It stores this information in a dictionary for further analysis or visualization.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":29-57", + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]" + }, + { + "comment": "The code retrieves final scores from complete results and organizes them into a dictionary for plotting. It then creates a new dictionary with scores, solved problems count, and costs for each method. 
This data is used to plot the results in a graph, considering options like method order, model, length, cost limits, and display settings.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":58-93", + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order" + }, + { + "comment": "This code creates a boxplot to visualize the results of different methods. It sets the y-axis limits and ticks based on the length of the data, and customizes the font size for better readability. 
The code also handles the display of additional information (solved count) by adjusting the range of y-axis ticks accordingly.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":94-129", + "content": " ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:" + }, + { + "comment": "This code sets y-axis label, title, and twin axis for plotting. It then plots a bar graph using the twin axis, setting the y-axis limits and ticks based on specified conditions. 
Finally, it checks if certain conditions are met and adds annotations or adjusts the graph accordingly.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":130-161", + "content": " ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]" + }, + { + "comment": "This code is adding text annotations to a plot, incrementing a count variable, and saving the final plot as a PDF. 
It replaces certain characters in the model name and calls another function for more plotting results with specific parameters.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/plot.py\":162-183", + "content": " ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"set_intersection_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/51b07dd0-8338-4393-8a08-9a958d2115ea.json b/docs/doc/51b07dd0-8338-4393-8a08-9a958d2115ea.json new file mode 100644 index 0000000..4707727 --- /dev/null +++ b/docs/doc/51b07dd0-8338-4393-8a08-9a958d2115ea.json @@ -0,0 +1,10 @@ +{ + "summary": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", + "details": [ + { + "comment": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/prompter/__init__.py\":0-0", + "content": "from .prompter import Prompter" + } + ] +} \ No newline at end of file diff --git a/docs/doc/57b25d90-f16a-47ed-8acc-1205c076e73e.json b/docs/doc/57b25d90-f16a-47ed-8acc-1205c076e73e.json new file mode 100644 index 0000000..a0fb4dc --- /dev/null +++ b/docs/doc/57b25d90-f16a-47ed-8acc-1205c076e73e.json @@ -0,0 +1,165 @@ +{ + "summary": "The comments describe operations that preserve valid thoughts from predecessors, with Comment A introducing an abstract base class for 
Graph of Thoughts operations and Comment B focusing on the GroundTruth operation in a code context.", + "details": [ + { + "comment": "This code defines an abstract base class for operations in the Graph of Thoughts system. It includes an OperationType Enum representing unique operation identifiers and outlines the interface for all operations. This base class will be used to create concrete implementations of different types of operations within the system.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":0-39", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom enum import Enum\nfrom typing import List, Iterator, Dict, Callable, Union\nfrom abc import ABC, abstractmethod\nimport itertools\nfrom graph_of_thoughts.operations.thought import Thought\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass OperationType(Enum):\n \"\"\"\n Enum to represent different operation types that can be used as unique identifiers.\n \"\"\"\n score: int = 0\n validate_and_improve: int = 1\n generate: int = 2\n improve: int = 3\n aggregate: int = 4\n keep_best_n: int = 5\n keep_valid: int = 6\n ground_truth_evaluator: int = 7\n selector: int = 8\nclass Operation(ABC):\n \"\"\"\n Abstract base class that defines the interface for all operations." + }, + { + "comment": "Initializes a new Operation instance with a unique ID and empty predecessors and successors. The operation can be executed if all its predecessors have been executed. 
Aggregates thoughts from predecessors to return all thoughts from them.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":40-70", + "content": " \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n operation_type: OperationType = None\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Operation instance with a unique id, and empty predecessors and successors.\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Operation._ids)\n self.predecessors: List[Operation] = []\n self.successors: List[Operation] = []\n self.executed: bool = False\n def can_be_executed(self) -> bool:\n \"\"\"\n Checks if the operation can be executed based on its predecessors.\n :return: True if all predecessors have been executed, False otherwise.\n :rtype: bool\n \"\"\"\n return all(predecessor.executed for predecessor in self.predecessors)\n def get_previous_thoughts(self) -> List[Thought]:\n \"\"\"\n Iterates over all predecessors and aggregates their thoughts.\n :return: A list of all thoughts from the predecessors.\n :rtype: List[Thought]" + }, + { + "comment": "This code defines an Operation class with methods to add predecessors and successors, ensuring proper relationships are updated. 
The execute method executes the operation after all predecessors have been executed.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":71-104", + "content": " \"\"\"\n previous_thoughts: List[Thought] = [\n thought\n for predecessor in self.predecessors\n for thought in predecessor.get_thoughts()\n ]\n return previous_thoughts\n def add_predecessor(self, operation: Operation) -> None:\n \"\"\"\n Add a preceding operation and update the relationships.\n :param operation: The operation to be set as a predecessor.\n :type operation: Operation\n \"\"\"\n self.predecessors.append(operation)\n operation.successors.append(self)\n def add_successor(self, operation: Operation) -> None:\n \"\"\"\n Add a succeeding operation and update the relationships.\n :param operation: The operation to be set as a successor.\n :type operation: Operation\n \"\"\"\n self.successors.append(operation)\n operation.predecessors.append(self)\n def execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Execute the operation, assuring that all predecessors have been executed." + }, + { + "comment": "The code defines a class with an abstract method for executing operations, requiring a language model (AbstractLanguageModel), prompter (Prompter), and parser (Parser). 
The class checks if all predecessors have been executed before execution, logs information during execution, marks itself as executed upon completion.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":106-129", + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If not all predecessors have been executed.\n \"\"\"\n assert self.can_be_executed(), \"Not all predecessors have been executed\"\n self.logger.info(\n \"Executing operation %d of type %s\", self.id, self.operation_type\n )\n self._execute(lm, prompter, parser, **kwargs)\n self.logger.debug(\"Operation %d executed\", self.id)\n self.executed = True\n @abstractmethod\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Abstract method for the actual execution of the operation.\n This should be implemented in derived classes." + }, + { + "comment": "This code defines an abstract class \"Operation\" with a method to get associated thoughts and a concrete class \"Score\" that inherits from it. The Score class takes parameters like num_samples, combined_scoring, and scoring_function for scoring thoughts. 
The get_thoughts method must be implemented in derived classes.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":131-167", + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n pass\n @abstractmethod\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Abstract method to retrieve the thoughts associated with the operation.\n This should be implemented in derived classes.\n :return: List of associated thoughts.\n :rtype: List[Thought]\n \"\"\"\n pass\nclass Score(Operation):\n \"\"\"\n Operation to score thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.score\n def __init__(\n self,\n num_samples: int = 1,\n combined_scoring: bool = False,\n scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = None,\n ) -> None:" + }, + { + "comment": "This code defines a class for a Score operation that takes a specified number of samples, whether to score thoughts individually or combined, and a scoring function (defaulting to None). It initializes the operation with these parameters and returns the associated scored thoughts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":168-191", + "content": " \"\"\"\n Initializes a new Score operation.\n :param num_samples: Number of samples to use for scoring. Defaults to 1.\n :type num_samples: int\n :param combined_scoring: Whether to score all thoughts together or individually. Defaults to False.\n :type combined_scoring: bool\n :param scoring_function: A function to score thoughts (if not using LM). 
Defaults to None.\n :type scoring_function: Takes a list of thought states or a single thought state and\n returns a list of scores or a single score.\n \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.combined_scoring: bool = combined_scoring\n self.thoughts: List[Thought] = []\n self.scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = scoring_function\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of scored thoughts." + }, + { + "comment": "This code defines a method that executes a scoring operation on thoughts from predecessors. It first gets the previous thoughts and asserts that there is at least one predecessor. If combined scoring is used, it scores the thoughts together; otherwise, individually. The language model (LM) and prompter are used for prompting if a scoring function is not provided.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":192-217", + "content": " :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the scoring operation by scoring the thoughts from the predecessors.\n If combined scoring is used, the thoughts are scored together, otherwise individually.\n If a scoring function is provided, it is used, otherwise the LM is prompted.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"Score operation 
needs at least one predecessor\"" + }, + { + "comment": "This code calculates scores for each previous thought using either a scoring function or by generating prompts from the thoughts and querying a language model. The scores are then assigned to the respective thoughts, and new Thought objects are created with the updated scores before being added to the thoughts list.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":219-238", + "content": " if self.combined_scoring:\n previous_thoughts_states = [thought.state for thought in previous_thoughts]\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score states\", self.scoring_function\n )\n scores = self.scoring_function(previous_thoughts_states)\n else:\n prompt = prompter.score_prompt(previous_thoughts_states)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n scores = parser.parse_score_answer(previous_thoughts_states, responses)\n for thought, score in zip(previous_thoughts, scores):\n new_thought = Thought.from_thought(thought)\n new_thought.score = score\n self.thoughts.append(new_thought)" + }, + { + "comment": "This code handles scoring thoughts based on whether a scoring function is defined or not. If the scoring function is not defined, it prompts a language model (LM) to generate responses for each thought state and uses a parser to calculate scores from the LM's responses. 
The new score is then assigned to the thought object, and the thought is appended to the thoughts list.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":239-262", + "content": " else:\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score state\",\n self.scoring_function,\n )\n score = self.scoring_function(thought.state)\n else:\n prompt = prompter.score_prompt([thought.state])\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n score = parser.parse_score_answer([thought.state], responses)[0]\n new_thought.score = score\n self.thoughts.append(new_thought)\n self.logger.info(\n \"Score operation %d scored %d thoughts\"," + }, + { + "comment": "This code defines a class called `ValidateAndImprove` that extends the `Operation` class. It is designed to validate and improve thoughts, with parameters for number of samples, whether to improve if not valid, number of tries before giving up, and a function to validate thoughts (optional). The operation type is specified as \"validate_and_improve\".", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":263-292", + "content": " self.id,\n len(self.thoughts),\n )\nclass ValidateAndImprove(Operation):\n \"\"\"\n Operation to validate and improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.validate_and_improve\n def __init__(\n self,\n num_samples: int = 1,\n improve: bool = True,\n num_tries: int = 3,\n validate_function: Callable[[Dict], bool] = None,\n ) -> None:\n \"\"\"\n Initializes a new ValidateAndImprove operation.\n :param num_samples: Number of samples to use for validation. 
Defaults to 1.\n :type num_samples: int\n :param improve: Whether to improve the thought if it is not valid. Defaults to True.\n :type improve: bool\n :param num_tries: Number of tries to improve the thought before giving up. Defaults to 3.\n :type num_tries: int\n :param validate_function: A function to validate thoughts (if not using LM). Defaults to None.\n :type validate_function: Takes a thought state and returns a boolean." + }, + { + "comment": "This code defines a class called `ValidateAndImprove` with attributes for the number of samples, whether to validate and improve thoughts, the number of tries, and a function to validate the thoughts. It also has methods to get final validated and improved thoughts, and execute validation and improvement using a language model.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":293-318", + "content": " \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.improve: bool = improve\n self.num_tries: int = num_tries\n self.validate_function: Callable[[Dict], bool] = validate_function\n self.thoughts: List[List[Thought]] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the list of final thoughts, after validation and improvement.\n :return: List of final validated and improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return [thought_list[-1] for thought_list in self.thoughts]\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the ValidateAndImprove operation by validating and improving the predecessors' thoughts.\n If a validation function is provided, it is used, otherwise the LM is prompted.\n If improvement is enabled, the LM is prompted to improve the thought, if it is not valid.\n :param lm: The language model to be used." 
+ }, + { + "comment": "This function gets the previous thoughts, checks that it has at least one predecessor, then iterates through the previous thoughts. It creates a new thought from each previous thought and enters a loop where it validates the current thought's state using a validate function.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":319-343", + "content": " :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"ValidateAndImprove operation needs at least one predecessor\"\n for thought in previous_thoughts:\n thought_list = []\n current_thought = Thought.from_thought(thought)\n current_try = 0\n while True:\n if self.validate_function is not None:\n self.logger.debug(\n \"Using validate function %s to score states\",\n self.validate_function,\n )\n valid = self.validate_function(current_thought.state)" + }, + { + "comment": "Code block retrieves a prompt from prompter, then uses it to get responses from a language model (LM). It validates the response, updates the current thought's validation status and adds it to the thought list. 
If conditions met, breaks out of the loop.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":344-365", + "content": " else:\n prompt = prompter.validation_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n valid = parser.parse_validation_answer(\n current_thought.state, responses\n )\n current_thought.valid = valid\n thought_list.append(current_thought)\n if (\n not self.improve\n or current_thought.valid\n or current_try >= self.num_tries\n ):\n break\n improve_prompt = prompter.improve_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(" + }, + { + "comment": "This code defines an operation class \"Generate\" for generating thoughts using a language model (LM). It iteratively improves and validates each thought until it reaches the specified number of valid thoughts. Each thought is stored in the \"thoughts\" list. 
The \"Validate and improve\" operation creates new valid thoughts from previous invalid ones, appending them to the \"thoughts\" list.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":366-398", + "content": " lm.query(improve_prompt, num_responses=1)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(\n current_thought.state, responses\n )\n current_thought = Thought({**current_thought.state, **state_update})\n current_try += 1\n self.thoughts.append(thought_list)\n self.logger.info(\n \"Validate and improve operation %d created %d valid thoughts from %d previous thoughts\",\n self.id,\n len(\n [\n thought_list[-1]\n for thought_list in self.thoughts\n if thought_list[-1].valid\n ]\n ),\n len(previous_thoughts),\n )\nclass Generate(Operation):\n \"\"\"\n Operation to generate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.generate\n def __init__(\n self, num_branches_prompt: int = 1, num_branches_response: int = 1" + }, + { + "comment": "This code defines a class for generating thoughts, with parameters for the number of responses per prompt and the language model used. It initializes these parameters, stores generated thoughts in a list, and provides methods to retrieve them. The `_execute` method is responsible for generating thoughts using a language model, prompter, and parser.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":399-426", + "content": " ) -> None:\n \"\"\"\n Initializes a new Generate operation.\n :param num_branches_prompt: Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n :type num_branches_prompt: int\n :param num_branches_response: Number of responses the LM should generate for each prompt. 
Defaults to 1.\n :type num_branches_response: int\n \"\"\"\n super().__init__()\n self.num_branches_prompt: int = num_branches_prompt\n self.num_branches_response: int = num_branches_response\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of generated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Generate operation by generating thoughts from the predecessors." + }, + { + "comment": "This function generates thoughts by using a language model (LM) with the predecessor's thought states as prompts. If there are no predecessors, it uses kwargs as a base state to generate thoughts. It then parses and logs the generated prompt for the LM.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":427-450", + "content": " The thoughts are generated by prompting the LM with the predecessors' thought states.\n If there are no predecessors, the kwargs are used as a base state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0 and len(self.predecessors) > 0:\n return\n if len(previous_thoughts) == 0:\n # no predecessors, use kwargs as base state\n previous_thoughts = [Thought(state=kwargs)]\n for thought in previous_thoughts:\n base_state = thought.state\n prompt = prompter.generate_prompt(self.num_branches_prompt, **base_state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)" + }, + { + "comment": "This code 
generates responses from a language model, parses them using a parser, and appends new thoughts to the thoughts list. If more thoughts are created than expected based on prompt and response numbers, a warning is logged.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":451-475", + "content": " responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_branches_response)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n for new_state in parser.parse_generate_answer(base_state, responses):\n new_state = {**base_state, **new_state}\n self.thoughts.append(Thought(new_state))\n self.logger.debug(\n \"New thought %d created with state %s\",\n self.thoughts[-1].id,\n self.thoughts[-1].state,\n )\n if (\n len(self.thoughts)\n > self.num_branches_prompt\n * self.num_branches_response\n * len(previous_thoughts)\n and self.num_branches_prompt > 0\n ):\n self.logger.warning(\n \"Generate operation %d created more thoughts than expected\",\n self.id,\n )\n self.logger.info(\n \"Generate operation %d created %d new thoughts\", self.id, len(self.thoughts)" + }, + { + "comment": "The code defines a class \"Improve\" which represents an operation to enhance thoughts. It initializes a new Improve operation and gets the associated thoughts after improvement. 
The \"_execute\" method executes the operation by improving the predecessor's thoughts using language model (LM) prompts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":476-512", + "content": " )\nclass Improve(Operation):\n \"\"\"\n Operation to improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.improve\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Improve operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after improvement.\n :return: List of improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Improve operation by improving the predecessors' thoughts.\n The thoughts are improved by prompting the LM with the predecessors' thought states.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter" + }, + { + "comment": "This code defines two classes: \"Improve\" and \"Aggregate\", which are subclasses of the \"Operation\" class. The \"Improve\" operation retrieves previous thoughts, improves their prompts using a prompter and language model (LM), gets response texts, parses the responses using a parser, and appends the updated thoughts to the list of thoughts for the current operation. 
The \"Aggregate\" operation also exists but has no implementation shown in this code snippet.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":513-536", + "content": " :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert len(self.predecessors) > 0, \"Needs at least one predecessor\"\n for thought in previous_thoughts:\n improve_prompt = prompter.improve_prompt(**thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(lm.query(improve_prompt, num_responses=1))\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(thought.state, responses)\n self.thoughts.append(Thought({**thought.state, **state_update}))\n self.logger.info(\n \"Improve operation %d improved %d thoughts\", self.id, len(self.thoughts)\n )\nclass Aggregate(Operation):\n \"\"\"" + }, + { + "comment": "This code defines an Aggregate operation class that initializes a new Aggregate operation and gets the associated thoughts after aggregation. It also includes a method to execute the operation by prompting the language model with predecessors' thought states for aggregation.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":537-567", + "content": " Operation to aggregate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.aggregate\n def __init__(self, num_responses: int = 1) -> None:\n \"\"\"\n Initializes a new Aggregate operation.\n :param num_responses: Number of responses to use for aggregation. 
Defaults to 1.\n :type num_responses: int\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n self.num_responses: int = num_responses\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after aggregation.\n :return: List of aggregated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Aggregate operation by aggregating the predecessors' thoughts.\n The thoughts are aggregated by prompting the LM with the predecessors' thought states." + }, + { + "comment": "This code is a part of an operation class in Python. It checks if the operation has at least one predecessor and retrieves the previous thoughts from it. Then, it sorts the previous thoughts based on their score and constructs a prompt for aggregation using the prompter. Finally, it stores the states of the previous thoughts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":569-593", + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"Aggregate operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n return\n # applied in order of score\n base_state: Dict = {}\n for thought in sorted(previous_thoughts, key=lambda thought: thought.score):\n base_state = {**base_state, **thought.state}\n previous_thought_states = [thought.state for thought in previous_thoughts]\n prompt = 
prompter.aggregation_prompt(previous_thought_states)" + }, + { + "comment": "The code defines a class `KeepBestN` that represents an operation to keep the best N thoughts from predecessors based on their score. The `__init__` method initializes a new `KeepBestN` object with the maximum number of thoughts to keep and whether higher scores are better.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":595-626", + "content": " self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_responses)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n parsed = parser.parse_aggregation_answer(previous_thought_states, responses)\n if isinstance(parsed, dict):\n parsed = [parsed]\n for new_state in parsed:\n self.thoughts.append(Thought({**base_state, **new_state}))\nclass KeepBestN(Operation):\n \"\"\"\n Operation to keep the best N thoughts from predecessors based on their score.\n \"\"\"\n operation_type: OperationType = OperationType.keep_best_n\n def __init__(self, n: int, higher_is_better: bool = True) -> None:\n \"\"\"\n Initializes a new KeepBestN operation.\n :param n: Maximum number of thoughts to keep.\n :type n: int\n :param higher_is_better: Whether higher scores are better. Defaults to True.\n :type higher_is_better: bool\n :raises AssertionError: If `n` is not greater than zero." + }, + { + "comment": "Class `KeepBestN` initializes its attributes and checks the minimum number of thoughts to keep, then provides a method `get_best_n()` that returns the top N thoughts based on their scores. 
It raises `AssertionError` if all predecessors haven't been executed or if not all thoughts have been scored.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":627-654", + "content": " \"\"\"\n super().__init__()\n self.n: int = n\n assert self.n > 0, \"KeepBestN operation must keep at least one thought\"\n self.higher_is_better: bool = higher_is_better\n self.thoughts: List[Thought] = []\n def get_best_n(self) -> List[Thought]:\n \"\"\"\n Returns the best N thoughts from the predecessors based on their score.\n :return: List of best N thoughts.\n :rtype: List[Thought]\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert all(\n previous_thought.scored for previous_thought in previous_thoughts\n ), \"Not all thoughts have been scored\"\n try:\n return sorted(\n previous_thoughts,\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n except:" + }, + { + "comment": "This code defines a `KeepBestN` operation that keeps the top N thoughts from predecessors based on their scores. It logs an error message with previous operation details and previous thoughts' scores, and returns the sorted list of thoughts. 
The class has methods to access kept thoughts and execute the operation using given language model, prompter, and parser.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":655-682", + "content": " self.logger.error(\"Error in KeepBestN operation\")\n self.logger.error(\n \"Previous operation: %s\", [op.id for op in self.predecessors]\n )\n self.logger.error(\"Previous thoughts: %s\", previous_thoughts)\n self.logger.error(\n \"Scores: %s\", [thought.score for thought in previous_thoughts]\n )\n return sorted(\n [i for i in previous_thoughts if isinstance(i.score, float)],\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepBestN operation by keeping the best N thoughts from the predecessors according to their score." + }, + { + "comment": "The code defines a function for the KeepBestN operation, which requires at least one predecessor, and raises AssertionError if any conditions are not met. 
It retrieves thoughts from predecessors and logs information about the kept thoughts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":684-707", + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepBestN operation must have at least one predecessor\"\n self.thoughts = [Thought.from_thought(thought) for thought in self.get_best_n()]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepBestN operation %d kept %d thoughts\", self.id, len(self.thoughts)" + }, + { + "comment": "The `KeepValid` operation keeps valid thoughts from predecessors and returns them. It also preserves unvalidated thoughts. 
This class initializes a new KeepValid operation and provides methods for retrieving the kept thoughts and executing the operation using a language model, prompter, and parser.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":708-745", + "content": " )\nclass KeepValid(Operation):\n \"\"\"\n Operation to keep valid thoughts from predecessors.\n \"\"\"\n operation_type: OperationType = OperationType.keep_valid\n def __init__(self) -> None:\n \"\"\"\n Initializes a new KeepValid operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepValid operation by keeping the valid thoughts from the predecessors.\n Keeps unvalidated thoughts as well.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses." + }, + { + "comment": "The code defines two classes: \"KeepValid\" and \"GroundTruth\". The KeepValid class is an operation that requires at least one predecessor. It collects thoughts from previous operations (excluding those that are not valid or already valid) into a list called \"self.thoughts\". If there are any unvalidated thoughts, it logs a warning. Then, it logs debug and info messages for each thought in the list, including its ID and state, as well as the total number of thoughts kept. 
The GroundTruth class is an operation that uses a ground truth evaluator to assess if thoughts correctly solve the problem.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":746-777", + "content": " :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepValid operation must have at least one predecessor\"\n self.thoughts: List[Thought] = [\n Thought.from_thought(thought)\n for thought in self.get_previous_thoughts()\n if not thought.validated or thought.valid\n ]\n if any(not thought.validated for thought in self.thoughts):\n self.logger.warning(\n \"KeepValid operation %d has unvalidated thoughts\", self.id\n )\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepValid operation %d kept %d thoughts\", self.id, len(self.thoughts)\n )\nclass GroundTruth(Operation):\n \"\"\"\n Operation to evaluate if thoughts correctly solve the problem, using a ground truth evaluator" + }, + { + "comment": "This code defines a class for the GroundTruth operation, which initializes with a ground truth evaluator function. The operation evaluates predecessors' thoughts using this function and stores them in a list of thoughts. 
The get_thoughts method returns these evaluated thoughts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":778-806", + "content": " \"\"\"\n operation_type: OperationType = OperationType.ground_truth_evaluator\n def __init__(self, ground_truth_evaluator: Callable[[Dict], bool]) -> None:\n \"\"\"\n Initializes a new GroundTruth operation.\n :param ground_truth_evaluator: A function to evaluate if a thought solves the problem.\n :type ground_truth_evaluator: A function that takes a thought state and returns a boolean.\n \"\"\"\n super().__init__()\n self.ground_truth_evaluator: Callable[[Dict], bool] = ground_truth_evaluator\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of evaluated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the GroundTruth operation by evaluating the predecessors' thoughts using the ground truth evaluator function." + }, + { + "comment": "This code is part of a class that implements the GroundTruth operation. It ensures that the operation has at least one predecessor and evaluates the thoughts generated by the previous operations. The evaluated thoughts are then added to the current operation's thoughts list, and their solved status is determined using the ground_truth_evaluator method. If any exceptions occur during the evaluation process, the solved status is set to False. 
Finally, an info message is logged indicating how many thoughts were evaluated and how many of them solved the problem.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":808-832", + "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessor.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"GroundTruth operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n try:\n new_thought.solved = self.ground_truth_evaluator(new_thought.state)\n except:\n new_thought.solved = False\n self.thoughts.append(new_thought)\n self.logger.info(\n \"GroundTruth operation %d evaluated %d thoughts and %d solved the problem\"," + }, + { + "comment": "This code defines a Selector operation for the Graph of Thoughts, which selects thoughts from predecessors to be used in subsequent operations. The constructor takes a selector function that accepts a list of thoughts and returns a list of selected thoughts. 
The get_thoughts method returns the thoughts selected by the operation.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":833-863", + "content": " self.id,\n len(self.thoughts),\n len([thought for thought in self.thoughts if thought.solved]),\n )\nclass Selector(Operation):\n \"\"\"\n Operation to select thoughts from predecessors.\n Useful for separating thoughts to perform different, subsequent operations on them.\n \"\"\"\n operation_type: OperationType = OperationType.selector\n def __init__(self, selector: Callable[[List[Thought]], List[Thought]]) -> None:\n \"\"\"\n Initializes a new Selector operation.\n :param selector: A function to select thoughts from the predecessors' thoughts.\n :type selector: A function that takes a list of thoughts and returns a list of thoughts.\n \"\"\"\n super().__init__()\n self.selector: Callable[[List[Thought]], List[Thought]] = selector\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts selected by the operation.\n :return: List of selected thoughts.\n :rtype: List[Thought]" + }, + { + "comment": "This code defines a Selector operation, which selects thoughts from predecessors using a provided selector function. If there are no predecessors, the function calls the selector with a thought containing the provided kwargs as state. 
The selected thoughts are then returned.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":864-889", + "content": " \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Selector operation by selecting thoughts from the predecessors using the selector function.\n If the Selector has no predecessors, the selector function is called with a thought containing the kwargs as state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n previous_thoughts = [Thought(kwargs)]\n self.thoughts = [\n Thought.from_thought(thought)\n for thought in self.selector(previous_thoughts)" + }, + { + "comment": "This code segment is logging the selection of thoughts by a selector operation. It iterates over each thought in the self.thoughts list, and logs their ID and state. 
Finally, it logs the total number of thoughts selected by this operation.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/operations.py\":890-899", + "content": " ]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s selected\", thought.id, thought.state\n )\n self.logger.info(\n \"Selector operation %d selected %d thoughts\", self.id, len(self.thoughts)\n )" + } + ] +} \ No newline at end of file diff --git a/docs/doc/6535c412-26cb-40f0-b4a2-9b9fa1702577.json b/docs/doc/6535c412-26cb-40f0-b4a2-9b9fa1702577.json new file mode 100644 index 0000000..42bf476 --- /dev/null +++ b/docs/doc/6535c412-26cb-40f0-b4a2-9b9fa1702577.json @@ -0,0 +1,20 @@ +{ + "summary": "The code presents an abstract base class, Prompter, that generates language model prompts through two methods: `aggregation_prompt()` and `improve_prompt()`. It also includes optional parameters and keyword arguments for subclass customization.", + "details": [ + { + "comment": "This code is an abstract base class called Prompter, which defines interfaces for all prompters. It helps generate prompts for language models in the form of aggregation and improve prompts. 
The class has two abstract methods: `aggregation_prompt()` and `improve_prompt()`, both with their own parameters and return types.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/prompter/prompter.py\":0-35", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List\nclass Prompter(ABC):\n \"\"\"\n Abstract base class that defines the interface for all prompters.\n Prompters are used to generate the prompts for the language models.\n \"\"\"\n @abstractmethod\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model." + }, + { + "comment": "This code defines a base class for generating prompts and validation prompts for language models. The `generate_prompt` and `validation_prompt` methods are abstract, indicating that concrete implementations should override them. The methods accept an optional parameter `num_branches`, and additional keyword arguments (`kwargs`) to allow for customization in subclasses. 
The thought state is unpacked to enable explicit specification of required arguments.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/prompter/prompter.py\":36-64", + "content": " The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def generate_prompt(self, num_branches: int, **kwargs) -> str:\n \"\"\"\n Generate a generate prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments" + }, + { + "comment": "This code defines an abstract class with two methods: `generate_prompt()` and `score_prompt()`. The first method generates a validation prompt, and the second method generates a score prompt. Both methods accept additional keyword arguments. 
State dictionaries are used as input for the `score_prompt()` method to generate prompts for multiple thought states simultaneously.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/prompter/prompter.py\":65-85", + "content": " and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n \"\"\"\n pass" + } + ] +} \ No newline at end of file diff --git a/docs/doc/66a35b11-6817-4e60-ad59-f13cbbeb8722.json b/docs/doc/66a35b11-6817-4e60-ad59-f13cbbeb8722.json new file mode 100644 index 0000000..a1d8f08 --- /dev/null +++ b/docs/doc/66a35b11-6817-4e60-ad59-f13cbbeb8722.json @@ -0,0 +1,20 @@ +{ + "summary": "The code contains helper functions `string_to_list()` and `string_to_set()`, which convert a string-encoded list or set into Python integers. The `test_set_intersection` function compares the intersection of two sets with the sorted list from the input string, counting errors as a score, returning either total errors or 1000 for exceptions.", + "details": [ + { + "comment": "This code defines two helper functions: `string_to_list()` and `string_to_set()`. These functions are used to convert a list encoded in a string into a Python list or set object of integer elements. The `string_to_list()` function converts the input string into an integer list, while the `string_to_set()` function converts it into a set of integers. 
The assertion is raised if the input string does not contain a list.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/utils.py\":0-35", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# The source code is adapted from the sorting source code written by\n# Nils Blach.\n#\n# main author: Robert Gerstenberger\nfrom typing import Dict, List, Set\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of integer elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of integer elements.\n :rtype: List[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef string_to_set(string: str) -> Set[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n set object of integer elements.\n :param string: Input string containing a list." + }, + { + "comment": "Function `string_to_list` converts a string input into an integer set. Function `test_set_intersection` checks if the final solution matches the ground truth by converting the result and current states to lists, sorting them, and comparing. 
Finally, `num_errors` function calculates the number of errors in the given state as a score.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/utils.py\":36-71", + "content": " :type string: str\n :return: Set of integer elements.\n :rtype: Set[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return {int(num) for num in string[1:-1].split(\",\")}\ndef test_set_intersection(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool\n \"\"\"\n # convert string to list\n try:\n correct_list = string_to_list(state[\"result\"])\n sorted_list = sorted(string_to_list(state[\"current\"]))\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors." + }, + { + "comment": "This function takes in two sets and a string, calculates the intersection of the sets and compares it with the sorted list from the string. If there is a mismatch between the common elements and the sorted list, it counts the number of errors. 
Returns the total number of errors found or 1000 if an exception occurs.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/utils.py\":72-98", + "content": " :rtype: float\n \"\"\"\n try:\n set1 = string_to_set(state[\"set1\"])\n set2 = string_to_set(state[\"set2\"])\n if \"subset\" in state and state[\"subset\"] != \"\" and state[\"subset\"] is not None:\n set2 = string_to_set(state[\"subset\"])\n common = sorted(list(set1 & set2))\n llm_solution = sorted(string_to_list(state[\"current\"]))\n num_errors = 0\n common_idx = 0\n llm_idx = 0\n while common_idx < len(common) and llm_idx < len(llm_solution):\n if common[common_idx] == llm_solution[llm_idx]:\n common_idx += 1\n llm_idx += 1\n elif common[common_idx] < llm_solution[llm_idx]:\n common_idx += 1\n num_errors += 1\n elif common[common_idx] > llm_solution[llm_idx]:\n llm_idx += 1\n num_errors += 1\n num_errors += len(common) - common_idx + len(llm_solution) - llm_idx\n return num_errors\n except:\n return 1000" + } + ] +} \ No newline at end of file diff --git a/docs/doc/76401061-be28-4fb3-a263-4b6fca10497d.json b/docs/doc/76401061-be28-4fb3-a263-4b6fca10497d.json new file mode 100644 index 0000000..e7b42e1 --- /dev/null +++ b/docs/doc/76401061-be28-4fb3-a263-4b6fca10497d.json @@ -0,0 +1,15 @@ +{ + "summary": "The code directory contains various sorting algorithm examples for numbers 0-9 with implementations for IO, CoT, ToT, and GoT. It includes data files, Python scripts to execute use cases, and organizes results by name, approaches, day, and time. The plot.py file visualizes the results after modification.", + "details": [ + { + "comment": "This code directory contains examples of sorting algorithms for lists of numbers from 0 to 9. Implementations are provided for IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). 
Data includes input files with precomputed samples, and Python scripts execute the use case with options to select samples and approaches.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/README.md\":0-30", + "content": "# Sorting\nThe use case in this directory sorts the provided list of \nnumbers containing numbers from 0 to 9 (duplicates allowed). \nWe provide implementations of five different approaches for \n32, 64 and 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT: split into subarrays / sort / merge\n## Data\nWe provide input files with 100 precomputed samples for each list\nlength: `sorting_.csv`.\n## Execution\nThe files to execute the use case are called\n`sorting_.py`. In the main body, one can select the\nspecific samples to be run (variable sample) and the approaches\n(variable approaches). It is also possible to set a budget in dollars\n(variable budget).\nThe input filename for the samples is currently hardcoded to\n`sorting_.csv`, but can be updated in the function\n`run`.\nThe Python scripts will create the directory `result`, if it is not" + }, + { + "comment": "Code organizes results into separate directories for each run based on the name of LLM, list of approaches, day and start time. Inside these execution-specific directories, config.json contains the configuration, log.log has prompts & responses, and approach directories store GRS files for every sample. Plot data can be visualized by modifying the results directory in plot.py and running python3 plot.py.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/README.md\":31-45", + "content": "already present. 
In the 'result' directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 171 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data." + } + ] +} \ No newline at end of file diff --git a/docs/doc/83cd9c2d-9a53-466b-91e1-c27f04849955.json b/docs/doc/83cd9c2d-9a53-466b-91e1-c27f04849955.json new file mode 100644 index 0000000..a65d339 --- /dev/null +++ b/docs/doc/83cd9c2d-9a53-466b-91e1-c27f04849955.json @@ -0,0 +1,20 @@ +{ + "summary": "The code defines a function \"scramble\" that shuffles array elements and generates random sets, calculating their intersection for specified samples. It uses numpy's default random generator with seed 42 to generate sets of size 32, writing the input, generated, and intersection sets in CSV format.", + "details": [ + { + "comment": "The code snippet defines a function called \"scramble\" which shuffles the elements of an array randomly. It also contains main code block that specifies input parameters such as set size, range of integer numbers in sets, seed for random number generator, number of samples to be generated, and filename for output CSV file. 
The purpose is likely to generate a dataset by scrambling the order of elements within sets.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/dataset_gen_intersection.py\":0-38", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Robert Gerstenberger\nimport csv\nimport numpy as np\ndef scramble(array: np.ndarray, rng: np.random.Generator) -> None:\n \"\"\"\n Helper function to change the order of the elements in an array randomly.\n :param array: Array to be scrambled.\n :type: numpy.ndarray\n :param rng: Random number generator.\n :type rng: numpy.random.Generator\n \"\"\"\n size = array.shape[0]\n index_array = rng.integers(0, size, size)\n for i in range(size):\n temp = array[i]\n array[i] = array[index_array[i]]\n array[index_array[i]] = temp\nif __name__ == \"__main__\":\n \"\"\"\n Input(u) : Set size.\n Input(v) : Range of the integer numbers in the sets: 0..v (exclusive)\n Input(w) : Seed for the random number generator.\n Input(x) : Number of samples to be generated.\n Input(y) : Filename for the output CSV file." + }, + { + "comment": "Code generates random sets and calculates their intersection for a given number of samples. It uses numpy's default random generator, with seed 42, to generate sets of size 32. The intersected set sizes are also randomly determined (within certain bounds) for each sample. 
The code writes the input sets, generated sets, and intersection sets in CSV format.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/dataset_gen_intersection.py\":39-66", + "content": " Output(z) : Input sets and intersected set written a file in the CSV format.\n File contains the sample ID, input set 1, input set 2,\n intersection set.\n \"\"\"\n set_size = 32 # size of the generated sets\n int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n seed = 42 # seed of the random number generator\n num_sample = 100 # number of samples\n filename = \"set_intersection_032.csv\" # output filename\n assert 2 * set_size <= int_value_ubound\n rng = np.random.default_rng(seed)\n intersection_sizes = rng.integers(set_size // 4, 3 * set_size // 4, num_sample)\n np.set_printoptions(\n linewidth=np.inf\n ) # no wrapping in the array fields in the output file\n with open(filename, \"w\") as f:\n fieldnames = [\"ID\", \"SET1\", \"SET2\", \"INTERSECTION\"]\n writer = csv.DictWriter(f, delimiter=\",\", fieldnames=fieldnames)\n writer.writeheader()\n for i in range(num_sample):\n intersection_size = intersection_sizes[i]" + }, + { + "comment": "Code generates a full set of integers, scrambles it, takes an intersection of the set with a specified size, splits the full set into two sets, scramble each set, and writes a row to a CSV file containing ID, SET1, SET2, and sorted INTERSECTION.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/dataset_gen_intersection.py\":68-91", + "content": " full_set = np.arange(0, int_value_ubound, dtype=np.int16)\n scramble(full_set, rng)\n intersection = full_set[:intersection_size].copy()\n sorted_intersection = np.sort(intersection)\n set1 = full_set[:set_size].copy()\n set2 = np.concatenate(\n [intersection, full_set[set_size : 2 * set_size - intersection_size]]\n )\n scramble(set1, rng)\n scramble(set2, rng)\n writer.writerow(\n 
{\n \"ID\": i,\n \"SET1\": set1.tolist(),\n \"SET2\": set2.tolist(),\n \"INTERSECTION\": sorted_intersection.tolist(),\n }\n )" + } + ] +} \ No newline at end of file diff --git a/docs/doc/8ef4e234-d54a-4d43-a867-e9d2e137c744.json b/docs/doc/8ef4e234-d54a-4d43-a867-e9d2e137c744.json new file mode 100644 index 0000000..97ef979 --- /dev/null +++ b/docs/doc/8ef4e234-d54a-4d43-a867-e9d2e137c744.json @@ -0,0 +1,35 @@ +{ + "summary": "The code imports libraries, defines a get_complete_results() function, reads JSON data and stores it in a dictionary, sorts the keys, retrieves final scores for each method using results_complete dictionary, and includes functions to retrieve plotting data and plot boxplots for scores with total cost bar plots on a secondary y-axis. It also sets custom y-axis positions and labels for plotting the solved status of various methods, saving it as a PDF, and generates data from given results while initializing an instance of the DocMerge class with a cost_upper limit of 15.", + "details": [ + { + "comment": "The code imports necessary libraries, defines a function get_complete_results(), and reads data from JSON files in specified directories. 
It collects this information into a dictionary, sorts the keys, and returns the complete results for further processing.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":0-28", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():\n results_complete[key] = sorted(" + }, + { + "comment": "This code retrieves and sorts final scores for each method in the results_complete dictionary. It loops through each method, then through each result for that method, calculating the score, solved status, prompt/completion tokens, and cost from the reversed data list. 
Finally, it appends these values to the corresponding method's scores list, then sorts those scores by key.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":29-58", + "content": " results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )\n scores[method] = sorted(scores[method], key=lambda x: x[0])" + }, + { + "comment": "Function get_plotting_data returns a dictionary of plotting data for different methods, which includes scores, number of solved problems, and costs. Function plot_results plots the results using given parameters like methods order, model, number of nodes, y-axis limits, cost upper limit, etc. 
The function first ensures that the specified methods are in the result dictionary and then extracts ordered scores and total costs for each method from the results dictionary.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":59-95", + "content": " return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"got\", \"got2\"],\n model=\"GPT-3.5\",\n num_ndas=4,\n y_lower=0,\n y_upper=10,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis" + }, + { + "comment": "Creates a boxplot for scores, sets ticks and labels for x-axis, adjusts y-limits, adds a blue bar plot with total costs on the right y-axis, and sets corresponding tick colors and limits.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":96-131", + "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 
1))\n ax.set_xticklabels(methods_labels)\n # ax.set_xlabel(\"Approach\")\n ax.set_ylim(y_lower, 12 if display_solved else 9.75)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n # ax.set_title(f\"Document Merging\")\n ax2 = ax.twinx()\n ax2.bar(\n positions,\n total_costs,\n alpha=0.5,\n color=\"blue\",\n label=\"Total Cost ($); the lower the better\",\n )\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())" + }, + { + "comment": "This code is setting custom tick positions and labels for the y-axis of a plot, displaying the solved status of various methods, saving the plot as a PDF, and generating plotting data from given results.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":132-167", + "content": " tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count, annotation_height, f\"Solved: {solved}\", ha=\"center\", va=\"bottom\"\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"doc_merge_{model}_{num_ndas}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n num_ndas=4,\n display_solved=False,\n model=\"GPT-3.5\",\n y_upper=10,\n display_left_ylabel=True," + }, + { + "comment": "This code snippet is initializing a function, specifically an instance of the class \"DocMerge\", 
with the parameter 'cost_upper' set to 15. The purpose of this function might be to perform document merging or some similar operation with a specified upper cost limit.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/plot.py\":168-169", + "content": " cost_upper=15,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/9628d64c-cdf3-43c9-bfe3-6aa1c1fe9db2.json b/docs/doc/9628d64c-cdf3-43c9-bfe3-6aa1c1fe9db2.json new file mode 100644 index 0000000..f5aed86 --- /dev/null +++ b/docs/doc/9628d64c-cdf3-43c9-bfe3-6aa1c1fe9db2.json @@ -0,0 +1,35 @@ +{ + "summary": "This code retrieves JSON data, organizes it in a dictionary and plots results using boxplots and bar charts with customizable titles. It also sets y-axis limits, handles missing results and displays solved values.", + "details": [ + { + "comment": "This code retrieves complete results from a given base directory, iterating through each folder and file. It collects JSON data from specified .json files, stores them in the \"results_complete\" dictionary with corresponding key and appends the data to its value.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":0-28", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Ales Kubicek\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n 
results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():" + }, + { + "comment": "The code sorts the results dictionary by key, then retrieves final scores for each method in the results_complete dictionary. It appends a list of scores (including score, solved status, prompt tokens, completion tokens, and cost) to the corresponding method in the scores dictionary.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":29-57", + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]" + }, + { + "comment": "This code retrieves and prepares data for plotting keyword counting results. It first gets complete results from a specified base directory, then extracts final scores. The data is then organized into a dictionary format for plotting. The function `plot_results` takes this data, along with optional parameters to adjust the visualization. 
The code filters out irrelevant scores and orders them based on the input order.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":58-92", + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got4\", \"got8\", \"gotx\"],\n model=\"GPT-3.5\",\n y_lower=0,\n y_upper=40,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 100 and score != 300]" + }, + { + "comment": "This code generates a boxplot of keyword counting results and adds a bar chart of total costs to the same axes. It uses the matplotlib library for plotting, sets tick and label positions, and allows for customization of y-axis labels and title. 
The total costs are calculated by summing the \"costs\" values from the \"results\" dictionary for each method in a specified order.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":93-121", + "content": " for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(3.75, 4))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=10)\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(f\"Number of errors; the lower the better\", fontsize=fig_fontsize)\n ax.set_title(f\"Keyword Counting\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)" + }, + { + "comment": "This code is setting the y-axis limits and ticks for a graph, adding annotations for solved solutions, labeling the y-axis, and saving the figure with a specific file name. It also handles missing results by continuing to the next method in case one is not available. 
The purpose of this code is likely related to plotting a graph that compares different methods or models based on their performance (cost) and whether they solved the problem or not.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":122-157", + "content": " ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"keyword_counting_{model}.pdf\", bbox_inches=\"tight\")\nplot_results(" + }, + { + "comment": "This code is calling a function named 'get_plotting_data' to retrieve data from the \"results/\" directory and generate a plot. The model used for this task is \"GPT-3.5\". The y-axis has an upper limit of 35, and the cost axis has an upper limit of 9. 
The function will display solved values on the plot and show left and right y-labels.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/plot.py\":158-166", + "content": " get_plotting_data(\"results/\"),\n display_solved=True,\n annotation_offset=-0.3,\n model=\"GPT-3.5\",\n y_upper=35,\n display_left_ylabel=True,\n display_right_ylabel=True,\n cost_upper=9,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/9e090150-5d69-43c5-8187-800d2da3b4a1.json b/docs/doc/9e090150-5d69-43c5-8187-800d2da3b4a1.json new file mode 100644 index 0000000..f4dc330 --- /dev/null +++ b/docs/doc/9e090150-5d69-43c5-8187-800d2da3b4a1.json @@ -0,0 +1,35 @@ +{ + "summary": "The GoT framework is a Python 3.8+ language model that solves sorting problems, outputs JSON graphs, and provides detailed instructions for usage with real-world examples in the examples directory. Users are encouraged to star the repository, ask questions, provide feedback, and cite the reference when using it in other projects.", + "details": [ + { + "comment": "Installation instructions for the Graph of Thoughts (GoT) framework. Requires Python 3.8 or newer and can be installed directly from PyPI using pip.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":0-19", + "content": "# Graph of Thoughts (GoT)\n

\n \n

\nThis is the official implementation of [Graph of Thoughts: Solving Elaborate Problems with Large Language Models](https://arxiv.org/pdf/2308.09687.pdf). \nThis framework gives you the ability to solve complex problems by modeling them as a Graph of Operations (GoO), which is automatically executed with a Large Language Model (LLM) as the engine. \nThis framework is designed to be flexible and extensible, allowing you to not only solve problems using the new GoT approach, but also to implement GoOs resembling previous approaches like CoT or ToT.\n## Setup Guide\nIn order to use this framework, you need to have a working installation of Python 3.8 or newer.\n### Installing GoT\nBefore running either of the following two installation methods, make sure to activate your Python environment (if any) beforehand. \nIf you are a user and you just want to use `graph_of_thoughts`, you can install it directly from PyPI:\n```bash\npip install graph_of_thoughts" + }, + { + "comment": "This code provides instructions for installing and configuring an LLM (Language Model) to use the Graph of Thoughts framework. 
The code also shows a quick start example for solving the sorting problem with a list of 32 numbers using a CoT-like approach, assuming the setup guide has been followed.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":20-47", + "content": "```\nIf you are a developer and you want to modify the code, you can install it in editable mode from source:\n```bash\ngit clone https://github.com/spcl/graph-of-thoughts.git\ncd graph-of-thoughts\npip install -e .\n```\n### Configuring the LLM\nIn order to use the framework, you need to have access to an LLM.\nPlease follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice.\n## Quick Start\nThe following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach. \nMake sure you have followed the [Setup Guide](#setup-guide) before running the code.\n```python\nfrom examples.sorting.sorting_032 import SortingPrompter, SortingParser, utils\nfrom graph_of_thoughts import controller, language_models, operations\n# Problem input\nto_be_sorted = \"[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]\"\n# Create the Graph of Operations\ngop = operations.GraphOfOperations()" + }, + { + "comment": "This code generates a graph of thoughts using the GoT approach. It appends operations to generate, score (using num_errors function), and ground truth (using test_sorting function). It then initializes a language model with an API key from config.json and creates a controller with given parameters. Finally, it runs the controller and outputs the graph in JSON format. 
The example problem input is provided for usage.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":48-82", + "content": "gop.append_operation(operations.Generate())\ngop.append_operation(operations.Score(scoring_function=utils.num_errors))\ngop.append_operation(operations.GroundTruth(utils.test_sorting))\n# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)\nlm = language_models.ChatGPT(\"config.json\", model_name=\"chatgpt\")\n# Create the Controller\nctrl = controller.Controller(\n lm, \n gop, \n SortingPrompter(), \n SortingParser(),\n # The following dictionary is used to configure the initial thought state\n {\n \"original\": to_be_sorted,\n \"current\": \"\",\n \"method\": \"cot\"\n }\n)\n# Run the Controller and generate the output graph\nctrl.run()\nctrl.output_graph(\"output_cot.json\")\n```\nTo run the more sophisticated GoT approach, you can use the following code snippet.\n```python\nfrom examples.sorting.sorting_032 import SortingPrompter, SortingParser, got, utils\nfrom graph_of_thoughts import controller, language_models, operations\n# Problem input\nto_be_sorted = \"[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]\"" + }, + { + "comment": "This code retrieves the Graph of Operations (gop), configures a language model (lm) using config.json, creates a Controller object (ctrl) with the necessary components, and runs the controller to generate output graphs \"output_cot.json\" and \"output_got.json\". The final thought states' scores in the output graphs indicate the number of errors in the sorted list. 
Read the documentation for more detailed information on the framework's individual modules.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":84-115", + "content": "# Retrieve the Graph of Operations\ngop = got()\n# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)\nlm = language_models.ChatGPT(\"config.json\", model_name=\"chatgpt\")\n# Create the Controller\nctrl = controller.Controller(\n lm, \n gop, \n SortingPrompter(), \n SortingParser(),\n # The following dictionary is used to configure the initial thought state\n {\n \"original\": to_be_sorted,\n \"current\": \"\",\n \"phase\": 0,\n \"method\": \"got\"\n }\n)\n# Run the Controller and generate the output graph\nctrl.run()\nctrl.output_graph(\"output_got.json\")\n```\nYou can compare the two results by inspecting the output graphs `output_cot.json` and `output_got.json`. \nThe final thought states' scores indicate the number of errors in the sorted list.\n## Documentation\nThe paper gives a high-level overview of the framework and its components. \nIn order to understand the framework in more detail, you can read the documentation of the individual modules. \nEspecially the [Controller](grap" + }, + { + "comment": "This code provides instructions on understanding and utilizing the framework, mentioning the importance of documentation for easy comprehension. It highlights the examples directory containing real-world problem solutions as a learning resource, with each example having a detailed README file. Additionally, it explains how to run the examples directly from the main directory and mentions that results will be stored in respective sub-directories. 
Lastly, it informs about running experiments from the paper through the examples directory.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":115-132", + "content": "h_of_thoughts/controller/README.md) and [Operations](graph_of_thoughts/operations/README.md) modules are important for understanding how to make the most out of the framework. \nWe took extra care to fully document the code, so that you can easily understand how it works and how to extend it.\n## Examples\nThe [examples](examples) directory contains several examples of problems that can be solved using the framework, including the ones presented in the paper. \nIt is a great starting point for learning how to use the framework to solve real problems. \nEach example contains a `README.md` file with instructions on how to run it and play with it. The code is fully documented and should be easy to follow.\nYou can also run the examples straight from the main directory. Note that the results will be stored in the respective examples sub-directory.\nTry for instance:\n```bash\npython -m examples.sorting.sorting_032\npython -m examples.keyword_counting.keyword_counting\n```\n## Paper Results\nYou can run the experiments from the paper by following the instructions in the [examples](examples) directory. " + }, + { + "comment": "The code provides instructions to access the project's results, suggests using the 'paper' directory for inspection and replotting, encourages starring the repository if valuable, offers contact information for questions or feedback, and recommends citing the provided reference when using the work in other projects.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/README.md\":133-149", + "content": "However, if you just want to inspect and replot the results, you can use the [paper](paper) directory.\n## Citations\nIf you find this repository valuable, please give it a star! \nGot any questions or feedback? 
Feel free to reach out to [nils.blach@inf.ethz.ch](mailto:nils.blach@inf.ethz.ch) or open an issue. \nUsing this in your work? Please reference us using the provided citation:\n```bibtex\n@misc{besta2023got,\n title = {{Graph of Thoughts: Solving Elaborate Problems with Large Language Models}},\n author = {Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Micha{\\l} and Niewiadomski, Hubert and Nyczyk, Piotr and Hoefler, Torsten},\n year = 2023,\n eprinttype = {arXiv},\n eprint = {2308.09687}\n}\n```" + } + ] +} \ No newline at end of file diff --git a/docs/doc/9ef559b4-a179-4352-a8d1-a4f60266badc.json b/docs/doc/9ef559b4-a179-4352-a8d1-a4f60266badc.json new file mode 100644 index 0000000..28db899 --- /dev/null +++ b/docs/doc/9ef559b4-a179-4352-a8d1-a4f60266badc.json @@ -0,0 +1,75 @@ +{ + "summary": "The code generates a language model dataset by organizing country occurrences into popular and rest categories, cleaning paragraphs, checking for invalid elements, and storing the result in CSV format.", + "details": [ + { + "comment": "This function finds the indices of occurrences of a given country in an input text and returns them as a list of tuples containing index and country. 
The primary_countries variable is a list of countries used in the dataset.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":0-42", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport csv\nfrom typing import List, Tuple\nfrom graph_of_thoughts import controller\ndef find_country_indices(text: str, country: str) -> List[Tuple[int, str]]:\n \"\"\"\n Finds the indices of the occurences of a given country in the input text.\n :param text: Input text.\n :type text: str\n :param country: Country to search for.\n :type country: str\n :return: List of tuples, where each tuple consists of index and country.\n :rtype: List[Tuple[int, str]]\n \"\"\"\n indices = []\n index = text.find(country)\n while index != -1:\n indices.append(index)\n index = text.find(country, index + 1)\n return [(index, country) for index in indices]\nprimary_countries = [\n \"Afghanistan\",\n \"Argentina\",\n \"Australia\",\n \"Brazil\",\n \"Canada\",\n \"China\",\n \"Colombia\",\n \"Cuba\",\n \"Egypt\"," + }, + { + "comment": "This code contains lists of countries and their corresponding primary adjectives. The countries list includes 46 nations, while the adjectives list has 28 items. 
These data can be used for keyword counting or other text processing tasks related to country-specific information.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":43-111", + "content": " \"France\",\n \"Germany\",\n \"Greece\",\n \"India\",\n \"Indonesia\",\n \"Iran\",\n \"Iraq\",\n \"Ireland\",\n \"Israel\",\n \"Italy\",\n \"Japan\",\n \"Kenya\",\n \"Mexico\",\n \"Netherlands\",\n \"New Zealand\",\n \"Nigeria\",\n \"North Korea\",\n \"Pakistan\",\n \"Peru\",\n \"Philippines\",\n \"Poland\",\n \"Portugal\",\n \"Russia\",\n \"Saudi Arabia\",\n \"South Africa\",\n \"South Korea\",\n \"Spain\",\n \"Sweden\",\n \"Switzerland\",\n \"Thailand\",\n \"Turkey\",\n \"Ukraine\",\n \"United Arab Emirates\",\n \"United Kingdom\",\n \"United States\",\n \"Venezuela\",\n \"Vietnam\",\n \"Yemen\",\n \"Zimbabwe\",\n \"Belgium\",\n \"Norway\",\n]\nprimary_adjectives = [\n \"Afghan\",\n \"Argentine \",\n \"Argentinean\",\n \"Australian\",\n \"Brazilian\",\n \"Canadian\",\n \"Chinese\",\n \"Colombian\",\n \"Cuban\",\n \"Egyptian\",\n \"French\",\n \"German\",\n \"Greek\",\n \"Indian\",\n \"Indonesian\",\n \"Iranian\",\n \"Iraqi\",\n \"Irish\",\n \"Israeli\",\n \"Italian\",\n \"Japanese\",\n \"Kenyan\",\n \"Mexican\",\n \"Dutch\",\n \"New Zealander \",\n \"Kiwi\"," + }, + { + "comment": "The code provides a list of countries divided into two sections: \"popular_countries\" containing widely recognized nations, and \"rest_countries\" containing the remaining countries. 
It appears to be used for organizing or filtering country data in an application or dataset.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":112-175", + "content": " \"Nigerian\",\n \"North Korean\",\n \"Pakistani\",\n \"Peruvian\",\n \"Filipino\",\n \"Philippine\",\n \"Polish\",\n \"Portuguese\",\n \"Russian\",\n \"Saudi \",\n \"Saudi Arabian\",\n \"South African\",\n \"South Korean\",\n \"Spanish\",\n \"Swedish\",\n \"Swiss\",\n \"Thai\",\n \"Turkish\",\n \"Ukrainian\",\n \"United Arab Emirates\",\n \"Emirati\",\n \"British\",\n \"American\",\n \"Venezuelan\",\n \"Vietnamese\",\n \"Yemeni\",\n \"Zimbabwean\",\n \"Belgian\",\n \"Norwegian\",\n]\nrest_countries = [\n \"Albania\",\n \"Algeria\",\n \"Andorra\",\n \"Angola\",\n \"Antigua and Barbuda\",\n \"Armenia\",\n \"Austria\",\n \"Azerbaijan\",\n \"The Bahamas\",\n \"Bahrain\",\n \"Bangladesh\",\n \"Barbados\",\n \"Belarus\",\n \"Belize\",\n \"Benin\",\n \"Bhutan\",\n \"Bolivia\",\n \"Bosnia and Herzegovina\",\n \"Botswana\",\n \"Brunei\",\n \"Bulgaria\",\n \"Burkina Faso\",\n \"Burundi\",\n \"Cabo Verde\",\n \"Cambodia\",\n \"Cameroon\",\n \"Central African Republic\",\n \"Chad\",\n \"Chile\",\n \"Comoros\",\n \"Congo\",\n \"Costa Rica\",\n \"C\u00f4te d\u2019Ivoire\"," + }, + { + "comment": "The code includes a list of country names in alphabetical order. 
Each country name is separated by a comma, and some countries have multiple names listed for different uses or recognition.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":176-242", + "content": " \"Croatia\",\n \"Cyprus\",\n \"Czech Republic\",\n \"Czechia\",\n \"Denmark\",\n \"Djibouti\",\n \"Dominica\",\n \"Dominican Republic\",\n \"East Timor\",\n \"Timor-Leste\",\n \"Ecuador\",\n \"El Salvador\",\n \"Equatorial Guinea\",\n \"Eritrea\",\n \"Estonia\",\n \"Eswatini\",\n \"Ethiopia\",\n \"Fiji\",\n \"Finland\",\n \"Gabon\",\n \"The Gambia\",\n \"Georgia\",\n \"Ghana\",\n \"Grenada\",\n \"Guatemala\",\n \"Guinea\",\n \"Guinea-Bissau\",\n \"Guyana\",\n \"Haiti\",\n \"Honduras\",\n \"Hungary\",\n \"Iceland\",\n \"Jamaica\",\n \"Jordan\",\n \"Kazakhstan\",\n \"Kiribati\",\n \"Kosovo\",\n \"Kuwait\",\n \"Kyrgyzstan\",\n \"Laos\",\n \"Latvia\",\n \"Lebanon\",\n \"Lesotho\",\n \"Liberia\",\n \"Libya\",\n \"Liechtenstein\",\n \"Lithuania\",\n \"Luxembourg\",\n \"Madagascar\",\n \"Malawi\",\n \"Malaysia\",\n \"Maldives\",\n \"Mali\",\n \"Malta\",\n \"Marshall Islands\",\n \"Mauritania\",\n \"Mauritius\",\n \"Micronesia\",\n \"Moldova\",\n \"Monaco\",\n \"Mongolia\",\n \"Montenegro\",\n \"Morocco\",\n \"Mozambique\",\n \"Myanmar\",\n \"Burma\",\n \"Namibia\"," + }, + { + "comment": "This code defines a list of countries and their corresponding adjectives, which can be used to generate diverse language datasets.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":243-305", + "content": " \"Nauru\",\n \"Nepal\",\n \"Nicaragua\",\n \"Niger\",\n \"North Macedonia\",\n \"Oman\",\n \"Palau\",\n \"Panama\",\n \"Papua New Guinea\",\n \"Paraguay\",\n \"Qatar\",\n \"Romania\",\n \"Rwanda\",\n \"Saint Kitts and Nevis\",\n \"Saint Lucia\",\n \"Saint Vincent and the Grenadines\",\n \"Samoa\",\n \"San Marino\",\n \"Sao Tome and 
Principe\",\n \"Senegal\",\n \"Serbia\",\n \"Seychelles\",\n \"Sierra Leone\",\n \"Singapore\",\n \"Slovakia\",\n \"Slovenia\",\n \"Solomon Islands\",\n \"Somalia\",\n \"Sri Lanka\",\n \"Sudan\",\n \"Suriname\",\n \"Syria\",\n \"Taiwan\",\n \"Tajikistan\",\n \"Tanzania\",\n \"Togo\",\n \"Tonga\",\n \"Trinidad and Tobago\",\n \"Tunisia\",\n \"Turkmenistan\",\n \"Tuvalu\",\n \"Uganda\",\n \"Uruguay\",\n \"Uzbekistan\",\n \"Vanuatu\",\n \"Vatican City\",\n \"Zambia\",\n]\nrest_adjectives = [\n \"Albanian\",\n \"Algerian\",\n \"Andorran\",\n \"Angolan\",\n \"Antiguan and Barbudan\",\n \"Armenian\",\n \"Austrian\",\n \"Azerbaijani\",\n \"Bahamian\",\n \"Bahraini\",\n \"Bangladeshi\",\n \"Barbadian\",\n \"Belarusian\",\n \"Belizean\"," + }, + { + "comment": "This code lists various country names and their corresponding adjective forms, used for identifying nationality or origin.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":306-369", + "content": " \"Beninese\",\n \"Bhutanese\",\n \"Bolivian\",\n \"Bosnian and Herzegovinian\",\n \"Botswanan\",\n \"Bruneian\",\n \"Bulgarian\",\n \"Burkinab\u00e8\",\n \"Burundian\",\n \"Cape Verdean\",\n \"Cambodian\",\n \"Cameroonian\",\n \"Central African\",\n \"Chadian\",\n \"Chilean\",\n \"Comorian\",\n \"Congolese\",\n \"Costa Rican\",\n \"Ivorian\",\n \"Croatian\",\n \"Cypriot\",\n \"Czech\",\n \"Czech\",\n \"Danish\",\n \"Djiboutian\",\n \"Dominican\",\n \"Dominican\",\n \"East Timorese\",\n \"Timorese\",\n \"Ecuadorian\",\n \"Salvadoran\",\n \"Equatorial Guinean\",\n \"Eritrean\",\n \"Estonian\",\n \"Swazi\",\n \"Ethiopian\",\n \"Fijian\",\n \"Finnish\",\n \"Gabonese\",\n \"Gambian\",\n \"Georgian\",\n \"Ghanaian\",\n \"Grenadian\",\n \"Guatemalan\",\n \"Guinean\",\n \"Bissau-Guinean\",\n \"Guyanese\",\n \"Haitian\",\n \"Honduran\",\n \"Hungarian\",\n \"Icelandic\",\n \"Jamaican\",\n \"Jordanian\",\n \"Kazakh\",\n \"I-Kiribati\",\n \"Kosovar\",\n 
\"Kuwaiti\",\n \"Kyrgyz\",\n \"Laotian\",\n \"Latvian\",\n \"Lebanese\",\n \"Basotho\",\n \"Liberian\",\n \"Libyan\"," + }, + { + "comment": "This code defines a list of country names and their associated adjectival forms, used for keyword counting in a dataset.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":370-432", + "content": " \"Liechtensteiner\",\n \"Lithuanian\",\n \"Luxembourger\",\n \"Malagasy\",\n \"Malawian\",\n \"Malaysian\",\n \"Maldivian\",\n \"Malian\",\n \"Maltese\",\n \"Marshallese\",\n \"Mauritanian\",\n \"Mauritian\",\n \"Micronesian\",\n \"Moldovan\",\n \"Mon\u00e9gasque\",\n \"Mongolian\",\n \"Montenegrin\",\n \"Moroccan\",\n \"Mozambican\",\n \"Myanmarese\",\n \"Burmese\",\n \"Namibian\",\n \"Nauruan\",\n \"Nepali\",\n \"Nicaraguan\",\n \"Nigerien\",\n \"Macedonian\",\n \"Omani\",\n \"Palauan\",\n \"Panamanian\",\n \"Papua New Guinean\",\n \"Paraguayan\",\n \"Qatari\",\n \"Romanian\",\n \"Rwandan\",\n \"Kittitian\",\n \"Nevisian\",\n \"Saint Lucian\",\n \"Vincentian\",\n \"Samoan\",\n \"Sammarinese\",\n \"Santomean\",\n \"Senegalese\",\n \"Serbian\",\n \"Seychellois\",\n \"Sierra Leonean\",\n \"Singaporean\",\n \"Slovak\",\n \"Slovenian\",\n \"Solomon Islander\",\n \"Somali\",\n \"Sri Lankan\",\n \"Sudanese\",\n \"Surinamese\",\n \"Syrian\",\n \"Taiwanese\",\n \"Tajik\",\n \"Tanzanian\",\n \"Togolese\",\n \"Tongan\",\n \"Trinidadian \",\n \"Tobagonian\",\n \"Tunisian\"," + }, + { + "comment": "This code generates a prompt for an AI language model to create a continuous passage with 16 sentences using a provided list of countries and specific restrictions. 
The generated text should mention the countries multiple times consecutively, be creative and coherent, and avoid using adjectives for the countries.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":433-459", + "content": " \"Turkmen\",\n \"Tuvaluan\",\n \"Ugandan\",\n \"Uruguayan\",\n \"Uzbek\",\n \"Ni-Vanuatu\",\n \"Vatican\",\n \"Zambian\",\n]\nlm = controller.ChatGPT(\n \"../../graph_of_thoughts/controller/config.json\", model_name=\"chatgpt4\"\n)\nprompt = \"\"\" Generate a continuous passage (single paragraph) of 16 sentences following the provided restrictions precisely. \n\nThe following restrictions must apply to the generated text:\n1. Single continuous passage of exactly 16 sentences without any paragraphs (line breaks).\n2. Countries appearing in the passage must be only from the provided list. No other countries can be mentioned.\n3. When a country is mentioned in the passage, it must be mentioned multiple times consecutively in the same or following sentences.\n4. Passage should be creative and coherent.\n5. Using adjectives of a country is NOT allowed (e.g., \"Colombian coffee\" should be \"coffee from Colombia\" instead)\n\n\nList of countries: [Afghanistan, Argentina" + }, + { + "comment": "The code defines a list containing the names of countries. 
These country names are used in various parts of the program to handle data related to specific countries.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":459-461", + "content": ", Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\nWhile exploring the ancient ruins in Greece, Sam discovered manuscripts that hinted at the hidden treasures of Egypt. It seemed these treasures were once stolen from Egypt by rogue merchants and secretly moved to Greece, only to be buried under layers of time. Intrigued, he shared the findings with his friend Maya from India, who was an expert in decoding ancient languages. She pointed out that there was a similar legend in India about treasures from China that had somehow ended up in the southern parts of India, possibly" + }, + { + "comment": "Code snippet describes a conversation between Sam and Maya discussing historical connections between different countries through trade and cultural exchanges.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":461-461", + "content": " through trade or conquest. She also recounted tales from China that spoke of incredible artifacts from Indonesia, suggesting a rich tapestry of cultural exchanges throughout history. Their conversation took an interesting turn when Sam mentioned a book he'd read about the mysterious connections between Argentina and Brazil. 
The book detailed how both Argentina and Brazil, despite their differences, shared tales of lost civilizations and forgotten cities deep within their jungles. Maya excitedly mentioned that she'd been to the Philippines and had heard local legends about ancient ties with Indonesia and how traders from the Philippines would journey to Indonesia in search of spices and other goods. Thinking of spices, Sam fondly recalled his trip to Spain, where he had learned about the country's historical links with Portugal. Spain and Portugal, both maritime giants of their time, had extensively explored unknown lands and established trade routes. Maya, remembering her travels, sai" + }, + { + "comment": "This code generates a dataset of samples, where each sample contains an ID, text, list of countries mentioned, number of sentences, and number of characters. It will generate 100 samples with incrementing IDs. The provided list of countries serves as the pool from which countries will be randomly selected for each sample's text.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":461-470", + "content": "d that she had been to Belgium once and was fascinated by its connections with the Netherlands. Both Belgium and the Netherlands, she explained, had rich histories of art, trade, and diplomacy that intertwined them for centuries. 
They both sat back, marveling at the interconnectedness of the world and how countries from Greece to the Netherlands shared tales of adventure, discovery, and mystery.\n\nList of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\n\"\"\"\nnum_samples = 100\nsample_id = 0\nresult = [[\"ID\", \"Text\", \"Countries\", \"Sentences\", \"Characters\"]]" + }, + { + "comment": "This code generates passages containing country names for keyword counting. It iterates through a given number of samples, queries the language model (lm) for responses, cleans paragraphs by removing newlines and extra spaces, finds all occurrences of primary countries in each text, orders them based on their appearance in the text, and checks for invalid countries or adjectives.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":472-498", + "content": "\"\"\"\nGenerate passages of text that contain country names to be used as input for the\nkeyword counting.\nInput(x) : Number of samples\nOutput(y) : Passages written to a file in the CSV format.\n File contains the sample ID, the passage, the countries the passage\n contains, the sentences of the passages, number of characters of the\n passage.\n\"\"\"\n# For x batches of y responses\nfor _ in range(num_samples):\n response = lm.query(prompt, 1)\n texts = lm.get_response_texts(response)\n for text in texts:\n # Clean paragraphs - single long passage\n text = text.strip().replace(\"\\n\", \"\")\n # Get all occurrences of 
all primary permissible countries\n occurrences = []\n for country in [country for country in primary_countries if country in text]:\n occurrences.extend(find_country_indices(text, country))\n # Order exactly how they appear in the text\n ordered_occurrences = [country[1] for country in sorted(occurrences)]\n # Check invalid countries and adjectives" + }, + { + "comment": "This code segment checks for invalid primary adjectives, rest countries, and rest adjectives in the text. It counts their occurrences, and if any of them are present, it prints a message with details about the invalid elements found. If there are no invalid elements, it adds the sample (with its ID, text, ordered occurrences, number of sentences, and total length) to the result list. The code continues to the next iteration, and after processing all samples, it will write the final result to a CSV file.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":499-531", + "content": " invalid_primary_adjective = [\n adjective for adjective in primary_adjectives if adjective in text\n ]\n invalid_rest_country = [\n country for country in rest_countries if country in text\n ]\n invalid_rest_adjective = [\n adjective for adjective in rest_adjectives if adjective in text\n ]\n invalid_count = (\n len(invalid_primary_adjective)\n + len(invalid_rest_country)\n + len(invalid_rest_adjective)\n )\n if invalid_count > 0:\n print(\n f\"Invalid countries or adjectives present: {invalid_primary_adjective}, {invalid_rest_country}, {invalid_rest_adjective}\"\n )\n continue\n result.append(\n [\n sample_id,\n text,\n \"[{0}]\".format(\", \".join(map(str, ordered_occurrences))),\n len(text.split(\".\")) - 1,\n len(text),\n ]\n )\n sample_id += 1\n# Writing to csv file" + }, + { + "comment": "This code writes the result to a CSV file named \"countries_script.csv\". 
It opens the file in write mode (\"w\"), creates a CSV writer object, and uses the writerows() method to write each row of the result variable to the CSV file.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/dataset_gen_countries.py\":532-534", + "content": "with open(\"countries_script.csv\", \"w\") as csvfile:\n csvwriter = csv.writer(csvfile)\n csvwriter.writerows(result)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/a23616f6-ad8f-4cc1-8642-d64d19c48cde.json b/docs/doc/a23616f6-ad8f-4cc1-8642-d64d19c48cde.json new file mode 100644 index 0000000..36b7290 --- /dev/null +++ b/docs/doc/a23616f6-ad8f-4cc1-8642-d64d19c48cde.json @@ -0,0 +1,20 @@ +{ + "summary": "The code provides a data generator for set intersections and allows users to customize parameters, storing results in JSON files. To visualize the data, modify the results directory and length parameter in `plot.py` before executing the script.", + "details": [ + { + "comment": "This code provides different approaches for computing set intersection and offers a data generator to create samples. It includes IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). The user can specify the number of elements, upper bound, seed, number of samples, and output filename. The code is structured into separate files for each set length.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/README.md\":0-28", + "content": "# Set Intersection\nThe use case in this directory computes the intersection of two input\nsets. 
We provide implementations of five different approaches for 32, 64\nand 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT)\n## Data\nWe provide input files with 100 precomputed samples for each set length:\n`set_intersection_.csv`. It is also possible to use\nthe data generator `dataset_gen_intersection.py` to generate additional or\ndifferent samples. The parameters can be updated in lines 24 to 28 of\nthe main body:\n- set_size = 32 # size of the generated sets\n- int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n- seed = 42 # seed of the random number generator\n- num_sample = 100 # number of samples\n- filename = 'set_intersection_032.csv' # output filename\n## Execution\nThe files to execute the use case are called\n`set_intersection_.py`. In the main body, one can" + }, + { + "comment": "This code selects samples and approaches, allows budget setting, hardcodes input filename, creates directories for execution-specific files, and stores the Graph Reasoning State (GRS) for each sample in separate JSON files.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/README.md\":29-45", + "content": "select the specific samples to be run (variable sample) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe input filename for the samples is currently hardcoded to\n`set_intersection_.csv`, but can be updated in the\nfunction `run`.\nThe Python scripts will create the directory `result`, if it is not\nalready present. 
In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample." + }, + { + "comment": "This code snippet instructs the user to modify the results directory in line 170 of `plot.py` and adjust the length parameter accordingly before executing `python3 plot.py` to visualize their data.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/set_intersection/README.md\":47-51", + "content": "## Plot Data\nChange the results directory in line 170 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data." + } + ] +} \ No newline at end of file diff --git a/docs/doc/a9ed2c0d-bced-4bd8-8e98-f89309111026.json b/docs/doc/a9ed2c0d-bced-4bd8-8e98-f89309111026.json new file mode 100644 index 0000000..9f8006b --- /dev/null +++ b/docs/doc/a9ed2c0d-bced-4bd8-8e98-f89309111026.json @@ -0,0 +1,25 @@ +{ + "summary": "The Thought class represents an LLM thought with attributes including state, score, validity flag, and solution flag. It includes methods for initializing new instances and cloning existing thoughts, as well as properties for validity, score, and solved flag management.", + "details": [ + { + "comment": "This code defines a `Thought` class that represents an LLM thought with its state, constructed by the parser, and various flags. 
The class has instance attributes including a logger, unique ID, state, score, validity flag, solution flag, and a method to initialize a new Thought instance with a state and default flags if none provided.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/thought.py\":0-34", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom typing import Iterator, Dict, Optional\nimport itertools\nclass Thought:\n \"\"\"\n Represents an LLM thought with its state, constructed by the parser, and various flags.\n \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n def __init__(self, state: Optional[Dict] = None) -> None:\n \"\"\"\n Initializes a new Thought instance with a state and various default flags.\n :param state: The state of the thought. Defaults to None.\n :type state: Optional[Dict]\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Thought._ids)\n self.state: Dict = state\n self._score: float = 0.0\n self._valid: bool = False\n self._solved: bool = False\n self.scored: bool = False" + }, + { + "comment": "This code defines a Thought class with properties like state, score, validity, solved status, scoring information, and comparison to ground truth. The class also has a static method `from_thought` to create a new thought from an existing one by cloning its properties. 
The `valid` property is a boolean representing the validity of the thought, which can be accessed using the `@property` decorator and modified with the `@valid.setter` decorator.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/thought.py\":35-67", + "content": " self.validated: bool = False\n self.compared_to_ground_truth: bool = False\n @staticmethod\n def from_thought(thought: Thought) -> Thought:\n \"\"\"\n Creates a new thought from an existing one.\n :param thought: An instance of a Thought to clone.\n :return: A new Thought instance with properties copied from the input thought.\n \"\"\"\n new_thought = Thought(thought.state)\n new_thought.score = thought.score\n new_thought.valid = thought.valid\n new_thought.solved = thought.solved\n new_thought.scored = thought.scored\n new_thought.validated = thought.validated\n new_thought.compared_to_ground_truth = thought.compared_to_ground_truth\n return new_thought\n @property\n def valid(self) -> bool:\n \"\"\"\n Returns the validity of the thought.\n :return: The validity of the thought.\n :rtype: bool\n \"\"\"\n return self._valid\n @valid.setter\n def valid(self, valid: bool) -> None:\n \"\"\"" + }, + { + "comment": "This code defines a Thought class with properties for validity, score, and solved flag. The valid property can be set and gets the validity of the thought. The score property returns and sets the score of the thought, marking it as scored when updated. 
The solved property returns and sets the solved flag of the thought, also marking it as compared_to_ground_truth when updated.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/thought.py\":68-110", + "content": " Sets the validity of the thought and the validated flag.\n :param valid: The validity of the thought.\n :type valid: bool\n \"\"\"\n self.validated = True\n self._valid = valid\n @property\n def score(self) -> float:\n \"\"\"\n Returns the score of the thought.\n :return: The score of the thought.\n :rtype: float\n \"\"\"\n return self._score\n @score.setter\n def score(self, new_score: float) -> None:\n \"\"\"\n Sets the score of the thought and the scored flag.\n :param new_score: The score of the thought.\n :type new_score: float\n \"\"\"\n self.scored = True\n self._score = new_score\n @property\n def solved(self) -> bool:\n \"\"\"\n Returns the solved flag of the thought.\n :return: The solved flag of the thought.\n :rtype: bool\n \"\"\"\n return self._solved\n @solved.setter\n def solved(self, solved: bool) -> None:\n \"\"\"\n Sets the solved flag of the thought and the compared_to_ground_truth flag." + }, + { + "comment": "Method defining a Thought object with a boolean parameter \"solved\" indicating if it contains a solution to the problem. 
The method sets self.compared_to_ground_truth to True and assigns the value of solved to self._solved.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/thought.py\":112-116", + "content": " :param solved: Whether the thought contains a solution to the problem.\n :type solved: bool\n \"\"\"\n self.compared_to_ground_truth = True\n self._solved = solved" + } + ] +} \ No newline at end of file diff --git a/docs/doc/b1ebbf1c-ed23-4a46-a071-011a2df7c24c.json b/docs/doc/b1ebbf1c-ed23-4a46-a071-011a2df7c24c.json new file mode 100644 index 0000000..c872d33 --- /dev/null +++ b/docs/doc/b1ebbf1c-ed23-4a46-a071-011a2df7c24c.json @@ -0,0 +1,30 @@ +{ + "summary": "The code initializes the LLaMA 2 model for text generation, sets up configurations and tokenizer, creates a pipeline, defines a method to generate responses by querying the model, formats responses into dictionaries, and extracts \"generated_text\" from multiple query response dictionaries.", + "details": [ + { + "comment": "The code imports necessary libraries, defines a class Llama2HF as an interface for using LLaMA 2 models through HuggingFace library, and initializes the class with configuration, model name, and caching options.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/llamachat_hf.py\":0-30", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport os\nimport torch\nfrom typing import List, Dict, Union\nfrom .abstract_language_model import AbstractLanguageModel\nclass Llama2HF(AbstractLanguageModel):\n \"\"\"\n An interface to use LLaMA 2 models through the HuggingFace library.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"llama7b-hf\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize an 
instance of the Llama2HF class with configuration, model details, and caching options.\n :param config_path: Path to the configuration file. Defaults to an empty string.\n :type config_path: str\n :param model_name: Specifies the name of the LLaMA model variant. Defaults to \"llama7b-hf\".\n Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False." + }, + { + "comment": "The code initializes a class and sets various attributes such as model_id, prompt and response token costs, temperature, top K sampling, and maximum tokens. It also sets the Transformers library cache environment variable before importing it to avoid conflicts with other caches.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/llamachat_hf.py\":31-52", + "content": " :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Detailed id of the used model.\n self.model_id: str = self.config[\"model_id\"]\n # Costs for 1000 tokens.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # Top K sampling.\n self.top_k: int = self.config[\"top_k\"]\n # The maximum number of tokens to generate in the chat completion.\n self.max_tokens: int = self.config[\"max_tokens\"]\n # Important: must be done before importing transformers\n os.environ[\"TRANSFORMERS_CACHE\"] = self.config[\"cache_dir\"]\n import transformers\n hf_model_id = f\"meta-llama/{self.model_id}\"\n model_config = transformers.AutoConfig.from_pretrained(hf_model_id)" + }, + { + "comment": "The code initializes an LLaMA model for text generation, loads the tokenizer and model configurations, and creates a text 
generation pipeline. It also provides a function to query the model with a given input query and can generate multiple responses depending on the provided number of desired responses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/llamachat_hf.py\":53-81", + "content": " bnb_config = transformers.BitsAndBytesConfig(\n load_in_4bit=True,\n bnb_4bit_quant_type=\"nf4\",\n bnb_4bit_use_double_quant=True,\n bnb_4bit_compute_dtype=torch.bfloat16,\n )\n self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id)\n self.model = transformers.AutoModelForCausalLM.from_pretrained(\n hf_model_id,\n trust_remote_code=True,\n config=model_config,\n quantization_config=bnb_config,\n device_map=\"auto\",\n )\n self.model.eval()\n torch.no_grad()\n self.generate_text = transformers.pipeline(\n model=self.model, tokenizer=self.tokenizer, task=\"text-generation\"\n )\n def query(self, query: str, num_responses: int = 1) -> List[Dict]:\n \"\"\"\n Query the LLaMA 2 model for responses.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: Number of desired responses, default is 1." + }, + { + "comment": "This code defines a method that generates responses from the LLaMA 2 language model. It first checks if the response is cached, then creates a query with system instructions and input. It generates multiple responses using the `generate_text` function, stores them in a list, and formats them into a response dictionary. Finally, it caches the response if necessary.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/llamachat_hf.py\":82-106", + "content": " :type num_responses: int\n :return: Response(s) from the LLaMA 2 model.\n :rtype: List[Dict]\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n sequences = []\n query = f\"<>You are a helpful assistant. 
Always follow the intstructions precisely and output the response exactly in the requested format.<>\\n\\n[INST] {query} [/INST]\"\n for _ in range(num_responses):\n sequences.extend(\n self.generate_text(\n query,\n do_sample=True,\n top_k=self.top_k,\n num_return_sequences=1,\n eos_token_id=self.tokenizer.eos_token_id,\n max_length=self.max_tokens,\n )\n )\n response = [\n {\"generated_text\": sequence[\"generated_text\"][len(query) :].strip()}\n for sequence in sequences\n ]\n if self.cache:\n self.respone_cache[query] = response" + }, + { + "comment": "This function takes a list of query response dictionaries, extracts the \"generated_text\" key from each dictionary and returns a list of those extracted texts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/llamachat_hf.py\":107-118", + "content": " return response\n def get_response_texts(self, query_responses: List[Dict]) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_responses: The response list of dictionaries generated from the `query` method.\n :type query_responses: List[Dict]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n return [query_response[\"generated_text\"] for query_response in query_responses]" + } + ] +} \ No newline at end of file diff --git a/docs/doc/b3d07cc5-ef2d-4fc4-aba2-4324001369f1.json b/docs/doc/b3d07cc5-ef2d-4fc4-aba2-4324001369f1.json new file mode 100644 index 0000000..1364742 --- /dev/null +++ b/docs/doc/b3d07cc5-ef2d-4fc4-aba2-4324001369f1.json @@ -0,0 +1,10 @@ +{ + "summary": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. 
It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", + "details": [ + { + "comment": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/operations/__init__.py\":0-13", + "content": "from .thought import Thought\nfrom .graph_of_operations import GraphOfOperations\nfrom .operations import (\n Operation,\n Score,\n ValidateAndImprove,\n Generate,\n Aggregate,\n KeepBestN,\n KeepValid,\n Selector,\n GroundTruth,\n Improve,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/ba444083-0879-4ed5-bd02-c751b6e84203.json b/docs/doc/ba444083-0879-4ed5-bd02-c751b6e84203.json new file mode 100644 index 0000000..bab6b03 --- /dev/null +++ b/docs/doc/ba444083-0879-4ed5-bd02-c751b6e84203.json @@ -0,0 +1,20 @@ +{ + "summary": "The code defines a function that converts string-encoded lists to Python integer lists and tests if the solution matches ground truth. A helper function checks sorted lists by comparing adjacent elements, returning error count as score; defaults to 300 in case of exception.", + "details": [ + { + "comment": "This code defines a function to convert a list encoded inside a string into a Python list object of integer elements. 
It also contains a helper function that tests whether the final solution matches the ground truth, taking a thought state as input and returning a boolean result.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/utils.py\":0-34", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom typing import Dict, List\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of string elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of string elements.\n :rtype: List[str]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef test_sorting(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool" + }, + { + "comment": "Function to check if a given list is correctly sorted. 
If not, returns the number of errors as score.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/utils.py\":35-69", + "content": " \"\"\"\n try:\n correct_list = sorted(string_to_list(state[\"original\"]))\n sorted_list = string_to_list(state[\"current\"])\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors.\n :rtype: float\n \"\"\"\n try:\n unsorted_list = state[\"original\"]\n if (\n \"unsorted_sublist\" in state\n and state[\"unsorted_sublist\"] != \"\"\n and state[\"unsorted_sublist\"] is not None\n and len(state[\"unsorted_sublist\"]) < len(unsorted_list) - 5\n ):\n unsorted_list = state[\"unsorted_sublist\"]\n correct_list = sorted(string_to_list(unsorted_list))\n current_list = string_to_list(state[\"current\"])\n num_errors = 0\n for i in range(10):\n num_errors += abs(\n sum([1 for num in current_list if num == i])" + }, + { + "comment": "This code calculates the number of errors in a sorted list by comparing adjacent elements. It uses list comprehensions and built-in Python functions like zip() and sum(). 
If an exception occurs, it returns 300 as a default value for num_errors.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/utils.py\":70-77", + "content": " - sum([1 for num in correct_list if num == i])\n )\n num_errors += sum(\n [1 for num1, num2 in zip(current_list, current_list[1:]) if num1 > num2]\n )\n return num_errors\n except:\n return 300" + } + ] +} \ No newline at end of file diff --git a/docs/doc/bc29c96d-2474-4e87-90c7-ba3a25757fee.json b/docs/doc/bc29c96d-2474-4e87-90c7-ba3a25757fee.json new file mode 100644 index 0000000..17addc9 --- /dev/null +++ b/docs/doc/bc29c96d-2474-4e87-90c7-ba3a25757fee.json @@ -0,0 +1,15 @@ +{ + "summary": "The code offers a frequency computation method for countries in text using seven approaches, utilizing 'countries.csv' as input, and allows for custom samples, budgets, and directory creation with log files.", + "details": [ + { + "comment": "This code provides a use case for computing the frequencies of occurring countries in a long passage of text using seven different approaches including IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with variations, and Graph of Thoughts (GoT) with variations. It uses an input file named 'countries.csv' and provides a data generator for additional or different samples. The code to execute the use case is called 'keyword_counting.py'.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/README.md\":0-25", + "content": "# Keyword Counting\nThe use case in this directory computes the frequencies of occurring countries \nin a long passage of text. 
We provide implementations of seven different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT4: split passage into 4 sub-passages\n - GoT8: split passage into 8 sub-passages\n - GoTx: split by sentences\n## Data\nWe provide an input file with 100 samples: `countries.csv`. It is also possible to use\nthe data generator `dataset_gen_countries.py` to generate additional or\ndifferent samples (using GPT-4). The parameters can be updated on line 54 (number of samples to be generated). \nNote that not every generated sample will be included in the dataset, as each sample is \nadditionally tested for validity (observe script output for details).\n## Execution\nThe file to execute the use case is called\n`keyword_counting.py`. In the main body, one can" + }, + { + "comment": "The code selects specific samples, approaches, and sets a budget for running Python scripts. It creates directories for each run with `config.json` and `log.log` files containing LLM prompts/responses and GRS data for samples. Change the results directory in line 150 of `plot.py` to plot data by running `python3 plot.py`.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/keyword_counting/README.md\":26-44", + "content": "select the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. 
`config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 150 of `plot.py` and run `python3\nplot.py` to plot your data." + } + ] +} \ No newline at end of file diff --git a/docs/doc/bceed44f-7aac-4b5f-a653-e8689e5d160c.json b/docs/doc/bceed44f-7aac-4b5f-a653-e8689e5d160c.json new file mode 100644 index 0000000..86d1e6d --- /dev/null +++ b/docs/doc/bceed44f-7aac-4b5f-a653-e8689e5d160c.json @@ -0,0 +1,10 @@ +{ + "summary": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", + "details": [ + { + "comment": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. 
This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/__init__.py\":0-0", + "content": "from .controller import Controller" + } + ] +} \ No newline at end of file diff --git a/docs/doc/c275354a-cb53-400e-9eeb-4b68c825d129.json b/docs/doc/c275354a-cb53-400e-9eeb-4b68c825d129.json new file mode 100644 index 0000000..fa9b4a1 --- /dev/null +++ b/docs/doc/c275354a-cb53-400e-9eeb-4b68c825d129.json @@ -0,0 +1,15 @@ +{ + "summary": "The code provides a generic language model configuration template, including parameters for model ID, prompt and response token costs, temperature, max tokens, stop words, cache directory (\"/llama\"), and optional values (top-k=10). This is a user-specific config without API key or organization.", + "details": [ + { + "comment": "This code appears to be a configuration template for language models, with each model (such as \"chatgpt\", \"chatgpt4\", \"llama7b-hf\", etc.) defined by its own set of parameters including the model ID, prompt and response token costs, temperature, max tokens, and optional stop words. The \"cache_dir\" parameter is specific to Llama models, suggesting these models require local caching. 
The absence of an API key and organization suggests that this is a generic template for user-specific configurations.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/config_template.json\":0-40", + "content": "{\n \"chatgpt\" : {\n \"model_id\": \"gpt-3.5-turbo\",\n \"prompt_token_cost\": 0.0015,\n \"response_token_cost\": 0.002,\n \"temperature\": 1.0,\n \"max_tokens\": 1536,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"chatgpt4\" : {\n \"model_id\": \"gpt-4\",\n \"prompt_token_cost\": 0.03,\n \"response_token_cost\": 0.06,\n \"temperature\": 1.0,\n \"max_tokens\": 4096,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"llama7b-hf\" : {\n \"model_id\": \"Llama-2-7b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama13b-hf\" : {\n \"model_id\": \"Llama-2-13b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama70b-hf\" : {\n \"model_id\": \"Llama-2-70b-chat-hf\"," + }, + { + "comment": "This code snippet contains a configuration template for a language model. 
It sets the cache directory path as \"/llama\", prompts and response tokens costs to 0, temperature at 0.6, top-k value as 10, and maximum generated token count as 4096.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/config_template.json\":41-48", + "content": " \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n }\n}" + } + ] +} \ No newline at end of file diff --git a/docs/doc/c5e61b9e-7b0c-4187-845f-ad264dfef3ba.json b/docs/doc/c5e61b9e-7b0c-4187-845f-ad264dfef3ba.json new file mode 100644 index 0000000..375d9de --- /dev/null +++ b/docs/doc/c5e61b9e-7b0c-4187-845f-ad264dfef3ba.json @@ -0,0 +1,20 @@ +{ + "summary": "This code defines an AbstractLanguageModel class with config file path, model name, and caching options for language models. It also includes two abstract methods: 'query' and 'get_response_texts', serving as placeholders for derived classes to implement their own functionality.", + "details": [ + { + "comment": "This code snippet defines an abstract base class, AbstractLanguageModel, for language models with config file path, model name, and caching options in the initializer. 
It also initializes a logger for logging purposes.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/abstract_language_model.py\":0-33", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom abc import ABC, abstractmethod\nfrom typing import List, Dict, Union, Any\nimport json\nimport os\nimport logging\nclass AbstractLanguageModel(ABC):\n \"\"\"\n Abstract base class that defines the interface for all language models.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the AbstractLanguageModel instance with configuration, model details, and caching options.\n :param config_path: Path to the config file. Defaults to \"\".\n :type config_path: str\n :param model_name: Name of the language model. Defaults to \"\".\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.\n :type cache: bool\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__name__)" + }, + { + "comment": "This code initializes an abstract language model object with optional cache and loads its configuration from a specified file. It also provides methods to clear the response cache.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/abstract_language_model.py\":34-65", + "content": " self.config: Dict = None\n self.model_name: str = model_name\n self.cache = cache\n if self.cache:\n self.respone_cache: Dict[str, List[Any]] = {}\n self.load_config(config_path)\n self.prompt_tokens: int = 0\n self.completion_tokens: int = 0\n self.cost: float = 0.0\n def load_config(self, path: str) -> None:\n \"\"\"\n Load configuration from a specified path.\n :param path: Path to the config file. 
If an empty path provided,\n default is `config.json` in the current directory.\n :type path: str\n \"\"\"\n if path == \"\":\n current_dir = os.path.dirname(os.path.abspath(__file__))\n path = os.path.join(current_dir, \"config.json\")\n with open(path, \"r\") as f:\n self.config = json.load(f)\n self.logger.debug(f\"Loaded config from {path} for {self.model_name}\")\n def clear_cache(self) -> None:\n \"\"\"\n Clear the response cache.\n \"\"\"\n self.respone_cache.clear()" + }, + { + "comment": "This code defines two abstract methods for a language model. The 'query' method takes a query and the desired number of responses, but doesn't specify what it should do with them. The 'get_response_texts' method expects response(s) from the language model, but doesn't clarify how to extract textual data. It serves as a placeholder for derived classes to implement their own functionality.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/language_models/abstract_language_model.py\":67-91", + "content": " @abstractmethod\n def query(self, query: str, num_responses: int = 1) -> Any:\n \"\"\"\n Abstract method to query the language model.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: The number of desired responses.\n :type num_responses: int\n :return: The language model's response(s).\n :rtype: Any\n \"\"\"\n pass\n @abstractmethod\n def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]:\n \"\"\"\n Abstract method to extract response texts from the language model's response(s).\n :param query_responses: The responses returned from the language model.\n :type query_responses: Union[List[Any], Any]\n :return: List of textual responses.\n :rtype: List[str]\n \"\"\"\n pass" + } + ] +} \ No newline at end of file diff --git a/docs/doc/d4d79625-27f5-4744-89d1-7db44ae551cb.json b/docs/doc/d4d79625-27f5-4744-89d1-7db44ae551cb.json new file mode 100644 
index 0000000..58615b0 --- /dev/null +++ b/docs/doc/d4d79625-27f5-4744-89d1-7db44ae551cb.json @@ -0,0 +1,140 @@ +{ + "summary": "The code develops an efficient NDA merging class with language model prompts and redundancy handling, generating a graph for document merge and language model inference within budget limits. It utilizes input data from \"documents.csv\", manages exceptions, and scores output based on coverage.", + "details": [ + { + "comment": "This code defines a class DocMergePrompter that inherits from Prompter and provides prompts for merging NDA documents. It includes a merge_doc_prompt_start string for generating the prompt and a merge_doc_prompt_block string for displaying NDAs to be merged. The goal is to create a single NDA by maximizing information retention and minimizing redundancy, with the output between and .", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":0-30", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport os\nimport re\nimport logging\nimport datetime\nimport json\nimport csv\nfrom statistics import fmean\nfrom typing import Dict, List, Callable, Set, Union\nfrom graph_of_thoughts import controller, language_models, operations, prompter, parser\nclass DocMergePrompter(prompter.Prompter):\n \"\"\"\n DocMergePrompter provides the generation of prompts specific to the document\n merge example for the language models.\n Inherits from the Prompter class and implements its abstract methods.\n \"\"\"\n merge_doc_prompt_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. 
Output only the created NDA between the tags and , without any additional text.\nHere are NDAs - \n\"\"\"\n merge_doc_prompt_block = \"\"\"" + }, + { + "comment": "The code defines two prompts for merging and improving NDA documents. The first prompt instructs to merge the provided NDAs into a single one, preserving information and minimizing redundancy. It also provides an example approach. The second prompt asks to improve the merged document by adding more information and removing redundancies, with output placed between specific tags. Both prompts include the input NDAs as \"Doc1\" to \"Doc{num}\".", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":31-53", + "content": "\n{document}\n\n\"\"\"\n merge_doc_prompt_cot_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy.\nYou can generate any intermediate thoughts and documents you want, but the final output should be the merged NDA, placed between the two tags and .\nFor instance you might want to follow this approach:\n1. Split each NDA into their logical subparts.\n2. Merge the subparts of the {num} NDAs.\n3. Combine the merged subparts into a single NDA.\n4. Place the merged NDA between the tags and .\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_start = \"\"\"The following NDA merges initial NDAs - .\nPlease improve the summary NDA by adding more information and removing redundancy. Output only the improved NDA, placed between the two tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_block = \"\"\"" + }, + { + "comment": "This code contains various prompts for different tasks, such as improving summaries and scoring merged documents. 
The prompts are designed to assist in the task of merging NDAs while considering redundancy and retained information scores, with specific tags provided for clarity.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":54-70", + "content": "\n{document}\n\n\"\"\"\n improve_summary_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n score_prompt_base = \"\"\"The following NDA merges NDAs - .\nPlease score the merged NDA in terms of how much redundant information is contained, independent of the original NDAs, as well as how much information is retained from the original NDAs.\nA score of 10 for redundancy implies that absolutely no information is redundant, while a score of 0 implies that at least half of the information is redundant (so everything is at least mentioned twice).\nA score of 10 for retained information implies that all information from the original NDAs is retained, while a score of 0 implies that no information is retained.\nYou may provide reasoning for your scoring, but the final score for redundancy should be between the tags and , and the final score for retained information should be between the tags and , without any additional text within any of those tags." + }, + { + "comment": "This code appears to be part of a larger program that deals with merging and summarizing Non-Disclosure Agreements (NDAs). It uses string formatting to generate prompts for the user, asking them to provide NDAs in a specific format. 
The code snippet includes various placeholders (, ) for incorporating the user's provided information.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":72-111", + "content": "Here are NDAs - :\n\"\"\"\n score_prompt_block = \"\"\"\n\n{document}\n\n\"\"\"\n score_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n aggregate_full_prompt_base = \"\"\"The following NDAs - each merge the initial NDAs - .\nCombine the merged NDAs - into a new one, maximizing their advantages and overall information retention, while minimizing redundancy.\nOutput only the new NDA between the tags and , without any additional text. \nHere are the original NDAs - :\n\"\"\"\n aggregate_full_prompt_block1 = \"\"\"\n\n{document}\n\n\"\"\"\n aggregate_full_prompt_mid = \"\"\"\nHere are the summary NDAs - :\n\"\"\"\n aggregate_full_prompt_block2 = \"\"\"\n\n{summary}\n\n\"\"\"\n aggregate_sub_prompt_base = \"\"\"The following NDAs - are summaries of some other NDAs.\nCombine them into a new one, make sure to maximize their advantages and overall information retention, while minimizing redundancy." + }, + { + "comment": "This code generates an aggregation prompt for a language model, using the provided state_dicts. It concatenates NDAs from each state_dict and formats them into a final prompt. 
The output is a string containing the merged NDAs between \"\" and \"\".", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":112-142", + "content": "Output only the new NDA between the tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n aggregate_sub_prompt_generate = \"\"\"\nNDA :\n{nda}\n\n\"\"\"\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate an aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n if len(state_dicts[0][\"parts\"]) > 0 and len(state_dicts[0][\"parts\"]) < len(\n state_dicts[0][\"documents\"]\n ):\n prompt = self.aggregate_sub_prompt_base.format(\n num_ndas=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_sub_prompt_generate.format(\n nda=state_dict[\"current\"], num=i + 1" + }, + { + "comment": "This code defines a class with methods for generating prompts. The `generate_prompt` method takes in parameters like number of branches, documents, and current state. 
It returns a prompt for the language model using string formatting based on input parameters.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":143-173", + "content": " )\n return prompt\n else:\n prompt = self.aggregate_full_prompt_base.format(\n num_ndas=len(state_dicts[0][\"documents\"]),\n num_ndas_summary=len(state_dicts),\n )\n for i, document in enumerate(state_dicts[0][\"documents\"]):\n prompt += self.aggregate_full_prompt_block1.format(\n document=document, num=i + 1\n )\n prompt += self.aggregate_full_prompt_mid.format(\n num_ndas_summary=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_full_prompt_block2.format(\n summary=state_dict[\"current\"], num=i + 1\n )\n return prompt\n def generate_prompt(\n self,\n num_branches: int,\n documents: List[str],\n method: str,\n parts: Set[str],\n current: str,\n **kwargs,\n ) -> str:\n \"\"\"\n Generate a generate prompt for the language model." + }, + { + "comment": "This function takes in the number of responses, a list of documents to merge, method for generating the prompt, indices of already processed document parts, an intermediate solution, and additional keyword arguments. It returns the generate prompt used for merging the documents. 
If the method is not implemented yet, it raises AssertionError.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":175-197", + "content": " :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param documents: The list of documents to be merged.\n :type documents: List[str]\n :param method: Method for which the generate prompt is generated.\n :type method: str\n :param parts: Indices of the already processed document parts.\n :type parts: Set[str]\n :param current: The intermediate solution.\n :type current: str\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n :raise AssertionError: If method is not implemented yet.\n \"\"\"\n prompt = \"\"\n if method.startswith(\"io\") or method.startswith(\"cot\"):\n if method.startswith(\"io\"):\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n else:\n prompt += self.merge_doc_prompt_cot_start.format(num=len(documents))\n for i, document in enumerate(documents):" + }, + { + "comment": "The code provides a prompt for merging multiple documents or improving a given summary based on the specified method. It dynamically generates the prompt by concatenating predefined blocks of text with placeholders for document numbers and the original summary. 
If no current summary is provided, it creates a prompt to merge documents, otherwise, it improves the given summary using those documents.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":198-220", + "content": " prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n elif method.startswith(\"tot\"):\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n for i, document in enumerate(documents):\n prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(documents),\n )\n for i, document in enumerate(documents):\n prompt += self.improve_summary_prompt_block.format(\n document=document, num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n elif method.startswith(\"got\"):" + }, + { + "comment": "The code checks if the current summary is provided. If not, it generates a prompt for merging documents into one coherent summary. If the current summary is provided, it generates a prompt for improving an existing summary by incorporating information from multiple documents. 
The code also sorts the parts of the document and formats them in a specific way for the prompts.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":221-244", + "content": " parts = (\n sorted(list(parts)) if len(parts) > 0 else list(range(len(documents)))\n )\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(parts))\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.merge_doc_prompt_block.format(\n document=documents[part], num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(parts),\n )\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.improve_summary_prompt_block.format(\n document=documents[part], num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n else:\n assert False, \"Not implemented yet.\"\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:" + }, + { + "comment": "This function generates a score prompt for the language model using a single thought state provided as an argument. It checks if only one thought state is supplied and handles the case where more than one is given. 
The prompt is created by formatting the base and block prompts with the number of documents.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":245-273", + "content": " \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n :raise AssertionError: If more than one thought state is supplied.\n \"\"\"\n if len(state_dicts) > 1:\n assert False, \"Not implemented yet.\"\n else:\n # perform individual scoring\n parts = (\n [\n state_dicts[0][\"documents\"][part]\n for part in sorted(list(state_dicts[0][\"parts\"]))\n ]\n if len(state_dicts[0][\"parts\"]) > 0\n else state_dicts[0][\"documents\"]\n )\n prompt = self.score_prompt_base.format(\n num=len(parts),\n )\n for i, part in enumerate(parts):\n prompt += self.score_prompt_block.format(document=part, num=i + 1)" + }, + { + "comment": "This code defines a class DocMergeParser that extends the Parser class and provides specific functionality for parsing language model responses in the document merge example. It includes methods to generate improve prompt, validation prompt, and handles answer stripping with optional tags. 
The response cache is initialized in the constructor.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":274-314", + "content": " prompt += self.score_prompt_end.format(\n summary=state_dicts[0][\"current\"],\n )\n return prompt\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\nclass DocMergeParser(parser.Parser):\n \"\"\"\n DocMergeParser provides the parsing of language model reponses specific to the\n document merge example.\n Inherits from the Parser class and implements its abstract methods.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Inits the response cache.\n \"\"\"\n self.cache = {}\n def strip_answer_helper(self, text: str, tag: str = \"\") -> str:" + }, + { + "comment": "This function removes specified tags from a text. It first strips whitespace and checks if \"Output:\" is in the text. Then, it searches for start and end tags to remove the enclosed content while handling cases of only one tag found. If no matching tags are found, it logs a warning and returns everything after or before the found tag.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":315-341", + "content": " \"\"\"\n Helper function to remove tags from a text.\n :param text: The input text.\n :type text: str\n :param tag: The tag to be stripped. 
Defaults to \"\".\n :type tag: str\n :return: The stripped text.\n :rtype: str\n \"\"\"\n text = text.strip()\n if \"Output:\" in text:\n text = text[text.index(\"Output:\") + len(\"Output:\") :].strip()\n if tag != \"\":\n start = text.rfind(f\"<{tag}>\")\n end = text.rfind(f\"\")\n if start != -1 and end != -1:\n text = text[start + len(f\"<{tag}>\") : end].strip()\n elif start != -1:\n logging.warning(\n f\"Only found the start tag <{tag}> in answer: {text}. Returning everything after the tag.\"\n )\n text = text[start + len(f\"<{tag}>\") :].strip()\n elif end != -1:\n logging.warning(\n f\"Only found the end tag in answer: {text}. Returning everything before the tag.\"" + }, + { + "comment": "The code is parsing the response from a language model for an aggregation prompt. It checks if there are enough thought states and performs subpart aggregation by stripping the answer to a single text using a helper function.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":342-368", + "content": " )\n text = text[:end].strip()\n else:\n logging.warning(\n f\"Could not find any tag {tag} in answer: {text}. 
Returning the full answer.\"\n )\n return text\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for an aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the language model.\n :rtype: Union[Dict, List[Dict]]\n \"\"\"\n new_states = []\n for text in texts:\n if len(states[0][\"parts\"]) < len(states[0][\"documents\"]):\n # subpart aggregation\n text = self.strip_answer_helper(text, \"Merged\")" + }, + { + "comment": "The code appears to be a part of a larger function that generates new thought states by aggregating inputs from multiple sources. It seems to handle both partial and full non-disclosure agreement (NDA) cases, stripping the answer text and creating new states accordingly. 
The `parse_generate_answer` function processes response from the language model for generate prompts and returns new thought states after parsing the responses.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":369-392", + "content": " new_state = states[0].copy()\n new_state[\"current\"] = text\n new_state[\"parts\"] = set()\n for state in states:\n new_state[\"parts\"] = new_state[\"parts\"] | state[\"parts\"]\n new_states.append(new_state)\n else:\n # full NDA aggregation\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = states[0].copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the language model." 
+ }, + { + "comment": "The function `get_new_states()` takes a list of texts and returns a list of dictionaries, where each dictionary represents a thought state with the current text as its value.\n\nThe function `parse_score_answer()` takes a list of thought states and responses from the language model, asserts that only one thought state is allowed for scoring, and then initializes lists for redundancy and retain scores.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":393-419", + "content": " :rtype: List[Dict]\n \"\"\"\n new_states = []\n for text in texts:\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = state.copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n :raise AssertionError: If the number of thought states is not one.\n \"\"\"\n assert len(states) == 1, \"Only one state is allowed for scoring.\"\n if len(states) == 1:\n # individual scoring\n redundancy_scores = []\n retain_scores = []" + }, + { + "comment": "This code iterates through text inputs, extracts redundancy and retained scores using regex, handles multiple score cases by logging a warning and selecting the last one or ignoring if no scores found.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":420-440", + "content": " for text in texts:\n answer = self.strip_answer_helper(text, \"Redundancy\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n redundancy_scores.append(float(res[0]))\n elif 
len(res) > 1:\n logging.warning(\n f\"Found multiple redundancy scores in answer: {text}. Returning the last one.\"\n )\n redundancy_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any redundancy score in answer: {text}. Ignoring this answer.\"\n )\n answer = self.strip_answer_helper(text, \"Retained\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n retain_scores.append(float(res[0]))\n elif len(res) > 1:\n logging.warning(\n f\"Found multiple retained scores in answer: {text}. Returning the last one.\"" + }, + { + "comment": "This code snippet is a part of a function responsible for parsing the responses from a language model for an 'improve' prompt. It calculates redundancy and retain scores for each answer, then returns the F1 score based on these scores. If no valid scores are found in any answer, it returns 0.0.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":441-463", + "content": " )\n retain_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any retained score in answer: {text}. Ignoring this answer.\"\n )\n if len(redundancy_scores) == 0 or len(retain_scores) == 0:\n logging.warning(\n f\"Could not find any valid score in any answer. Returning 0.0.\"\n )\n return [0.0]\n mean_redundancy = fmean(redundancy_scores)\n mean_retain = fmean(retain_scores)\n f1 = 2 * mean_redundancy * mean_retain / (mean_redundancy + mean_retain)\n return [f1]\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model." + }, + { + "comment": "This code contains functions for thought state management, parsing responses from a language model, and generating the Graph of Operations for IO method. 
It uses Dict and List[str] as inputs and returns bool or Dict outputs. The code block defines three functions: update_thought_state, parse_validation_answer, and io. The last function generates the Graph of Operations by appending Generate and Score operations to an instance of operations.GraphOfOperations().", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":464-496", + "content": " :type texts: List[str]\n :return: The new thought state after parsing the responses from the language model.\n :rtype: Dict\n \"\"\"\n pass\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\ndef io() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the IO method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph" + }, + { + "comment": "The code defines two functions, `cot()` and `tot()`, which generate the Graph of Operations for CoT and ToT methods respectively. 
The CoT method involves generating one child node, scoring it, while the ToT method generates 10 children nodes initially, keeps the best one, then generates two additional children per iteration.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":499-532", + "content": "def cot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the CoT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph\ndef tot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the ToT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n branch_factor = 10\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best_1 = operations.KeepBestN(1, True)\n operations_graph.append_operation(keep_best_1)\n for _ in range(2):\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))" + }, + { + "comment": "This code generates a Graph of Operations for merging full documents. It first appends operations to generate, score, aggregate, and keep the best scores. 
The last two operations add a predecessor to keep_best and append an additional generate operation with parameters 1 and 10.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":533-560", + "content": " keep_best_2 = operations.KeepBestN(1, True)\n keep_best_2.add_predecessor(keep_best_1)\n operations_graph.append_operation(keep_best_2)\n keep_best_1 = keep_best_2\n return operations_graph\ndef got() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT method, where full documents\n are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best = operations.KeepBestN(3, True)\n operations_graph.append_operation(keep_best)\n operations_graph.append_operation(operations.Aggregate(5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best2 = operations.KeepBestN(1, True)\n keep_best2.add_predecessor(keep_best)\n operations_graph.append_operation(keep_best2)\n operations_graph.append_operation(operations.Generate(1, 10))" + }, + { + "comment": "This code generates a Graph of Operations for the GoT2 method, which merges partial documents. It creates an initial GraphOfOperations object and iteratively adds operations such as Selectors, Generators, and Scorers to the graph. Each iteration consists of selecting specific thoughts, generating new documents, and scoring them. 
The resulting graph is returned.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":561-592", + "content": " operations_graph.append_operation(operations.Score(3, False))\n keep_best3 = operations.KeepBestN(1, True)\n keep_best3.add_predecessor(keep_best2)\n operations_graph.append_operation(keep_best3)\n return operations_graph\ndef got2() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT2 method, where partial\n documents are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n sub_parts = []\n for i in range(0, 4, 2): # should be at most 16 parts\n sub_text = operations.Selector(\n lambda thoughts, list_id=i: [\n operations.Thought(\n state={**thoughts[0].state, \"parts\": {list_id, list_id + 1}}\n )\n ]\n )\n operations_graph.add_operation(sub_text)\n gen_nda = operations.Generate(1, 5)\n gen_nda.add_predecessor(sub_text)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)" + }, + { + "comment": "This code is creating an operations graph for a document merge process. It starts with adding Score and Generate nodes, then iteratively adds Aggregate, Score, and KeepBestN nodes until there's only one node left in the sub_parts list. The Score nodes are used to calculate similarity scores, while the KeepBestN nodes keep the best result from the previous operation. 
The operations graph is then built with these operations added in sequence.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":593-618", + "content": " score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n operations_graph.add_operation(keep_best_nda)\n sub_parts.append(keep_best_nda)\n while len(sub_parts) > 1:\n new_sub_parts = []\n for i in range(0, len(sub_parts), 2):\n if i + 1 == len(sub_parts):\n new_sub_parts.append(sub_parts[i])\n continue\n aggregate = operations.Aggregate(5)\n aggregate.add_predecessor(sub_parts[i])\n aggregate.add_predecessor(sub_parts[i + 1])\n operations_graph.add_operation(aggregate)\n score = operations.Score(3, False)\n score.add_predecessor(aggregate)\n operations_graph.add_operation(score)\n keep_best = operations.KeepBestN(1, True)\n keep_best.add_predecessor(score)\n operations_graph.add_operation(keep_best)\n gen_nda = operations.Generate(1, 5)" + }, + { + "comment": "This code is creating a graph of operations for language model inference. It defines several nodes and adds them to the operations graph, including generation, scoring, and keeping the best node. 
The function run() executes methods for each specified sample within the budget limit.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":619-648", + "content": " gen_nda.add_predecessor(keep_best)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)\n score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n keep_best_nda.add_predecessor(keep_best)\n operations_graph.add_operation(keep_best_nda)\n new_sub_parts.append(keep_best_nda)\n sub_parts = new_sub_parts\n return operations_graph\ndef run(\n data_ids: List[int],\n methods: List[Callable[[], operations.GraphOfOperations]],\n budget: float,\n lm_name: str,\n) -> float:\n \"\"\"\n Controller function that executes each specified method for each specified\n sample while the budget is not exhausted.\n :param data_ids: Indices of the sample to be run.\n :type data_ids: List[int]\n :param methods: List of functions to generate Graphs of Operations.\n :type methods: Each function generates a Graph of Operation." + }, + { + "comment": "This function takes a budget, language model name, and optional data IDs as input. It reads the \"documents.csv\" file, filters the data based on provided data IDs, and then creates folders to save results for different methods using the specified language model. 
The function returns the spent budget in dollars.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":649-678", + "content": " :param budget: Language model budget for the execution in dollars.\n :type budget: float\n :param lm_name: Name of the language model to be used.\n :type lm_name: str\n :return: Spent budget in dollars.\n :rtype: float\n \"\"\"\n orig_budget = budget\n data_path = os.path.join(os.path.dirname(__file__), \"documents.csv\")\n data = []\n with open(data_path, \"r\", encoding=\"utf8\") as f:\n reader = csv.reader(f)\n next(reader)\n for row in reader:\n row[0] = int(row[0])\n data.append(row)\n if data_ids is None or len(data_ids) == 0:\n data_ids = list(range(len(data)))\n selected_data = [data[i] for i in data_ids]\n results_dir = os.path.join(os.path.dirname(__file__), \"results\")\n if not os.path.exists(results_dir):\n os.makedirs(results_dir)\n timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n extra_info = f\"{lm_name}_{'-'.join([method.__name__ for method in methods])}\"\n folder_name = f\"{extra_info}_{timestamp}\"\n results_folder = os.path.join(results_dir, folder_name)" + }, + { + "comment": "This code sets up a results folder, saves the configuration file in JSON format, and initializes logging. It then iterates over selected data and methods, keeping track of remaining budget. 
If the budget becomes zero, it stops execution and logs an error message.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":679-711", + "content": " os.makedirs(results_folder)\n config = {\n \"data\": selected_data,\n \"methods\": [method.__name__ for method in methods],\n \"lm\": lm_name,\n \"budget\": budget,\n }\n with open(os.path.join(results_folder, \"config.json\"), \"w\") as f:\n json.dump(config, f)\n logging.basicConfig(\n filename=os.path.join(results_folder, \"log.log\"),\n filemode=\"w\",\n format=\"%(name)s - %(levelname)s - %(message)s\",\n level=logging.DEBUG,\n )\n for method in methods:\n os.makedirs(os.path.join(results_folder, method.__name__))\n for data in selected_data:\n logging.info(f\"Running data {data[0]}: {data[1]}\")\n if budget <= 0.0:\n logging.error(\n f\"Budget has been depleted, stopping. Data {data[0]} has not been run.\"\n )\n break\n for method in methods:\n logging.info(f\"Running method {method.__name__}\")\n logging.info(f\"Budget left: {budget}\")\n if budget <= 0.0:\n logging.error(" + }, + { + "comment": "This code chunk initializes a language model, creates an operations graph, and sets up an executor for running the method. If the budget is depleted, it will stop execution. The code then attempts to run the executor and logs any exceptions that occur during execution.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":712-740", + "content": " f\"Budget has been depleted, stopping. 
Method {method.__name__} has not been run.\"\n )\n break\n lm = language_models.ChatGPT(\n os.path.join(\n os.path.dirname(__file__),\n \"../../graph_of_thoughts/language_models/config.json\",\n ),\n model_name=lm_name,\n cache=True,\n )\n operations_graph = method()\n executor = controller.Controller(\n lm,\n operations_graph,\n DocMergePrompter(),\n DocMergeParser(),\n {\n \"documents\": [data[2], data[3], data[4], data[5]],\n \"parts\": set(),\n \"current\": \"\",\n \"method\": method.__name__,\n },\n )\n try:\n executor.run()\n except Exception as e:\n logging.error(f\"Exception: {e}\")\n path = os.path.join(" + }, + { + "comment": "This code takes input NDAs, combines them, and evaluates the combined result using an LLM (Language Model). The output is scored based on information coverage without repetition. A budget of 30 is set, with sampling from range(0, 50), and approaches io, cot, tot, got, and got2 are used. The code logs the spent budget after running the function \"run\".", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/doc_merge/doc_merge.py\":741-766", + "content": " results_folder,\n method.__name__,\n f\"{data[0]}.json\",\n )\n for operation in operations_graph.operations:\n for thought in operation.thoughts:\n thought.state[\"parts\"] = list(thought.state[\"parts\"])\n executor.output_graph(path)\n budget -= lm.cost\n return orig_budget - budget\nif __name__ == \"__main__\":\n \"\"\"\n Input (x1, x2, x3, x4): Four NDAs\n Output (y): A new combined NDA\n Evaluation: According to information coverage without repetition (scored by the LLM)\n \"\"\"\n budget = 30\n samples = [item for item in range(0, 50)]\n approaches = [io, cot, tot, got, got2]\n spent = run(samples, approaches, budget, \"chatgpt\")\n logging.info(f\"Spent {spent} out of {budget} budget.\")" + } + ] +} \ No newline at end of file diff --git a/docs/doc/d5de7461-a64a-4f34-93a3-2d622b4cae73.json b/docs/doc/d5de7461-a64a-4f34-93a3-2d622b4cae73.json 
new file mode 100644 index 0000000..77ef458 --- /dev/null +++ b/docs/doc/d5de7461-a64a-4f34-93a3-2d622b4cae73.json @@ -0,0 +1,10 @@ +{ + "summary": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", + "details": [ + { + "comment": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/paper/README.md\":0-4", + "content": "## Plot Data\nThe data used to create the figure of the arXiv preprint article can be\nfound in the `final_results_gpt35.tar.bz2` archive. Unpack the archive\nand run the file `plots.py`." + } + ] +} \ No newline at end of file diff --git a/docs/doc/d632b16c-9b55-42f4-b40a-ba725f9e6b7d.json b/docs/doc/d632b16c-9b55-42f4-b40a-ba725f9e6b7d.json new file mode 100644 index 0000000..12da0b7 --- /dev/null +++ b/docs/doc/d632b16c-9b55-42f4-b40a-ba725f9e6b7d.json @@ -0,0 +1,35 @@ +{ + "summary": "The code manages the execution flow of a graph's operations using language models and classes for processing, serialization, and debugging, resulting in an organized list written to a JSON file.", + "details": [ + { + "comment": "This code defines a Controller class to manage the execution flow of the Graph of Operations, utilizing language models, graph operations, prompting, and parsing. 
The Controller is initialized with an AbstractLanguageModel, GraphOfOperations, Prompter, Parser, and problem parameters.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":0-34", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport logging\nfrom typing import List\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.operations import GraphOfOperations, Thought\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass Controller:\n \"\"\"\n Controller class to manage the execution flow of the Graph of Operations,\n generating the Graph Reasoning State.\n This involves language models, graph operations, prompting, and parsing.\n \"\"\"\n def __init__(\n self,\n lm: AbstractLanguageModel,\n graph: GraphOfOperations,\n prompter: Prompter,\n parser: Parser,\n problem_parameters: dict,\n ) -> None:\n \"\"\"\n Initialize the Controller instance with the language model,\n operations graph, prompter, parser, and problem parameters." + }, + { + "comment": "This function initializes a controller object with provided language model, graph of operations, prompter, parser, and problem parameters. It also sets the run_executed flag to False. 
The run method executes the operations from the Graph of Operations based on their readiness, ensuring the program is in a valid state before execution.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":36-59", + "content": " :param lm: An instance of the AbstractLanguageModel.\n :type lm: AbstractLanguageModel\n :param graph: The Graph of Operations to be executed.\n :type graph: OperationsGraph\n :param prompter: An instance of the Prompter class, used to generate prompts.\n :type prompter: Prompter\n :param parser: An instance of the Parser class, used to parse responses.\n :type parser: Parser\n :param problem_parameters: Initial parameters/state of the problem.\n :type problem_parameters: dict\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__module__)\n self.lm = lm\n self.graph = graph\n self.prompter = prompter\n self.parser = parser\n self.problem_parameters = problem_parameters\n self.run_executed = False\n def run(self) -> None:\n \"\"\"\n Run the controller and execute the operations from the Graph of\n Operations based on their readiness.\n Ensures the program is in a valid state before execution." + }, + { + "comment": "This code snippet is checking the validity of the program state and executing operations in a queue. It raises AssertionError if the Graph of Operations has no roots or if a successor operation is not found in the graph. 
The code logs debug messages for state checks, information messages for executed operations, and asserts to ensure proper execution order.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":60-81", + "content": " :raises AssertionError: If the Graph of Operation has no roots.\n :raises AssertionError: If the successor of an operation is not in the Graph of Operations.\n \"\"\"\n self.logger.debug(\"Checking that the program is in a valid state\")\n assert self.graph.roots is not None, \"The operations graph has no root\"\n self.logger.debug(\"The program is in a valid state\")\n execution_queue = [\n operation\n for operation in self.graph.operations\n if operation.can_be_executed()\n ]\n while len(execution_queue) > 0:\n current_operation = execution_queue.pop(0)\n self.logger.info(\"Executing operation %s\", current_operation.operation_type)\n current_operation.execute(\n self.lm, self.prompter, self.parser, **self.problem_parameters\n )\n self.logger.info(\"Operation %s executed\", current_operation.operation_type)\n for operation in current_operation.successors:\n assert (" + }, + { + "comment": "Code snippet defines a class with methods to execute operations in a graph, retrieve final thoughts after execution, and serialize the graph state and results. The `run` method executes operations in the graph, checks if operation is in graph's operations, appends executable operations to an execution queue, logs information when all operations are executed, and sets `run_executed` flag to True. `get_final_thoughts` method retrieves final thoughts after execution of all operations by iterating through graph's leaves and getting thoughts from each operation. It raises AssertionError if the run method has not been executed yet. 
`output_graph` method serializes state and results of operations graph to a JSON file at specified path.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":82-105", + "content": " operation in self.graph.operations\n ), \"The successor of an operation is not in the operations graph\"\n if operation.can_be_executed():\n execution_queue.append(operation)\n self.logger.info(\"All operations executed\")\n self.run_executed = True\n def get_final_thoughts(self) -> List[List[Thought]]:\n \"\"\"\n Retrieve the final thoughts after all operations have been executed.\n :return: List of thoughts for each operation in the graph's leaves.\n :rtype: List[List[Thought]]\n :raises AssertionError: If the `run` method hasn't been executed yet.\n \"\"\"\n assert self.run_executed, \"The run method has not been executed\"\n return [operation.get_thoughts() for operation in self.graph.leaves]\n def output_graph(self, path: str) -> None:\n \"\"\"\n Serialize the state and results of the operations graph to a JSON file.\n :param path: The path to the output file.\n :type path: str" + }, + { + "comment": "This code iterates through the operations in a graph, serializes each operation with its thoughts, and adds extra information if any thoughts have been scored, validated, or are invalid. 
This is used for generating an output list of serialized operations and associated data.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":106-127", + "content": " \"\"\"\n output = []\n for operation in self.graph.operations:\n operation_serialized = {\n \"operation\": operation.operation_type.name,\n \"thoughts\": [thought.state for thought in operation.get_thoughts()],\n }\n if any([thought.scored for thought in operation.get_thoughts()]):\n operation_serialized[\"scored\"] = [\n thought.scored for thought in operation.get_thoughts()\n ]\n operation_serialized[\"scores\"] = [\n thought.score for thought in operation.get_thoughts()\n ]\n if any([thought.validated for thought in operation.get_thoughts()]):\n operation_serialized[\"validated\"] = [\n thought.validated for thought in operation.get_thoughts()\n ]\n operation_serialized[\"validity\"] = [\n thought.valid for thought in operation.get_thoughts()\n ]\n if any(" + }, + { + "comment": "This code iterates over the thoughts in each operation, compares them to ground truth, and determines if they were solved. 
The data is serialized and appended to a list, which is then written to a JSON file along with prompt, completion tokens, and cost information.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/controller.py\":128-151", + "content": " [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n ):\n operation_serialized[\"compared_to_ground_truth\"] = [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n operation_serialized[\"problem_solved\"] = [\n thought.solved for thought in operation.get_thoughts()\n ]\n output.append(operation_serialized)\n output.append(\n {\n \"prompt_tokens\": self.lm.prompt_tokens,\n \"completion_tokens\": self.lm.completion_tokens,\n \"cost\": self.lm.cost,\n }\n )\n with open(path, \"w\") as file:\n file.write(json.dumps(output, indent=2))" + } + ] +} \ No newline at end of file diff --git a/docs/doc/d67c0757-c845-401d-986c-330d77c79d0f.json b/docs/doc/d67c0757-c845-401d-986c-330d77c79d0f.json new file mode 100644 index 0000000..567b06b --- /dev/null +++ b/docs/doc/d67c0757-c845-401d-986c-330d77c79d0f.json @@ -0,0 +1,25 @@ +{ + "summary": "The code includes an abstract class that defines three methods for parsing different language model responses, utilizing thought states and text inputs. The 'pass' statement serves as a placeholder or temporary measure.", + "details": [ + { + "comment": "Parser abstract class for language model responses parsing. Defines an interface to be implemented by subclasses. 
Used for aggregation prompts and takes thought states and language model responses as input, returning updated thought states after parsing the response.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/parser/parser.py\":0-30", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List, Union\nclass Parser(ABC):\n \"\"\"\n Abstract base class that defines the interface for all parsers.\n Parsers are used to parse the responses from the language models.\n \"\"\"\n @abstractmethod\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for a aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model." + }, + { + "comment": "This code defines three abstract methods in a class: `parse_improve_answer`, `parse_generate_answer`, and `parse`. These methods are responsible for parsing responses from a language model given a thought state and text inputs. 
The return types vary depending on the method, with `parse` returning either a dictionary or a list of dictionaries.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/parser/parser.py\":31-58", + "content": " :rtype: Union[Dict, List[Dict]]\n \"\"\"\n pass\n @abstractmethod\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought state after parsing the response from the language model.\n :rtype: Dict\n \"\"\"\n pass\n @abstractmethod\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model." + }, + { + "comment": "This code defines an abstract base class with three methods for parsing different types of language model responses. The `parse_thought_answer` method takes a thought state and text response to determine if the thought is valid. The `parse_validation_answer` method takes a list of thought states and text responses to determine if each thought's state is valid. 
Finally, the `parse_score_answer` method takes a list of thought states and text responses and returns the scores for each thought state.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/parser/parser.py\":59-88", + "content": " :rtype: List[Dict]\n \"\"\"\n pass\n @abstractmethod\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\n @abstractmethod\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n \"\"\"" + }, + { + "comment": "The code contains a 'pass' statement, which is used as a placeholder and does nothing. It could be used for future implementation or as a temporary measure during development.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/parser/parser.py\":89-89", + "content": " pass" + } + ] +} \ No newline at end of file diff --git a/docs/doc/dfc74651-7417-43a9-b64d-74c1b2eb667f.json b/docs/doc/dfc74651-7417-43a9-b64d-74c1b2eb667f.json new file mode 100644 index 0000000..e8d5707 --- /dev/null +++ b/docs/doc/dfc74651-7417-43a9-b64d-74c1b2eb667f.json @@ -0,0 +1,15 @@ +{ + "summary": "The code uses Hatchling to define project settings for the Python package \"graph_of_thoughts,\" including package details, dependencies, and URLs. 
It also includes a TOML configuration file setting up an entry point for executable scripts under the project's namespace within the \"scripts\" section of the \"project\" block.", + "details": [ + { + "comment": "This code defines the project settings for a Python package called \"graph_of_thoughts\" using Hatchling as the build system. It specifies the package name, version, authors, description, dependencies, and URLs for further information.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/pyproject.toml\":0-36", + "content": "[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n[project]\nname = \"graph_of_thoughts\"\nversion = \"0.0.3\"\nauthors = [\n { name=\"Maciej Besta\", email=\"maciej.besta@inf.ethz.ch\" },\n { name=\"Nils Blach\", email=\"nils.blach@inf.ethz.ch\" },\n { name=\"Ales Kubicek\", email=\"akubicek@student.ethz.ch\" },\n { name=\"Robert Gerstenberger\", email=\"gerstenberger.robert@gmail.com\" },\n]\ndescription = \"Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models\"\nreadme = \"README.md\"\nlicense = {file = \"LICENSE\"}\nrequires-python = \">=3.8\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"Operating System :: OS Independent\",\n]\ndependencies = [\n \"backoff>=2.2.1,<3.0.0\",\n \"openai>=1.0.0,<2.0.0\",\n \"matplotlib>=3.7.1,<4.0.0\",\n \"numpy>=1.24.3,<2.0.0\",\n \"pandas>=2.0.3,<3.0.0\",\n \"sympy>=1.12,<2.0\",\n \"torch>=2.0.1,<3.0.0\",\n \"transformers>=4.31.0,<5.0.0\",\n \"accelerate>=0.21.0,<1.0.0\",\n \"bitsandbytes>=0.41.0,<1.0.0\",\n \"scipy>=1.10.1,<2.0.0\",\n]\n[project.urls]\nHomepage = \"https://github.com/spcl/graph-of-thoughts\"" + }, + { + "comment": "The code snippet is a part of a TOML configuration file, specifically defining the \"scripts\" section within the \"project\" block. 
It sets up an entry point for executable scripts under the project's namespace.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/pyproject.toml\":38-38", + "content": "[project.scripts]" + } + ] +} \ No newline at end of file diff --git a/docs/doc/dff07df9-81f0-4854-b618-a4bbd8d283bd.json b/docs/doc/dff07df9-81f0-4854-b618-a4bbd8d283bd.json new file mode 100644 index 0000000..38c9620 --- /dev/null +++ b/docs/doc/dff07df9-81f0-4854-b618-a4bbd8d283bd.json @@ -0,0 +1,10 @@ +{ + "summary": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", + "details": [ + { + "comment": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/parser/__init__.py\":0-0", + "content": "from .parser import Parser" + } + ] +} \ No newline at end of file diff --git a/docs/doc/f03e6678-1954-4890-bdb6-f984291581f8.json b/docs/doc/f03e6678-1954-4890-bdb6-f984291581f8.json new file mode 100644 index 0000000..e7d5e48 --- /dev/null +++ b/docs/doc/f03e6678-1954-4890-bdb6-f984291581f8.json @@ -0,0 +1,35 @@ +{ + "summary": "The code reads and sorts JSON data, calculates scores for sorting algorithm performances, plots boxplots, customizes options, adjusts y-axis limits, adds annotations, saves as PDF, and calls function with GPT-3.5 parameters.", + "details": [ + { + "comment": "This code reads a directory of JSON files, extracts their key and data, and stores them in a dictionary. It handles directories recursively and does not include non-JSON files or folders without .json files. 
This function may be used to collect and organize data from multiple sources.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":0-28", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():" + }, + { + "comment": "Code sorts results by \"key\" and returns them in a new dictionary. 
The sorted results are then processed to calculate scores for each method, including score, solution status, prompt tokens, completion tokens, and cost.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":29-57", + "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]" + }, + { + "comment": "The code defines a function `get_plotting_data` that extracts and organizes data for plotting. It takes the base directory as input, retrieves complete results from it, then gets final scores. The final scores are organized into a dictionary called `results_plotting`, which contains scores, solved counts, and costs for each method. Another function, `plot_results`, is defined to handle the actual plotting of the data with customizable options. 
It extracts scores in the specified order, organizes them, and provides customizability such as display settings and annotations.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":58-94", + "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]" + }, + { + "comment": "This code creates a boxplot to visualize the scores of different methods, sets the ticks and labels, adjusts the y-axis limits based on length, and defines the y-lower limit as y_lower.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":95-130", + "content": " if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n method_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n 
plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(method_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)" + }, + { + "comment": "Setting the y-tick positions and labels for ax2, setting the y-label for ax2 if display_right_ylabel is True, setting the title of the plot to length elements, setting the lower limit of the y-axis for ax2 if cost_upper > 0, adjusting the y-ticks' values for ax2 based on the number of ticks and the upper cost limit, and finally adding annotations for solved methods.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":131-162", + "content": " )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:\n ax.set_ylabel(f\"#incorrectly sorted elements; the lower the better\")\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue" + }, + { + "comment": "The code plots sorting algorithm 
performance data and displays the solved count for each method. It saves the plot as a PDF with the model name and length appended to its filename. The function is then called again with specific parameters, including GPT-3.5 as the model.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/examples/sorting/plot.py\":163-185", + "content": " solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"sorting_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)" + } + ] +} \ No newline at end of file diff --git a/docs/doc/f7b1b6c4-e340-42de-8dd2-3744278951ef.json b/docs/doc/f7b1b6c4-e340-42de-8dd2-3744278951ef.json new file mode 100644 index 0000000..f1ede0b --- /dev/null +++ b/docs/doc/f7b1b6c4-e340-42de-8dd2-3744278951ef.json @@ -0,0 +1,15 @@ +{ + "summary": "The Controller class manages the execution of a graph of operations using an LLM and requires custom prompter, parser, GoO, and AbstractLanguageModel. The code initializes an instance with these parameters, runs the executor, and outputs the generated graph to file.", + "details": [ + { + "comment": "The Controller class manages the execution of the Graph of Operations (GoO) using a Large Language Model (LLM). It requires custom Prompter and Parser, along with instantiated GraphOfOperations and AbstractLanguageModel. 
The initial state is represented as a dictionary for prompts in operations.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/README.md\":0-15", + "content": "# Controller\nThe Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts.\nGoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). \nIn order for a GoO to be executed, an instance of Large Language Model (LLM) must be supplied to the controller (along with other required objects).\nPlease refer to the [Language Models](../language_models/README.md) section for more information about LLMs. \nThe following section describes how to instantiate the Controller to run a defined GoO. \n## Controller Instantiation\n- Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately.\n- Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations.\n```\nlm = ...create\ngraph_of_operations = ...create" + }, + { + "comment": "The code initializes an instance of the Controller class with necessary parameters, including a language model (lm), graph of operations, custom prompter and parser, and an initial state. 
It then runs the executor and writes the generated graph containing individual operations, thoughts, scores, validity, and token usage to an output file at the specified path.", + "location": "\"/media/root/Toshiba XG3/works/graph-of-thoughts/docs/src/graph_of_thoughts/controller/README.md\":17-27", + "content": "executor = controller.Controller(\n lm,\n graph_of_operations,\n ,\n ,\n ,\n)\nexecutor.run()\nexecutor.output_graph(\"path/to/output.json\")\n```\n- After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost." + } + ] +} \ No newline at end of file diff --git a/docs/github-markdown.css b/docs/github-markdown.css new file mode 100644 index 0000000..96a4f29 --- /dev/null +++ b/docs/github-markdown.css @@ -0,0 +1,1197 @@ +@media (prefers-color-scheme: dark) { + + .markdown-body, + [data-theme="dark"] { + /*dark*/ + color-scheme: dark; + --color-prettylights-syntax-comment: #8b949e; + --color-prettylights-syntax-constant: #79c0ff; + --color-prettylights-syntax-entity: #d2a8ff; + --color-prettylights-syntax-storage-modifier-import: #c9d1d9; + --color-prettylights-syntax-entity-tag: #7ee787; + --color-prettylights-syntax-keyword: #ff7b72; + --color-prettylights-syntax-string: #a5d6ff; + --color-prettylights-syntax-variable: #ffa657; + --color-prettylights-syntax-brackethighlighter-unmatched: #f85149; + --color-prettylights-syntax-invalid-illegal-text: #f0f6fc; + --color-prettylights-syntax-invalid-illegal-bg: #8e1519; + --color-prettylights-syntax-carriage-return-text: #f0f6fc; + --color-prettylights-syntax-carriage-return-bg: #b62324; + --color-prettylights-syntax-string-regexp: #7ee787; + --color-prettylights-syntax-markup-list: #f2cc60; + --color-prettylights-syntax-markup-heading: #1f6feb; + --color-prettylights-syntax-markup-italic: #c9d1d9; + --color-prettylights-syntax-markup-bold: #c9d1d9; + 
--color-prettylights-syntax-markup-deleted-text: #ffdcd7; + --color-prettylights-syntax-markup-deleted-bg: #67060c; + --color-prettylights-syntax-markup-inserted-text: #aff5b4; + --color-prettylights-syntax-markup-inserted-bg: #033a16; + --color-prettylights-syntax-markup-changed-text: #ffdfb6; + --color-prettylights-syntax-markup-changed-bg: #5a1e02; + --color-prettylights-syntax-markup-ignored-text: #c9d1d9; + --color-prettylights-syntax-markup-ignored-bg: #1158c7; + --color-prettylights-syntax-meta-diff-range: #d2a8ff; + --color-prettylights-syntax-brackethighlighter-angle: #8b949e; + --color-prettylights-syntax-sublimelinter-gutter-mark: #484f58; + --color-prettylights-syntax-constant-other-reference-link: #a5d6ff; + --color-fg-default: #e6edf3; + --color-fg-muted: #848d97; + --color-fg-subtle: #6e7681; + --color-canvas-default: #0d1117; + --color-canvas-subtle: #161b22; + --color-border-default: #30363d; + --color-border-muted: #21262d; + --color-neutral-muted: rgba(110, 118, 129, 0.4); + --color-accent-fg: #2f81f7; + --color-accent-emphasis: #1f6feb; + --color-success-fg: #3fb950; + --color-success-emphasis: #238636; + --color-attention-fg: #d29922; + --color-attention-emphasis: #9e6a03; + --color-attention-subtle: rgba(187, 128, 9, 0.15); + --color-danger-fg: #f85149; + --color-danger-emphasis: #da3633; + --color-done-fg: #a371f7; + --color-done-emphasis: #8957e5; + } +} + +@media (prefers-color-scheme: light) { + + .markdown-body, + [data-theme="light"] { + /*light*/ + color-scheme: light; + --color-prettylights-syntax-comment: #57606a; + --color-prettylights-syntax-constant: #0550ae; + --color-prettylights-syntax-entity: #6639ba; + --color-prettylights-syntax-storage-modifier-import: #24292f; + --color-prettylights-syntax-entity-tag: #116329; + --color-prettylights-syntax-keyword: #cf222e; + --color-prettylights-syntax-string: #0a3069; + --color-prettylights-syntax-variable: #953800; + --color-prettylights-syntax-brackethighlighter-unmatched: #82071e; + 
--color-prettylights-syntax-invalid-illegal-text: #f6f8fa; + --color-prettylights-syntax-invalid-illegal-bg: #82071e; + --color-prettylights-syntax-carriage-return-text: #f6f8fa; + --color-prettylights-syntax-carriage-return-bg: #cf222e; + --color-prettylights-syntax-string-regexp: #116329; + --color-prettylights-syntax-markup-list: #3b2300; + --color-prettylights-syntax-markup-heading: #0550ae; + --color-prettylights-syntax-markup-italic: #24292f; + --color-prettylights-syntax-markup-bold: #24292f; + --color-prettylights-syntax-markup-deleted-text: #82071e; + --color-prettylights-syntax-markup-deleted-bg: #ffebe9; + --color-prettylights-syntax-markup-inserted-text: #116329; + --color-prettylights-syntax-markup-inserted-bg: #dafbe1; + --color-prettylights-syntax-markup-changed-text: #953800; + --color-prettylights-syntax-markup-changed-bg: #ffd8b5; + --color-prettylights-syntax-markup-ignored-text: #eaeef2; + --color-prettylights-syntax-markup-ignored-bg: #0550ae; + --color-prettylights-syntax-meta-diff-range: #8250df; + --color-prettylights-syntax-brackethighlighter-angle: #57606a; + --color-prettylights-syntax-sublimelinter-gutter-mark: #8c959f; + --color-prettylights-syntax-constant-other-reference-link: #0a3069; + --color-fg-default: #1F2328; + --color-fg-muted: #656d76; + --color-fg-subtle: #6e7781; + --color-canvas-default: #ffffff; + --color-canvas-subtle: #f6f8fa; + --color-border-default: #d0d7de; + --color-border-muted: hsla(210, 18%, 87%, 1); + --color-neutral-muted: rgba(175, 184, 193, 0.2); + --color-accent-fg: #0969da; + --color-accent-emphasis: #0969da; + --color-success-fg: #1a7f37; + --color-success-emphasis: #1f883d; + --color-attention-fg: #9a6700; + --color-attention-emphasis: #9a6700; + --color-attention-subtle: #fff8c5; + --color-danger-fg: #d1242f; + --color-danger-emphasis: #cf222e; + --color-done-fg: #8250df; + --color-done-emphasis: #8250df; + } +} + +.markdown-body { + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust: 100%; + 
margin: 0; + color: var(--color-fg-default); + background-color: var(--color-canvas-default); + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; + font-size: 16px; + line-height: 1.5; + word-wrap: break-word; +} + +.markdown-body .octicon { + display: inline-block; + fill: currentColor; + vertical-align: text-bottom; +} + +.markdown-body h1:hover .anchor .octicon-link:before, +.markdown-body h2:hover .anchor .octicon-link:before, +.markdown-body h3:hover .anchor .octicon-link:before, +.markdown-body h4:hover .anchor .octicon-link:before, +.markdown-body h5:hover .anchor .octicon-link:before, +.markdown-body h6:hover .anchor .octicon-link:before { + width: 16px; + height: 16px; + content: ' '; + display: inline-block; + background-color: currentColor; + -webkit-mask-image: url("data:image/svg+xml,"); + mask-image: url("data:image/svg+xml,"); +} + +.markdown-body details, +.markdown-body figcaption, +.markdown-body figure { + display: block; +} + +.markdown-body summary { + display: list-item; +} + +.markdown-body [hidden] { + display: none !important; +} + +.markdown-body a { + background-color: transparent; + color: var(--color-accent-fg); + text-decoration: none; +} + +.markdown-body abbr[title] { + border-bottom: none; + -webkit-text-decoration: underline dotted; + text-decoration: underline dotted; +} + +.markdown-body b, +.markdown-body strong { + font-weight: var(--base-text-weight-semibold, 600); +} + +.markdown-body dfn { + font-style: italic; +} + +.markdown-body h1 { + margin: .67em 0; + font-weight: var(--base-text-weight-semibold, 600); + padding-bottom: .3em; + font-size: 2em; + border-bottom: 1px solid var(--color-border-muted); +} + +.markdown-body mark { + background-color: var(--color-attention-subtle); + color: var(--color-fg-default); +} + +.markdown-body small { + font-size: 90%; +} + +.markdown-body sub, +.markdown-body sup { + font-size: 75%; + 
line-height: 0; + position: relative; + vertical-align: baseline; +} + +.markdown-body sub { + bottom: -0.25em; +} + +.markdown-body sup { + top: -0.5em; +} + +.markdown-body img { + border-style: none; + max-width: 100%; + box-sizing: content-box; + background-color: var(--color-canvas-default); +} + +.markdown-body code, +.markdown-body kbd, +.markdown-body pre, +.markdown-body samp { + font-family: monospace; + font-size: 1em; +} + +.markdown-body figure { + margin: 1em 40px; +} + +.markdown-body hr { + box-sizing: content-box; + overflow: hidden; + background: transparent; + border-bottom: 1px solid var(--color-border-muted); + height: .25em; + padding: 0; + margin: 24px 0; + background-color: var(--color-border-default); + border: 0; +} + +.markdown-body input { + font: inherit; + margin: 0; + overflow: visible; + font-family: inherit; + font-size: inherit; + line-height: inherit; +} + +.markdown-body [type=button], +.markdown-body [type=reset], +.markdown-body [type=submit] { + -webkit-appearance: button; + appearance: button; +} + +.markdown-body [type=checkbox], +.markdown-body [type=radio] { + box-sizing: border-box; + padding: 0; +} + +.markdown-body [type=number]::-webkit-inner-spin-button, +.markdown-body [type=number]::-webkit-outer-spin-button { + height: auto; +} + +.markdown-body [type=search]::-webkit-search-cancel-button, +.markdown-body [type=search]::-webkit-search-decoration { + -webkit-appearance: none; + appearance: none; +} + +.markdown-body ::-webkit-input-placeholder { + color: inherit; + opacity: .54; +} + +.markdown-body ::-webkit-file-upload-button { + -webkit-appearance: button; + appearance: button; + font: inherit; +} + +.markdown-body a:hover { + text-decoration: underline; +} + +.markdown-body ::placeholder { + color: var(--color-fg-subtle); + opacity: 1; +} + +.markdown-body hr::before { + display: table; + content: ""; +} + +.markdown-body hr::after { + display: table; + clear: both; + content: ""; +} + +.markdown-body table { + 
border-spacing: 0; + border-collapse: collapse; + display: block; + width: max-content; + max-width: 100%; + overflow: auto; +} + +.markdown-body td, +.markdown-body th { + padding: 0; +} + +.markdown-body details summary { + cursor: pointer; +} + +.markdown-body details:not([open])>*:not(summary) { + display: none !important; +} + +.markdown-body a:focus, +.markdown-body [role=button]:focus, +.markdown-body input[type=radio]:focus, +.markdown-body input[type=checkbox]:focus { + outline: 2px solid var(--color-accent-fg); + outline-offset: -2px; + box-shadow: none; +} + +.markdown-body a:focus:not(:focus-visible), +.markdown-body [role=button]:focus:not(:focus-visible), +.markdown-body input[type=radio]:focus:not(:focus-visible), +.markdown-body input[type=checkbox]:focus:not(:focus-visible) { + outline: solid 1px transparent; +} + +.markdown-body a:focus-visible, +.markdown-body [role=button]:focus-visible, +.markdown-body input[type=radio]:focus-visible, +.markdown-body input[type=checkbox]:focus-visible { + outline: 2px solid var(--color-accent-fg); + outline-offset: -2px; + box-shadow: none; +} + +.markdown-body a:not([class]):focus, +.markdown-body a:not([class]):focus-visible, +.markdown-body input[type=radio]:focus, +.markdown-body input[type=radio]:focus-visible, +.markdown-body input[type=checkbox]:focus, +.markdown-body input[type=checkbox]:focus-visible { + outline-offset: 0; +} + +.markdown-body kbd { + display: inline-block; + padding: 3px 5px; + font: 11px ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace; + line-height: 10px; + color: var(--color-fg-default); + vertical-align: middle; + background-color: var(--color-canvas-subtle); + border: solid 1px var(--color-neutral-muted); + border-bottom-color: var(--color-neutral-muted); + border-radius: 6px; + box-shadow: inset 0 -1px 0 var(--color-neutral-muted); +} + +.markdown-body h1, +.markdown-body h2, +.markdown-body h3, +.markdown-body h4, +.markdown-body h5, 
+.markdown-body h6 { + margin-top: 24px; + margin-bottom: 16px; + font-weight: var(--base-text-weight-semibold, 600); + line-height: 1.25; +} + +.markdown-body h2 { + font-weight: var(--base-text-weight-semibold, 600); + padding-bottom: .3em; + font-size: 1.5em; + border-bottom: 1px solid var(--color-border-muted); +} + +.markdown-body h3 { + font-weight: var(--base-text-weight-semibold, 600); + font-size: 1.25em; +} + +.markdown-body h4 { + font-weight: var(--base-text-weight-semibold, 600); + font-size: 1em; +} + +.markdown-body h5 { + font-weight: var(--base-text-weight-semibold, 600); + font-size: .875em; +} + +.markdown-body h6 { + font-weight: var(--base-text-weight-semibold, 600); + font-size: .85em; + color: var(--color-fg-muted); +} + +.markdown-body p { + margin-top: 0; + margin-bottom: 10px; +} + +.markdown-body blockquote { + margin: 0; + padding: 0 1em; + color: var(--color-fg-muted); + border-left: .25em solid var(--color-border-default); +} + +.markdown-body ul, +.markdown-body ol { + margin-top: 0; + margin-bottom: 0; + padding-left: 2em; +} + +.markdown-body ol ol, +.markdown-body ul ol { + list-style-type: lower-roman; +} + +.markdown-body ul ul ol, +.markdown-body ul ol ol, +.markdown-body ol ul ol, +.markdown-body ol ol ol { + list-style-type: lower-alpha; +} + +.markdown-body dd { + margin-left: 0; +} + +.markdown-body tt, +.markdown-body code, +.markdown-body samp { + font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace; + font-size: 12px; +} + +.markdown-body pre { + margin-top: 0; + margin-bottom: 0; + font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace; + font-size: 12px; + word-wrap: normal; +} + +.markdown-body .octicon { + display: inline-block; + overflow: visible !important; + vertical-align: text-bottom; + fill: currentColor; +} + +.markdown-body input::-webkit-outer-spin-button, +.markdown-body input::-webkit-inner-spin-button { + margin: 0; + 
-webkit-appearance: none; + appearance: none; +} + +.markdown-body .mr-2 { + margin-right: var(--base-size-8, 8px) !important; +} + +.markdown-body::before { + display: table; + content: ""; +} + +.markdown-body::after { + display: table; + clear: both; + content: ""; +} + +.markdown-body>*:first-child { + margin-top: 0 !important; +} + +.markdown-body>*:last-child { + margin-bottom: 0 !important; +} + +.markdown-body a:not([href]) { + color: inherit; + text-decoration: none; +} + +.markdown-body .absent { + color: var(--color-danger-fg); +} + +.markdown-body .anchor { + float: left; + padding-right: 4px; + margin-left: -20px; + line-height: 1; +} + +.markdown-body .anchor:focus { + outline: none; +} + +.markdown-body p, +.markdown-body blockquote, +.markdown-body ul, +.markdown-body ol, +.markdown-body dl, +.markdown-body table, +.markdown-body pre, +.markdown-body details { + margin-top: 0; + margin-bottom: 16px; +} + +.markdown-body blockquote>:first-child { + margin-top: 0; +} + +.markdown-body blockquote>:last-child { + margin-bottom: 0; +} + +.markdown-body h1 .octicon-link, +.markdown-body h2 .octicon-link, +.markdown-body h3 .octicon-link, +.markdown-body h4 .octicon-link, +.markdown-body h5 .octicon-link, +.markdown-body h6 .octicon-link { + color: var(--color-fg-default); + vertical-align: middle; + visibility: hidden; +} + +.markdown-body h1:hover .anchor, +.markdown-body h2:hover .anchor, +.markdown-body h3:hover .anchor, +.markdown-body h4:hover .anchor, +.markdown-body h5:hover .anchor, +.markdown-body h6:hover .anchor { + text-decoration: none; +} + +.markdown-body h1:hover .anchor .octicon-link, +.markdown-body h2:hover .anchor .octicon-link, +.markdown-body h3:hover .anchor .octicon-link, +.markdown-body h4:hover .anchor .octicon-link, +.markdown-body h5:hover .anchor .octicon-link, +.markdown-body h6:hover .anchor .octicon-link { + visibility: visible; +} + +.markdown-body h1 tt, +.markdown-body h1 code, +.markdown-body h2 tt, +.markdown-body h2 
code, +.markdown-body h3 tt, +.markdown-body h3 code, +.markdown-body h4 tt, +.markdown-body h4 code, +.markdown-body h5 tt, +.markdown-body h5 code, +.markdown-body h6 tt, +.markdown-body h6 code { + padding: 0 .2em; + font-size: inherit; +} + +.markdown-body summary h1, +.markdown-body summary h2, +.markdown-body summary h3, +.markdown-body summary h4, +.markdown-body summary h5, +.markdown-body summary h6 { + display: inline-block; +} + +.markdown-body summary h1 .anchor, +.markdown-body summary h2 .anchor, +.markdown-body summary h3 .anchor, +.markdown-body summary h4 .anchor, +.markdown-body summary h5 .anchor, +.markdown-body summary h6 .anchor { + margin-left: -40px; +} + +.markdown-body summary h1, +.markdown-body summary h2 { + padding-bottom: 0; + border-bottom: 0; +} + +.markdown-body ul.no-list, +.markdown-body ol.no-list { + padding: 0; + list-style-type: none; +} + +.markdown-body ol[type="a s"] { + list-style-type: lower-alpha; +} + +.markdown-body ol[type="A s"] { + list-style-type: upper-alpha; +} + +.markdown-body ol[type="i s"] { + list-style-type: lower-roman; +} + +.markdown-body ol[type="I s"] { + list-style-type: upper-roman; +} + +.markdown-body ol[type="1"] { + list-style-type: decimal; +} + +.markdown-body div>ol:not([type]) { + list-style-type: decimal; +} + +.markdown-body ul ul, +.markdown-body ul ol, +.markdown-body ol ol, +.markdown-body ol ul { + margin-top: 0; + margin-bottom: 0; +} + +.markdown-body li>p { + margin-top: 16px; +} + +.markdown-body li+li { + margin-top: .25em; +} + +.markdown-body dl { + padding: 0; +} + +.markdown-body dl dt { + padding: 0; + margin-top: 16px; + font-size: 1em; + font-style: italic; + font-weight: var(--base-text-weight-semibold, 600); +} + +.markdown-body dl dd { + padding: 0 16px; + margin-bottom: 16px; +} + +.markdown-body table th { + font-weight: var(--base-text-weight-semibold, 600); +} + +.markdown-body table th, +.markdown-body table td { + padding: 6px 13px; + border: 1px solid 
var(--color-border-default); +} + +.markdown-body table td>:last-child { + margin-bottom: 0; +} + +.markdown-body table tr { + background-color: var(--color-canvas-default); + border-top: 1px solid var(--color-border-muted); +} + +.markdown-body table tr:nth-child(2n) { + background-color: var(--color-canvas-subtle); +} + +.markdown-body table img { + background-color: transparent; +} + +.markdown-body img[align=right] { + padding-left: 20px; +} + +.markdown-body img[align=left] { + padding-right: 20px; +} + +.markdown-body .emoji { + max-width: none; + vertical-align: text-top; + background-color: transparent; +} + +.markdown-body span.frame { + display: block; + overflow: hidden; +} + +.markdown-body span.frame>span { + display: block; + float: left; + width: auto; + padding: 7px; + margin: 13px 0 0; + overflow: hidden; + border: 1px solid var(--color-border-default); +} + +.markdown-body span.frame span img { + display: block; + float: left; +} + +.markdown-body span.frame span span { + display: block; + padding: 5px 0 0; + clear: both; + color: var(--color-fg-default); +} + +.markdown-body span.align-center { + display: block; + overflow: hidden; + clear: both; +} + +.markdown-body span.align-center>span { + display: block; + margin: 13px auto 0; + overflow: hidden; + text-align: center; +} + +.markdown-body span.align-center span img { + margin: 0 auto; + text-align: center; +} + +.markdown-body span.align-right { + display: block; + overflow: hidden; + clear: both; +} + +.markdown-body span.align-right>span { + display: block; + margin: 13px 0 0; + overflow: hidden; + text-align: right; +} + +.markdown-body span.align-right span img { + margin: 0; + text-align: right; +} + +.markdown-body span.float-left { + display: block; + float: left; + margin-right: 13px; + overflow: hidden; +} + +.markdown-body span.float-left span { + margin: 13px 0 0; +} + +.markdown-body span.float-right { + display: block; + float: right; + margin-left: 13px; + overflow: hidden; +} 
+ +.markdown-body span.float-right>span { + display: block; + margin: 13px auto 0; + overflow: hidden; + text-align: right; +} + +.markdown-body code, +.markdown-body tt { + padding: .2em .4em; + margin: 0; + font-size: 85%; + white-space: break-spaces; + background-color: var(--color-neutral-muted); + border-radius: 6px; +} + +.markdown-body code br, +.markdown-body tt br { + display: none; +} + +.markdown-body del code { + text-decoration: inherit; +} + +.markdown-body samp { + font-size: 85%; +} + +.markdown-body pre code { + font-size: 100%; +} + +.markdown-body pre>code { + padding: 0; + margin: 0; + word-break: normal; + white-space: pre; + background: transparent; + border: 0; +} + +.markdown-body .highlight { + margin-bottom: 16px; +} + +.markdown-body .highlight pre { + margin-bottom: 0; + word-break: normal; +} + +.markdown-body .highlight pre, +.markdown-body pre { + padding: 16px; + overflow: auto; + font-size: 85%; + line-height: 1.45; + color: var(--color-fg-default); + background-color: var(--color-canvas-subtle); + border-radius: 6px; +} + +.markdown-body pre code, +.markdown-body pre tt { + display: inline; + max-width: auto; + padding: 0; + margin: 0; + overflow: visible; + line-height: inherit; + word-wrap: normal; + background-color: transparent; + border: 0; +} + +.markdown-body .csv-data td, +.markdown-body .csv-data th { + padding: 5px; + overflow: hidden; + font-size: 12px; + line-height: 1; + text-align: left; + white-space: nowrap; +} + +.markdown-body .csv-data .blob-num { + padding: 10px 8px 9px; + text-align: right; + background: var(--color-canvas-default); + border: 0; +} + +.markdown-body .csv-data tr { + border-top: 0; +} + +.markdown-body .csv-data th { + font-weight: var(--base-text-weight-semibold, 600); + background: var(--color-canvas-subtle); + border-top: 0; +} + +.markdown-body [data-footnote-ref]::before { + content: "["; +} + +.markdown-body [data-footnote-ref]::after { + content: "]"; +} + +.markdown-body .footnotes { + 
font-size: 12px; + color: var(--color-fg-muted); + border-top: 1px solid var(--color-border-default); +} + +.markdown-body .footnotes ol { + padding-left: 16px; +} + +.markdown-body .footnotes ol ul { + display: inline-block; + padding-left: 16px; + margin-top: 16px; +} + +.markdown-body .footnotes li { + position: relative; +} + +.markdown-body .footnotes li:target::before { + position: absolute; + top: -8px; + right: -8px; + bottom: -8px; + left: -24px; + pointer-events: none; + content: ""; + border: 2px solid var(--color-accent-emphasis); + border-radius: 6px; +} + +.markdown-body .footnotes li:target { + color: var(--color-fg-default); +} + +.markdown-body .footnotes .data-footnote-backref g-emoji { + font-family: monospace; +} + +.markdown-body .pl-c { + color: var(--color-prettylights-syntax-comment); +} + +.markdown-body .pl-c1, +.markdown-body .pl-s .pl-v { + color: var(--color-prettylights-syntax-constant); +} + +.markdown-body .pl-e, +.markdown-body .pl-en { + color: var(--color-prettylights-syntax-entity); +} + +.markdown-body .pl-smi, +.markdown-body .pl-s .pl-s1 { + color: var(--color-prettylights-syntax-storage-modifier-import); +} + +.markdown-body .pl-ent { + color: var(--color-prettylights-syntax-entity-tag); +} + +.markdown-body .pl-k { + color: var(--color-prettylights-syntax-keyword); +} + +.markdown-body .pl-s, +.markdown-body .pl-pds, +.markdown-body .pl-s .pl-pse .pl-s1, +.markdown-body .pl-sr, +.markdown-body .pl-sr .pl-cce, +.markdown-body .pl-sr .pl-sre, +.markdown-body .pl-sr .pl-sra { + color: var(--color-prettylights-syntax-string); +} + +.markdown-body .pl-v, +.markdown-body .pl-smw { + color: var(--color-prettylights-syntax-variable); +} + +.markdown-body .pl-bu { + color: var(--color-prettylights-syntax-brackethighlighter-unmatched); +} + +.markdown-body .pl-ii { + color: var(--color-prettylights-syntax-invalid-illegal-text); + background-color: var(--color-prettylights-syntax-invalid-illegal-bg); +} + +.markdown-body .pl-c2 { + 
color: var(--color-prettylights-syntax-carriage-return-text); + background-color: var(--color-prettylights-syntax-carriage-return-bg); +} + +.markdown-body .pl-sr .pl-cce { + font-weight: bold; + color: var(--color-prettylights-syntax-string-regexp); +} + +.markdown-body .pl-ml { + color: var(--color-prettylights-syntax-markup-list); +} + +.markdown-body .pl-mh, +.markdown-body .pl-mh .pl-en, +.markdown-body .pl-ms { + font-weight: bold; + color: var(--color-prettylights-syntax-markup-heading); +} + +.markdown-body .pl-mi { + font-style: italic; + color: var(--color-prettylights-syntax-markup-italic); +} + +.markdown-body .pl-mb { + font-weight: bold; + color: var(--color-prettylights-syntax-markup-bold); +} + +.markdown-body .pl-md { + color: var(--color-prettylights-syntax-markup-deleted-text); + background-color: var(--color-prettylights-syntax-markup-deleted-bg); +} + +.markdown-body .pl-mi1 { + color: var(--color-prettylights-syntax-markup-inserted-text); + background-color: var(--color-prettylights-syntax-markup-inserted-bg); +} + +.markdown-body .pl-mc { + color: var(--color-prettylights-syntax-markup-changed-text); + background-color: var(--color-prettylights-syntax-markup-changed-bg); +} + +.markdown-body .pl-mi2 { + color: var(--color-prettylights-syntax-markup-ignored-text); + background-color: var(--color-prettylights-syntax-markup-ignored-bg); +} + +.markdown-body .pl-mdr { + font-weight: bold; + color: var(--color-prettylights-syntax-meta-diff-range); +} + +.markdown-body .pl-ba { + color: var(--color-prettylights-syntax-brackethighlighter-angle); +} + +.markdown-body .pl-sg { + color: var(--color-prettylights-syntax-sublimelinter-gutter-mark); +} + +.markdown-body .pl-corl { + text-decoration: underline; + color: var(--color-prettylights-syntax-constant-other-reference-link); +} + +.markdown-body g-emoji { + display: inline-block; + min-width: 1ch; + font-family: "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; + font-size: 1em; + 
font-style: normal !important; + font-weight: var(--base-text-weight-normal, 400); + line-height: 1; + vertical-align: -0.075em; +} + +.markdown-body g-emoji img { + width: 1em; + height: 1em; +} + +.markdown-body .task-list-item { + list-style-type: none; +} + +.markdown-body .task-list-item label { + font-weight: var(--base-text-weight-normal, 400); +} + +.markdown-body .task-list-item.enabled label { + cursor: pointer; +} + +.markdown-body .task-list-item+.task-list-item { + margin-top: 4px; +} + +.markdown-body .task-list-item .handle { + display: none; +} + +.markdown-body .task-list-item-checkbox { + margin: 0 .2em .25em -1.4em; + vertical-align: middle; +} + +.markdown-body .contains-task-list:dir(rtl) .task-list-item-checkbox { + margin: 0 -1.6em .25em .2em; +} + +.markdown-body .contains-task-list { + position: relative; +} + +.markdown-body .contains-task-list:hover .task-list-item-convert-container, +.markdown-body .contains-task-list:focus-within .task-list-item-convert-container { + display: block; + width: auto; + height: 24px; + overflow: visible; + clip: auto; +} + +.markdown-body ::-webkit-calendar-picker-indicator { + filter: invert(50%); +} + +.markdown-body .markdown-alert { + padding: var(--base-size-8) var(--base-size-16); + margin-bottom: 16px; + color: inherit; + border-left: .25em solid var(--color-border-default); +} + +.markdown-body .markdown-alert>:first-child { + margin-top: 0; +} + +.markdown-body .markdown-alert>:last-child { + margin-bottom: 0; +} + +.markdown-body .markdown-alert .markdown-alert-title { + display: flex; + font-weight: var(--base-text-weight-medium, 500); + align-items: center; + line-height: 1; +} + +.markdown-body .markdown-alert.markdown-alert-note { + border-left-color: var(--color-accent-emphasis); +} + +.markdown-body .markdown-alert.markdown-alert-note .markdown-alert-title { + color: var(--color-accent-fg); +} + +.markdown-body .markdown-alert.markdown-alert-important { + border-left-color: 
var(--color-done-emphasis); +} + +.markdown-body .markdown-alert.markdown-alert-important .markdown-alert-title { + color: var(--color-done-fg); +} + +.markdown-body .markdown-alert.markdown-alert-warning { + border-left-color: var(--color-attention-emphasis); +} + +.markdown-body .markdown-alert.markdown-alert-warning .markdown-alert-title { + color: var(--color-attention-fg); +} + +.markdown-body .markdown-alert.markdown-alert-tip { + border-left-color: var(--color-success-emphasis); +} + +.markdown-body .markdown-alert.markdown-alert-tip .markdown-alert-title { + color: var(--color-success-fg); +} + +.markdown-body .markdown-alert.markdown-alert-caution { + border-left-color: var(--color-danger-emphasis); +} + +.markdown-body .markdown-alert.markdown-alert-caution .markdown-alert-title { + color: var(--color-danger-fg); +} \ No newline at end of file diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..2254d8b --- /dev/null +++ b/docs/index.html @@ -0,0 +1,838 @@ + + + + + + + + + + Search Code By Comment + + + + + + + + + + + + + + + + + + + +
+ +
+
+
+
+
+ + +
+

Document Index of: + +
+ + +
+

+ + +
+
    + +
    + + + + \ No newline at end of file diff --git a/docs/metadata.json b/docs/metadata.json new file mode 100644 index 0000000..834b307 --- /dev/null +++ b/docs/metadata.json @@ -0,0 +1,195 @@ +{ + "url": { + "full": "https://github.com/spcl/graph-of-thoughts", + "partial": "spcl/graph-of-thoughts" + }, + "file_mapping": { + "0": { + "filepath": "/README.md", + "entry_id": 0, + "language_id": "markdown" + }, + "1": { + "filepath": "/pyproject.toml", + "entry_id": 14, + "language_id": "toml" + }, + "2": { + "filepath": "/examples/README.md", + "entry_id": 20, + "language_id": "markdown" + }, + "3": { + "filepath": "/examples/doc_merge/README.md", + "entry_id": 24, + "language_id": "markdown" + }, + "4": { + "filepath": "/examples/doc_merge/doc_merge.py", + "entry_id": 30, + "language_id": "python" + }, + "5": { + "filepath": "/examples/doc_merge/plot.py", + "entry_id": 86, + "language_id": "python" + }, + "6": { + "filepath": "/examples/doc_merge/pure_documents.json", + "entry_id": 100, + "language_id": "json" + }, + "7": { + "filepath": "/examples/keyword_counting/README.md", + "entry_id": 244, + "language_id": "plain-text" + }, + "8": { + "filepath": "/examples/keyword_counting/dataset_gen_countries.py", + "entry_id": 250, + "language_id": "python" + }, + "9": { + "filepath": "/examples/keyword_counting/plot.py", + "entry_id": 280, + "language_id": "python" + }, + "10": { + "filepath": "/examples/set_intersection/README.md", + "entry_id": 294, + "language_id": "markdown" + }, + "11": { + "filepath": "/examples/set_intersection/dataset_gen_intersection.py", + "entry_id": 302, + "language_id": "python" + }, + "12": { + "filepath": "/examples/set_intersection/plot.py", + "entry_id": 310, + "language_id": "python" + }, + "13": { + "filepath": "/examples/set_intersection/utils.py", + "entry_id": 324, + "language_id": "python" + }, + "14": { + "filepath": "/examples/sorting/README.md", + "entry_id": 332, + "language_id": "markdown" + }, + "15": { + "filepath": 
"/examples/sorting/plot.py", + "entry_id": 338, + "language_id": "python" + }, + "16": { + "filepath": "/examples/sorting/utils.py", + "entry_id": 352, + "language_id": "python" + }, + "17": { + "filepath": "/graph_of_thoughts/controller/README.md", + "entry_id": 360, + "language_id": "markdown" + }, + "18": { + "filepath": "/graph_of_thoughts/controller/__init__.py", + "entry_id": 366, + "language_id": "python" + }, + "19": { + "filepath": "/graph_of_thoughts/controller/controller.py", + "entry_id": 370, + "language_id": "python" + }, + "20": { + "filepath": "/graph_of_thoughts/language_models/README.md", + "entry_id": 384, + "language_id": "markdown" + }, + "21": { + "filepath": "/graph_of_thoughts/language_models/__init__.py", + "entry_id": 404, + "language_id": "python" + }, + "22": { + "filepath": "/graph_of_thoughts/language_models/abstract_language_model.py", + "entry_id": 408, + "language_id": "python" + }, + "23": { + "filepath": "/graph_of_thoughts/language_models/chatgpt.py", + "entry_id": 416, + "language_id": "python" + }, + "24": { + "filepath": "/graph_of_thoughts/language_models/config_template.json", + "entry_id": 432, + "language_id": "json" + }, + "25": { + "filepath": "/graph_of_thoughts/language_models/llamachat_hf.py", + "entry_id": 438, + "language_id": "python" + }, + "26": { + "filepath": "/graph_of_thoughts/operations/README.md", + "entry_id": 450, + "language_id": "markdown" + }, + "27": { + "filepath": "/graph_of_thoughts/operations/__init__.py", + "entry_id": 462, + "language_id": "python" + }, + "28": { + "filepath": "/graph_of_thoughts/operations/graph_of_operations.py", + "entry_id": 466, + "language_id": "python" + }, + "29": { + "filepath": "/graph_of_thoughts/operations/operations.py", + "entry_id": 474, + "language_id": "python" + }, + "30": { + "filepath": "/graph_of_thoughts/operations/thought.py", + "entry_id": 540, + "language_id": "python" + }, + "31": { + "filepath": "/graph_of_thoughts/parser/__init__.py", + "entry_id": 
550, + "language_id": "python" + }, + "32": { + "filepath": "/graph_of_thoughts/parser/parser.py", + "entry_id": 554, + "language_id": "python" + }, + "33": { + "filepath": "/graph_of_thoughts/prompter/__init__.py", + "entry_id": 564, + "language_id": "python" + }, + "34": { + "filepath": "/graph_of_thoughts/prompter/prompter.py", + "entry_id": 568, + "language_id": "python" + }, + "35": { + "filepath": "/paper/README.md", + "entry_id": 576, + "language_id": "markdown" + }, + "36": { + "filepath": "/paper/plots.py", + "entry_id": 580, + "language_id": "python" + } + }, + "project_name": "graph-of-thoughts", + "split_count": 7 +} \ No newline at end of file diff --git a/docs/src/README.md b/docs/src/README.md new file mode 100644 index 0000000..dc0db1d --- /dev/null +++ b/docs/src/README.md @@ -0,0 +1,150 @@ +# Graph of Thoughts (GoT) + +

    + +

    + +This is the official implementation of [Graph of Thoughts: Solving Elaborate Problems with Large Language Models](https://arxiv.org/pdf/2308.09687.pdf). +This framework gives you the ability to solve complex problems by modeling them as a Graph of Operations (GoO), which is automatically executed with a Large Language Model (LLM) as the engine. +This framework is designed to be flexible and extensible, allowing you to not only solve problems using the new GoT approach, but also to implement GoOs resembling previous approaches like CoT or ToT. + +## Setup Guide + +In order to use this framework, you need to have a working installation of Python 3.8 or newer. + +### Installing GoT + +Before running either of the following two installation methods, make sure to activate your Python environment (if any) beforehand. +If you are a user and you just want to use `graph_of_thoughts`, you can install it directly from PyPI: +```bash +pip install graph_of_thoughts +``` +If you are a developer and you want to modify the code, you can install it in editable mode from source: +```bash +git clone https://github.com/spcl/graph-of-thoughts.git +cd graph-of-thoughts +pip install -e . +``` + +### Configuring the LLM + +In order to use the framework, you need to have access to an LLM. +Please follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice. + +## Quick Start + +The following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach. +Make sure you have followed the [Setup Guide](#setup-guide) before running the code. 
+ +```python +from examples.sorting.sorting_032 import SortingPrompter, SortingParser, utils +from graph_of_thoughts import controller, language_models, operations + +# Problem input + +to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]" + +# Create the Graph of Operations +gop = operations.GraphOfOperations() +gop.append_operation(operations.Generate()) +gop.append_operation(operations.Score(scoring_function=utils.num_errors)) +gop.append_operation(operations.GroundTruth(utils.test_sorting)) + +# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key) +lm = language_models.ChatGPT("config.json", model_name="chatgpt") + +# Create the Controller +ctrl = controller.Controller( + lm, + gop, + SortingPrompter(), + SortingParser(), + # The following dictionary is used to configure the initial thought state + { + "original": to_be_sorted, + "current": "", + "method": "cot" + } +) + +# Run the Controller and generate the output graph +ctrl.run() +ctrl.output_graph("output_cot.json") +``` + +To run the more sophisticated GoT approach, you can use the following code snippet. 
+ +```python +from examples.sorting.sorting_032 import SortingPrompter, SortingParser, got, utils +from graph_of_thoughts import controller, language_models, operations + +# Problem input + +to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]" + +# Retrieve the Graph of Operations +gop = got() + +# Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key) +lm = language_models.ChatGPT("config.json", model_name="chatgpt") + +# Create the Controller +ctrl = controller.Controller( + lm, + gop, + SortingPrompter(), + SortingParser(), + # The following dictionary is used to configure the initial thought state + { + "original": to_be_sorted, + "current": "", + "phase": 0, + "method": "got" + } +) + +# Run the Controller and generate the output graph +ctrl.run() +ctrl.output_graph("output_got.json") +``` +You can compare the two results by inspecting the output graphs `output_cot.json` and `output_got.json`. +The final thought states' scores indicate the number of errors in the sorted list. + +## Documentation +The paper gives a high-level overview of the framework and its components. +In order to understand the framework in more detail, you can read the documentation of the individual modules. +Especially the [Controller](graph_of_thoughts/controller/README.md) and [Operations](graph_of_thoughts/operations/README.md) modules are important for understanding how to make the most out of the framework. +We took extra care to fully document the code, so that you can easily understand how it works and how to extend it. + +## Examples + +The [examples](examples) directory contains several examples of problems that can be solved using the framework, including the ones presented in the paper. +It is a great starting point for learning how to use the framework to solve real problems. +Each example contains a `README.md` file with instructions on how to run it and play with it. 
The code is fully documented and should be easy to follow. +You can also run the examples straight from the main directory. Note that the results will be stored in the respective examples sub-directory. + +Try for instance: +```bash +python -m examples.sorting.sorting_032 +python -m examples.keyword_counting.keyword_counting +``` +## Paper Results + +You can run the experiments from the paper by following the instructions in the [examples](examples) directory. +However, if you just want to inspect and replot the results, you can use the [paper](paper) directory. + +## Citations + +If you find this repository valuable, please give it a star! +Got any questions or feedback? Feel free to reach out to [nils.blach@inf.ethz.ch](mailto:nils.blach@inf.ethz.ch) or open an issue. +Using this in your work? Please reference us using the provided citation: + +```bibtex +@misc{besta2023got, + title = {{Graph of Thoughts: Solving Elaborate Problems with Large Language Models}}, + author = {Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Micha{\l} and Niewiadomski, Hubert and Nyczyk, Piotr and Hoefler, Torsten}, + year = 2023, + eprinttype = {arXiv}, + eprint = {2308.09687} +} +``` diff --git a/docs/src/examples/README.md b/docs/src/examples/README.md new file mode 100644 index 0000000..ce7a6f6 --- /dev/null +++ b/docs/src/examples/README.md @@ -0,0 +1,7 @@ +# Examples + +This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example. + +We further include prompt files for each example that can be used to test prompts manually in a console. + +Please refer to the individual example directories for more information on the specific example. 
diff --git a/docs/src/examples/doc_merge/README.md b/docs/src/examples/doc_merge/README.md new file mode 100644 index 0000000..a1c34a5 --- /dev/null +++ b/docs/src/examples/doc_merge/README.md @@ -0,0 +1,38 @@ +# Document Merging + +The use case in this directory generates a new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents. +We provide implementations of five different approaches: +- IO +- Chain-of-Thought (CoT) +- Tree of Thoughts (ToT) +- Graph of Thoughts (GoT): + - GoT: aggregation of fully merged NDAs + - GoT2: aggregation of partially merged NDAs + +## Data + +We provide an input file with 50 samples: `documents.csv`. + +## Execution + +The file to execute the use case is called +`doc_merge.py`. In the main body, one can +select the specific samples to be run (variable samples) and the +approaches (variable approaches). It is also possible to set a budget in +dollars (variable budget). + +The Python scripts will create the directory `result`, if it is not +already present. In the `result` directory, another directory is created +for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`. +Inside each execution specific directory two files (`config.json`, +`log.log`) and a separate directory for each selected approach are +created. `config.json` contains the configuration of the run: input data, +selected approaches, name of the LLM, and the budget. `log.log` contains +the prompts and responses of the LLM as well as additional debug data. +The approach directories contain a separate json file for every sample +and the file contains the Graph Reasoning State (GRS) for that sample. + +## Plot Data + +Change the results directory in line 158 of `plot.py` and run `python3 +plot.py` to plot your data.
diff --git a/docs/src/examples/doc_merge/doc_merge.py b/docs/src/examples/doc_merge/doc_merge.py new file mode 100644 index 0000000..dd560af --- /dev/null +++ b/docs/src/examples/doc_merge/doc_merge.py @@ -0,0 +1,767 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Nils Blach + +import os +import re +import logging +import datetime +import json +import csv +from statistics import fmean +from typing import Dict, List, Callable, Set, Union +from graph_of_thoughts import controller, language_models, operations, prompter, parser + + +class DocMergePrompter(prompter.Prompter): + """ + DocMergePrompter provides the generation of prompts specific to the document + merge example for the language models. + + Inherits from the Prompter class and implements its abstract methods. + """ + + merge_doc_prompt_start = """Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. Output only the created NDA between the tags and , without any additional text. +Here are NDAs - +""" + merge_doc_prompt_block = """ + +{document} + +""" + + merge_doc_prompt_cot_start = """Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. +You can generate any intermediate thoughts and documents you want, but the final output should be the merged NDA, placed between the two tags and . +For instance you might want to follow this approach: +1. Split each NDA into their logical subparts. +2. Merge the subparts of the {num} NDAs. +3. Combine the merged subparts into a single NDA. +4. Place the merged NDA between the tags and . + +Here are NDAs - : +""" + + improve_summary_prompt_start = """The following NDA merges initial NDAs - . +Please improve the summary NDA by adding more information and removing redundancy. 
Output only the improved NDA, placed between the two tags and , without any additional text. + +Here are NDAs - : +""" + + improve_summary_prompt_block = """ + +{document} + +""" + + improve_summary_prompt_end = """ +Here is the summary NDA : + +{summary} + +""" + + score_prompt_base = """The following NDA merges NDAs - . +Please score the merged NDA in terms of how much redundant information is contained, independent of the original NDAs, as well as how much information is retained from the original NDAs. +A score of 10 for redundancy implies that absolutely no information is redundant, while a score of 0 implies that at least half of the information is redundant (so everything is at least mentioned twice). +A score of 10 for retained information implies that all information from the original NDAs is retained, while a score of 0 implies that no information is retained. +You may provide reasoning for your scoring, but the final score for redundancy should be between the tags and , and the final score for retained information should be between the tags and , without any additional text within any of those tags. + +Here are NDAs - : +""" + + score_prompt_block = """ + +{document} + +""" + + score_prompt_end = """ +Here is the summary NDA : + +{summary} + +""" + + aggregate_full_prompt_base = """The following NDAs - each merge the initial NDAs - . +Combine the merged NDAs - into a new one, maximizing their advantages and overall information retention, while minimizing redundancy. +Output only the new NDA between the tags and , without any additional text. + +Here are the original NDAs - : +""" + + aggregate_full_prompt_block1 = """ + +{document} + +""" + aggregate_full_prompt_mid = """ +Here are the summary NDAs - : +""" + + aggregate_full_prompt_block2 = """ + +{summary} + +""" + + aggregate_sub_prompt_base = """The following NDAs - are summaries of some other NDAs. 
    def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Generate an aggregation prompt for the language model.

        :param state_dicts: The thought states that should be aggregated.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The aggregation prompt.
        :rtype: str
        """

        # Sub-part aggregation: the states cover a non-empty strict subset of
        # the documents, so merge the partial summaries of those parts.
        if len(state_dicts[0]["parts"]) > 0 and len(state_dicts[0]["parts"]) < len(
            state_dicts[0]["documents"]
        ):
            prompt = self.aggregate_sub_prompt_base.format(
                num_ndas=len(state_dicts),
            )
            for i, state_dict in enumerate(state_dicts):
                prompt += self.aggregate_sub_prompt_generate.format(
                    nda=state_dict["current"], num=i + 1
                )
            return prompt
        else:
            # Full aggregation: include every original document followed by
            # every candidate summary in the prompt.
            prompt = self.aggregate_full_prompt_base.format(
                num_ndas=len(state_dicts[0]["documents"]),
                num_ndas_summary=len(state_dicts),
            )
            for i, document in enumerate(state_dicts[0]["documents"]):
                prompt += self.aggregate_full_prompt_block1.format(
                    document=document, num=i + 1
                )
            prompt += self.aggregate_full_prompt_mid.format(
                num_ndas_summary=len(state_dicts),
            )
            for i, state_dict in enumerate(state_dicts):
                prompt += self.aggregate_full_prompt_block2.format(
                    summary=state_dict["current"], num=i + 1
                )
            return prompt

    def generate_prompt(
        self,
        num_branches: int,
        documents: List[str],
        method: str,
        parts: Set[str],
        current: str,
        **kwargs,
    ) -> str:
        """
        Generate a generate prompt for the language model.

        :param num_branches: The number of responses the prompt should ask the LM to generate.
        :type num_branches: int
        :param documents: The list of documents to be merged.
        :type documents: List[str]
        :param method: Method for which the generate prompt is generated.
        :type method: str
        :param parts: Indices of the already processed document parts.
        :type parts: Set[str]
        :param current: The intermediate solution.
        :type current: str
        :param kwargs: Additional keyword arguments.
        :return: The generate prompt.
        :rtype: str
        :raise AssertionError: If method is not implemented yet.
        """

        # NOTE(review): num_branches is not referenced in this body; branching
        # appears to be handled by the caller.
        prompt = ""
        if method.startswith("io") or method.startswith("cot"):
            # IO/CoT: one-shot merge of all documents (CoT adds a
            # step-by-step preamble via its dedicated start template).
            if method.startswith("io"):
                prompt += self.merge_doc_prompt_start.format(num=len(documents))
            else:
                prompt += self.merge_doc_prompt_cot_start.format(num=len(documents))
            for i, document in enumerate(documents):
                prompt += self.merge_doc_prompt_block.format(
                    document=document, num=i + 1
                )
            return prompt
        elif method.startswith("tot"):
            # ToT: first round merges from scratch, later rounds refine the
            # current intermediate summary.
            if current is None or current == "":
                prompt += self.merge_doc_prompt_start.format(num=len(documents))
                for i, document in enumerate(documents):
                    prompt += self.merge_doc_prompt_block.format(
                        document=document, num=i + 1
                    )
                return prompt
            else:
                prompt += self.improve_summary_prompt_start.format(
                    num=len(documents),
                )
                for i, document in enumerate(documents):
                    prompt += self.improve_summary_prompt_block.format(
                        document=document, num=i + 1
                    )
                prompt += self.improve_summary_prompt_end.format(summary=current)
                return prompt
        elif method.startswith("got"):
            # GoT: operate on the tracked subset of documents; an empty parts
            # set means all documents.
            # NOTE(review): parts is re-sorted inside the loops below even
            # though it is already sorted here — redundant but harmless.
            parts = (
                sorted(list(parts)) if len(parts) > 0 else list(range(len(documents)))
            )
            if current is None or current == "":
                prompt += self.merge_doc_prompt_start.format(num=len(parts))
                for i, part in enumerate(sorted(list(parts))):
                    prompt += self.merge_doc_prompt_block.format(
                        document=documents[part], num=i + 1
                    )
                return prompt
            else:
                prompt += self.improve_summary_prompt_start.format(
                    num=len(parts),
                )
                for i, part in enumerate(sorted(list(parts))):
                    prompt += self.improve_summary_prompt_block.format(
                        document=documents[part], num=i + 1
                    )
                prompt += self.improve_summary_prompt_end.format(summary=current)
                return prompt
        else:
            assert False, "Not implemented yet."

    def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Generate a score prompt for the language model.

        :param state_dicts: The thought states that should be scored,
                            if more than one, they should be scored together.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The score prompt.
        :rtype: str
        :raise AssertionError: If more than one thought state is supplied.
        """

        if len(state_dicts) > 1:
            assert False, "Not implemented yet."
        else:
            # perform individual scoring
            # Score only the documents that the summary actually covers; an
            # empty parts set means the summary covers all documents.
            parts = (
                [
                    state_dicts[0]["documents"][part]
                    for part in sorted(list(state_dicts[0]["parts"]))
                ]
                if len(state_dicts[0]["parts"]) > 0
                else state_dicts[0]["documents"]
            )
            prompt = self.score_prompt_base.format(
                num=len(parts),
            )
            for i, part in enumerate(parts):
                prompt += self.score_prompt_block.format(document=part, num=i + 1)
            prompt += self.score_prompt_end.format(
                summary=state_dicts[0]["current"],
            )
            return prompt

    def improve_prompt(self, **kwargs) -> str:
        """
        Generate an improve prompt for the language model.

        Not implemented for the document-merge example (always returns None).

        :param kwargs: Additional keyword arguments.
        :return: The improve prompt.
        :rtype: str
        """
        pass

    def validation_prompt(self, **kwargs) -> str:
        """
        Generate a validation prompt for the language model.

        Not implemented for the document-merge example (always returns None).

        :param kwargs: Additional keyword arguments.
        :return: The validation prompt.
        :rtype: str
        """
        pass
+ """ + self.cache = {} + + def strip_answer_helper(self, text: str, tag: str = "") -> str: + """ + Helper function to remove tags from a text. + + :param text: The input text. + :type text: str + :param tag: The tag to be stripped. Defaults to "". + :type tag: str + :return: The stripped text. + :rtype: str + """ + + text = text.strip() + if "Output:" in text: + text = text[text.index("Output:") + len("Output:") :].strip() + if tag != "": + start = text.rfind(f"<{tag}>") + end = text.rfind(f"") + if start != -1 and end != -1: + text = text[start + len(f"<{tag}>") : end].strip() + elif start != -1: + logging.warning( + f"Only found the start tag <{tag}> in answer: {text}. Returning everything after the tag." + ) + text = text[start + len(f"<{tag}>") :].strip() + elif end != -1: + logging.warning( + f"Only found the end tag in answer: {text}. Returning everything before the tag." + ) + text = text[:end].strip() + else: + logging.warning( + f"Could not find any tag {tag} in answer: {text}. Returning the full answer." + ) + return text + + def parse_aggregation_answer( + self, states: List[Dict], texts: List[str] + ) -> Union[Dict, List[Dict]]: + """ + Parse the response from the language model for an aggregation prompt. + + :param states: The thought states used to generate the prompt. + :type states: List[Dict] + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought states after parsing the respones from the language model. 
+ :rtype: Union[Dict, List[Dict]] + """ + + new_states = [] + for text in texts: + if len(states[0]["parts"]) < len(states[0]["documents"]): + # subpart aggregation + text = self.strip_answer_helper(text, "Merged") + new_state = states[0].copy() + new_state["current"] = text + new_state["parts"] = set() + for state in states: + new_state["parts"] = new_state["parts"] | state["parts"] + + new_states.append(new_state) + else: + # full NDA aggregation + text = self.strip_answer_helper(text, "Merged") + new_state = states[0].copy() + new_state["current"] = text + new_states.append(new_state) + return new_states + + def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]: + """ + Parse the response from the language model for a generate prompt. + + :param state: The thought state used to generate the prompt. + :type state: Dict + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought states after parsing the respones from the language model. + :rtype: List[Dict] + """ + new_states = [] + for text in texts: + text = self.strip_answer_helper(text, "Merged") + new_state = state.copy() + new_state["current"] = text + new_states.append(new_state) + return new_states + + def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]: + """ + Parse the response from the language model for a score prompt. + + :param states: The thought states used to generate the prompt. + :type states: List[Dict] + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The scores for the thought states. + :rtype: List[float] + :raise AssertionError: If the number of thought states is not one. + """ + assert len(states) == 1, "Only one state is allowed for scoring." 
+ if len(states) == 1: + # individual scoring + redundancy_scores = [] + retain_scores = [] + for text in texts: + answer = self.strip_answer_helper(text, "Redundancy") + res = re.findall(r"\d+\.?\d*", answer) + if len(res) == 1: + redundancy_scores.append(float(res[0])) + elif len(res) > 1: + logging.warning( + f"Found multiple redundancy scores in answer: {text}. Returning the last one." + ) + redundancy_scores.append(float(res[-1])) + else: + logging.warning( + f"Could not find any redundancy score in answer: {text}. Ignoring this answer." + ) + answer = self.strip_answer_helper(text, "Retained") + res = re.findall(r"\d+\.?\d*", answer) + if len(res) == 1: + retain_scores.append(float(res[0])) + elif len(res) > 1: + logging.warning( + f"Found multiple retained scores in answer: {text}. Returning the last one." + ) + retain_scores.append(float(res[-1])) + else: + logging.warning( + f"Could not find any retained score in answer: {text}. Ignoring this answer." + ) + if len(redundancy_scores) == 0 or len(retain_scores) == 0: + logging.warning( + f"Could not find any valid score in any answer. Returning 0.0." + ) + return [0.0] + mean_redundancy = fmean(redundancy_scores) + mean_retain = fmean(retain_scores) + f1 = 2 * mean_redundancy * mean_retain / (mean_redundancy + mean_retain) + return [f1] + + def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict: + """ + Parse the response from the language model for an improve prompt. + + :param state: The thought state used to generate the prompt. + :type state: Dict + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought state after parsing the responses from the language model. + :rtype: Dict + """ + pass + + def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool: + """ + Parse the response from the language model for a validation prompt. + + :param state: The thought state used to generate the prompt. 
def io() -> operations.GraphOfOperations:
    """
    Build the Graph of Operations for the IO method: a single generation
    followed by scoring.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    graph = operations.GraphOfOperations()
    graph.append_operation(operations.Generate(1, 1))
    graph.append_operation(operations.Score(3, False))
    return graph


def cot() -> operations.GraphOfOperations:
    """
    Build the Graph of Operations for the CoT method: a single generation
    followed by scoring.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    graph = operations.GraphOfOperations()
    graph.append_operation(operations.Generate(1, 1))
    graph.append_operation(operations.Score(3, False))
    return graph


def tot() -> operations.GraphOfOperations:
    """
    Build the Graph of Operations for the ToT method: three rounds of
    generate/score/keep-best, where each keep-best after the first also
    considers the previous round's best thought.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    graph = operations.GraphOfOperations()
    branching = 10
    previous_best = None
    for _ in range(3):
        graph.append_operation(operations.Generate(1, branching))
        graph.append_operation(operations.Score(3, False))
        best = operations.KeepBestN(1, True)
        if previous_best is not None:
            # Carry the previous round's winner into the comparison.
            best.add_predecessor(previous_best)
        graph.append_operation(best)
        previous_best = best
    return graph
def got() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the GoT method, where full documents
    are merged.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    # Phase 1: generate 5 candidate merges, score them, keep the best 3.
    operations_graph.append_operation(operations.Generate(1, 5))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best = operations.KeepBestN(3, True)
    operations_graph.append_operation(keep_best)
    # Phase 2: aggregate the 3 survivors into 5 merged candidates; the
    # keep-best also considers the pre-aggregation survivors.
    operations_graph.append_operation(operations.Aggregate(5))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best2 = operations.KeepBestN(1, True)
    keep_best2.add_predecessor(keep_best)
    operations_graph.append_operation(keep_best2)
    # Phase 3: refine the winner with 10 new generations and keep the best,
    # again comparing against the previous winner.
    operations_graph.append_operation(operations.Generate(1, 10))
    operations_graph.append_operation(operations.Score(3, False))
    keep_best3 = operations.KeepBestN(1, True)
    keep_best3.add_predecessor(keep_best2)
    operations_graph.append_operation(keep_best3)

    return operations_graph


def got2() -> operations.GraphOfOperations:
    """
    Generates the Graph of Operations for the GoT2 method, where partial
    documents are merged.

    :return: Graph of Operations
    :rtype: GraphOfOperations
    """
    operations_graph = operations.GraphOfOperations()

    # Stage 1: merge pairs of documents independently. With range(0, 4, 2)
    # this creates two branches covering parts {0, 1} and {2, 3}.
    sub_parts = []
    for i in range(0, 4, 2):  # should be at most 16 parts
        # list_id=i binds the current i as a default argument, avoiding the
        # late-binding closure pitfall.
        sub_text = operations.Selector(
            lambda thoughts, list_id=i: [
                operations.Thought(
                    state={**thoughts[0].state, "parts": {list_id, list_id + 1}}
                )
            ]
        )
        operations_graph.add_operation(sub_text)
        gen_nda = operations.Generate(1, 5)
        gen_nda.add_predecessor(sub_text)
        operations_graph.add_operation(gen_nda)
        score_nda = operations.Score(3, False)
        score_nda.add_predecessor(gen_nda)
        operations_graph.add_operation(score_nda)
        keep_best_nda = operations.KeepBestN(1, True)
        keep_best_nda.add_predecessor(score_nda)
        operations_graph.add_operation(keep_best_nda)

        sub_parts.append(keep_best_nda)

    # Stage 2: repeatedly aggregate pairs of partial merges until a single
    # merged result remains (a binary merge tree). An odd leftover branch is
    # carried over to the next round unchanged.
    while len(sub_parts) > 1:
        new_sub_parts = []
        for i in range(0, len(sub_parts), 2):
            if i + 1 == len(sub_parts):
                new_sub_parts.append(sub_parts[i])
                continue
            aggregate = operations.Aggregate(5)
            aggregate.add_predecessor(sub_parts[i])
            aggregate.add_predecessor(sub_parts[i + 1])
            operations_graph.add_operation(aggregate)
            score = operations.Score(3, False)
            score.add_predecessor(aggregate)
            operations_graph.add_operation(score)
            keep_best = operations.KeepBestN(1, True)
            keep_best.add_predecessor(score)
            operations_graph.add_operation(keep_best)

            # Refine the aggregated merge and keep the best of the refined
            # candidates and the pre-refinement winner.
            gen_nda = operations.Generate(1, 5)
            gen_nda.add_predecessor(keep_best)
            operations_graph.add_operation(gen_nda)
            score_nda = operations.Score(3, False)
            score_nda.add_predecessor(gen_nda)
            operations_graph.add_operation(score_nda)
            keep_best_nda = operations.KeepBestN(1, True)
            keep_best_nda.add_predecessor(score_nda)
            keep_best_nda.add_predecessor(keep_best)
            operations_graph.add_operation(keep_best_nda)

            new_sub_parts.append(keep_best_nda)
        sub_parts = new_sub_parts

    return operations_graph
def run(
    data_ids: List[int],
    methods: List[Callable[[], operations.GraphOfOperations]],
    budget: float,
    lm_name: str,
) -> float:
    """
    Controller function that executes each specified method for each specified
    sample while the budget is not exhausted.

    :param data_ids: Indices of the sample to be run.
    :type data_ids: List[int]
    :param methods: List of functions to generate Graphs of Operations.
    :type methods: Each function generates a Graph of Operation.
    :param budget: Language model budget for the execution in dollars.
    :type budget: float
    :param lm_name: Name of the language model to be used.
    :type lm_name: str
    :return: Spent budget in dollars.
    :rtype: float
    """

    orig_budget = budget
    # Load the dataset; next(reader) skips the CSV header row, and the first
    # column is converted to int (the sample id).
    data_path = os.path.join(os.path.dirname(__file__), "documents.csv")
    data = []
    with open(data_path, "r", encoding="utf8") as f:
        reader = csv.reader(f)
        next(reader)
        for row in reader:
            row[0] = int(row[0])
            data.append(row)

    # No ids given means run on the whole dataset.
    if data_ids is None or len(data_ids) == 0:
        data_ids = list(range(len(data)))
    selected_data = [data[i] for i in data_ids]

    results_dir = os.path.join(os.path.dirname(__file__), "results")

    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    # One timestamped results folder per invocation, named after the model
    # and the methods being run.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    extra_info = f"{lm_name}_{'-'.join([method.__name__ for method in methods])}"
    folder_name = f"{extra_info}_{timestamp}"
    results_folder = os.path.join(results_dir, folder_name)
    os.makedirs(results_folder)

    config = {
        "data": selected_data,
        "methods": [method.__name__ for method in methods],
        "lm": lm_name,
        "budget": budget,
    }
    with open(os.path.join(results_folder, "config.json"), "w") as f:
        json.dump(config, f)

    logging.basicConfig(
        filename=os.path.join(results_folder, "log.log"),
        filemode="w",
        format="%(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG,
    )

    for method in methods:
        os.makedirs(os.path.join(results_folder, method.__name__))

    # NOTE(review): this loop variable shadows the `data` dataset list above;
    # the full dataset is not used past this point.
    for data in selected_data:
        logging.info(f"Running data {data[0]}: {data[1]}")
        if budget <= 0.0:
            logging.error(
                f"Budget has been depleted, stopping. Data {data[0]} has not been run."
            )
            break
        for method in methods:
            logging.info(f"Running method {method.__name__}")
            logging.info(f"Budget left: {budget}")
            if budget <= 0.0:
                logging.error(
                    f"Budget has been depleted, stopping. Method {method.__name__} has not been run."
                )
                break
            # A fresh LM instance per method so that cost accounting starts
            # at zero for each run.
            lm = language_models.ChatGPT(
                os.path.join(
                    os.path.dirname(__file__),
                    "../../graph_of_thoughts/language_models/config.json",
                ),
                model_name=lm_name,
                cache=True,
            )
            operations_graph = method()
            executor = controller.Controller(
                lm,
                operations_graph,
                DocMergePrompter(),
                DocMergeParser(),
                {
                    "documents": [data[2], data[3], data[4], data[5]],
                    "parts": set(),
                    "current": "",
                    "method": method.__name__,
                },
            )
            try:
                executor.run()
            except Exception as e:
                logging.error(f"Exception: {e}")
            path = os.path.join(
                results_folder,
                method.__name__,
                f"{data[0]}.json",
            )
            # Sets are not JSON-serializable; convert parts to lists before
            # the graph is written out.
            for operation in operations_graph.operations:
                for thought in operation.thoughts:
                    thought.state["parts"] = list(thought.state["parts"])
            executor.output_graph(path)
            budget -= lm.cost

    return orig_budget - budget


if __name__ == "__main__":
    """
    Input (x1, x2, x3, x4): Four NDAs
    Output (y): A new combined NDA
    Evaluation: According to information coverage without repetition (scored by the LLM)
    """
    budget = 30
    samples = [item for item in range(0, 50)]
    approaches = [io, cot, tot, got, got2]

    spent = run(samples, approaches, budget, "chatgpt")

    logging.info(f"Spent {spent} out of {budget} budget.")
def get_complete_results(base_directory):
    """
    Load all per-sample result JSON files from each method subdirectory of
    ``base_directory``.

    :param base_directory: Directory containing one subdirectory per method.
    :return: Mapping of method name to a list of {"key", "data"} entries,
        sorted by sample key.
    """
    results_complete = {}
    for folder_name in os.listdir(base_directory):
        folder_path = os.path.join(base_directory, folder_name)
        if os.path.isdir(folder_path):
            results_complete[folder_name] = []
            for file_name in os.listdir(folder_path):
                if file_name.endswith(".json"):
                    file_path = os.path.join(folder_path, file_name)
                    with open(file_path, "r") as f:
                        data = json.load(f)
                    results_complete[folder_name].append(
                        {"key": int(file_name.split(".")[0]), "data": data}
                    )
    for key in results_complete.keys():
        results_complete[key] = sorted(
            results_complete[key], key=lambda x: x["key"]
        )
    return results_complete


def get_final_scores(results_complete):
    """
    Extract the final score and cost information for each sample.

    The operations of each sample are scanned in reverse; the first "score"
    operation found supplies the score (max over its scores), and the last
    operation with cost information supplies cost and token counts.

    :param results_complete: Output of :func:`get_complete_results`.
    :return: Mapping of method name to lists of
        [key, score, solved, prompt_tokens, completion_tokens, cost].
    """
    scores = {}
    for method in results_complete.keys():
        scores[method] = []
        for result in results_complete[method]:
            score = 0
            solved = False
            cost = 1
            prompt_tokens = 0
            completion_tokens = 0
            for op in reversed(result["data"]):
                if "cost" in op:
                    cost = op["cost"]
                    prompt_tokens = op["prompt_tokens"]
                    completion_tokens = op["completion_tokens"]
                if "operation" in op and op["operation"] == "score":
                    try:
                        score = max(op["scores"])
                        break
                    # Bug fix: this was a bare `except:`, which also swallows
                    # KeyboardInterrupt/SystemExit. Only catch what
                    # max(op["scores"]) can raise: missing key, empty list,
                    # or non-comparable entries.
                    except (KeyError, ValueError, TypeError):
                        continue
            scores[method].append(
                [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
            )
        scores[method] = sorted(scores[method], key=lambda x: x[0])
    return scores


def get_plotting_data(base_directory):
    """
    Assemble plotting data (scores, solved counts, costs) per method from the
    results stored under ``base_directory``.

    :param base_directory: Directory containing one subdirectory per method.
    :return: Mapping of method name to {"scores", "solved", "costs"}.
    """
    results_complete = get_complete_results(base_directory)
    scores = get_final_scores(results_complete)
    results_plotting = {
        method: {
            "scores": [x[1] for x in scores[method]],
            "solved": sum([1 for x in scores[method] if x[2]]),
            "costs": [x[5] for x in scores[method]],
        }
        for method in scores.keys()
    }
    return results_plotting
def plot_results(
    results,
    methods_order=None,
    model="GPT-3.5",
    num_ndas=4,
    y_lower=0,
    y_upper=10,
    cost_upper=1.8,
    display_solved=True,
    annotation_offset=1,
    display_left_ylabel=False,
    display_right_ylabel=False,
):
    """
    Plot per-method score boxplots with a twin axis of total cost bars, and
    save the figure as ``doc_merge_<model>_<num_ndas>.pdf``.

    :param results: Output of get_plotting_data (method -> scores/solved/costs).
    :param methods_order: Order of methods on the x-axis; defaults to
        ["io", "cot", "tot", "got", "got2"].
    :param model: Model name used in the output filename.
    :param num_ndas: Number of NDAs, used in the output filename.
    :param y_lower: Lower bound of the score axis.
    :param y_upper: Base height for the "Solved" annotations.
    :param cost_upper: Upper bound of the cost axis.
    :param display_solved: Whether to annotate solved counts above each box.
    :param annotation_offset: Vertical offset for the solved annotations.
    :param display_left_ylabel: Whether to label the score axis.
    :param display_right_ylabel: Whether to label the cost axis.
    """
    # Bug fix: avoid a mutable default argument for methods_order.
    if methods_order is None:
        methods_order = ["io", "cot", "tot", "got", "got2"]
    # Only keep methods that actually have results.
    methods_order = [method for method in methods_order if method in results]
    scores_ordered = [
        [score for score in results[method]["scores"]] for method in methods_order
    ]
    total_costs = [sum(results[method]["costs"]) for method in methods_order]

    # Create figure and axis
    fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))

    # Create boxplots
    positions = range(1, len(methods_order) + 1)
    ax.boxplot(scores_ordered, positions=positions)

    fig_fontsize = 12

    # Set the ticks and labels (bug fix: the set_xticks call was duplicated).
    methods_labels = ["IO", "CoT", "ToT", "GoT", "GoT2"]
    ax.set_xticks(range(1, len(methods_order) + 1))
    ax.set_xticklabels(methods_labels)
    # ax.set_xlabel("Approach")

    # Extra headroom when solved counts are annotated above the boxes.
    ax.set_ylim(y_lower, 12 if display_solved else 9.75)
    plt.yticks(fontsize=fig_fontsize)

    if display_left_ylabel:
        ax.set_ylabel(
            f"Score (out of 10); the higher the better", fontsize=fig_fontsize
        )

    # ax.set_title(f"Document Merging")

    # Twin axis with the total cost per method as bars.
    ax2 = ax.twinx()
    ax2.bar(
        positions,
        total_costs,
        alpha=0.5,
        color="blue",
        label="Total Cost ($); the lower the better",
    )
    ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
    ax2.set_ylim(0, cost_upper)
    # Align the cost-axis ticks with the number of score-axis ticks.
    number_of_ticks = len(ax.get_yticks())
    tick_interval = cost_upper / (number_of_ticks)
    ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]

    # Set custom tick positions for ax2
    ax2.set_yticks(ax2_ticks)

    if display_right_ylabel:
        ax2.set_ylabel(
            "Total Cost ($); the lower the better",
            color="#1919ff",
            fontsize=fig_fontsize,
        )

    if display_solved:
        annotation_height = y_upper + annotation_offset
        count = 1
        for method in methods_order:
            if method not in results:
                continue
            solved = results[method]["solved"]
            ax.text(
                count, annotation_height, f"Solved: {solved}", ha="center", va="bottom"
            )
            count += 1

    # Normalize the model name for the output filename (e.g. "GPT-3.5" -> "gpt35").
    model = model.replace(".", "").replace("-", "").lower()
    fig.savefig(f"doc_merge_{model}_{num_ndas}.pdf", bbox_inches="tight")
results[method]["solved"] + ax.text( + count, annotation_height, f"Solved: {solved}", ha="center", va="bottom" + ) + count += 1 + + model = model.replace(".", "").replace("-", "").lower() + fig.savefig(f"doc_merge_{model}_{num_ndas}.pdf", bbox_inches="tight") + + +plot_results( + get_plotting_data("results/"), + num_ndas=4, + display_solved=False, + model="GPT-3.5", + y_upper=10, + display_left_ylabel=True, + cost_upper=15, +) diff --git a/docs/src/examples/doc_merge/pure_documents.json b/docs/src/examples/doc_merge/pure_documents.json new file mode 100644 index 0000000..b40a3a6 --- /dev/null +++ b/docs/src/examples/doc_merge/pure_documents.json @@ -0,0 +1,52 @@ +[ + "NON-DISCLOSURE AGREEMENT (NDA)\n\n1. Agreement between [Your Company Name] and [Recipient Name] on [Date].\n2. Information sharing for the purpose of [specific project or purpose].\n3. \"Confidential Information\" includes all potentially commercially valuable information, specifically software development tactics, processes, and in-house research results.\n4. Receiving party is obligated to protect the Confidential Information, use it solely for the disclosed purpose, and not disclose it without consent.\n5. Breach penalties include injunctive relief, other remedies, and a $200,000 fee per breach.\n6. The Agreement applies to the Parties and their successors and assigns. It contains all related agreements and lack of enforcement doesn't imply waiver.\n7. The Agreement is under the laws of [State].\n8. Signed by [Your Company Name] and [Recipient Name] at the above date.", + "NON-DISCLOSURE AGREEMENT (NDA)\n\nEffective from [Effective Date], this NDA involves [Your Company Name] (\"Disclosing Party\"), and [Recipient Name] (\"Receiving Party\").\n\n1. Purpose: The Disclosing Party will disclose confidential information related to [Topic of Research] to the Receiving Party for [Purpose].\n\n2. 
Confidential Information: Defined as all non-public reports, data, designs, and other materials provided by the Disclosing Party to the Receiving Party.\n\n3. Receiving Party's Obligations:\n a. Use, reproduce, or distribute the confidential information only for the agreed purpose.\n b. Restrict access to the information to necessary parties, ensuring they abide by strict confidentiality.\n c. Return or destroy all confidential information upon request or at the end of the agreement.\n\n4. Exclusions: Information will not be classified as confidential if it is already known to the Receiving Party, publicly known, or independently developed by the Receiving Party.\n\n5. Non-Competition: The Receiving Party will not engage in any competing business against the Disclosing Party during the agreement and one year after its termination.\n\n6. Term and Termination: The agreement is valid for [e.g., \"two years\"], unless terminated earlier with [e.g., \"30 days\"] written notice. The Receiving Party's non-disclosure and non-competition obligations persist post-termination.\n\n7. General Provisions:\n a. Governing Law: [Your State]'s laws apply.\n b. Amendments: Only valid if written and signed by both parties.\n c. Entire Agreement: This contract overrules previous related agreements.\n\nSigned as of the Effective Date by [Your Company Name] - Disclosing Party [Recipient Name] - Receiving Party.", + "CONFIDENTIALITY & NON-DISCLOSURE AGREEMENT\n\n Entities Involved:\n Effective [Date], between [AquaBlue Innovations], established in [State], and [PineTree Solutions], a registered entity.\n\n Objective:\n To safeguard classified data during talks of a potential technological alliance.\n\n Specification of Protected Information:\n Particularly:\n\na. System designs and architectural schematics.\nb. Proprietary computational algorithms.\n\n Receiver's Obligations:\n a. Maintain strict non-disclosure using best practices.\n b. Employ solely for the aforementioned aim.\n c. 
No unveiling without explicit authorization.\n\n Violation Ramifications:\n A charge of $280,000 for every infringement, plus possible legal proceedings.\n\n General Terms:\n Binding for both parties and any successors. This encapsulates the entire accord.\n\n Legal Reference:\n Governed as per [State]'s legal framework.\n\n Attestation:\n Duly signed on [Date].\n\n[AquaBlue Innovations] [PineTree Solutions]", + "SECRECY & DISCLOSURE AGREEMENT\n\n Contracting Parties:\n Dated [Date], drawn between [AquaBlue Innovations], a [State]-based corporation, and [PineTree Solutions], a licensed organization.\n\n Aim:\n To protect exclusive insights amidst dialogues for a technological partnership.\n\n Categorization of Sensitive Data:\n Includes:\n\na. Internal software blueprints.\nb. Intellectual property awaiting patents.\n\n Commitments of Recipient:\n a. Uphold confidentiality, ensuring data integrity.\n b. Utilize strictly for collaborative ventures.\n c. No exposure without prior consensus.\n\n Repercussions for Non-Compliance:\n $295,000 fine for each transgression, and the option for legal recourse.\n\n Overall Provisions:\n Legally enforceable for signatories and successors. Complete and sole agreement.\n\n Juridical Standpoint:\n Under the auspices of [State] laws.\n\n Ratification:\n Confirmed and endorsed on [Date].\n\n[AquaBlue Innovations] [PineTree Solutions]", + "This Non-Disclosure and Non-Competition Agreement is made between [Your Company Name] and [Contractor Name/Company].\n\n1. Confidentiality: The Contractor acknowledges access to the Company's confidential information during their relationship.\n\n2. Non-Disclosure: The Contractor agrees not to disclose, use, reproduce, or distribute this confidential information unless necessary for their obligations.\n\n3. Non-Competition: The Contractor agrees not to compete with the company or assist others in doing so for one year after the termination of their relationship. 
They also agree not to solicit the company's clients or customers for the benefit of a competitor for one year.\n\n4. Return of Confidential Information: At the end of the relationship or upon the company's request, the Contractor will return all confidential information and copies thereof.\n\n5. Remedies: For any breach, the Company may seek specific performance and injunctive relief, in addition to other remedies.\n\n6. Governing Law: The Agreement is governed by the laws of [Your State].\n\n7. Entire Agreement: This document replaces all previous agreements and understandings on the subject.\n\nBoth parties acknowledge understanding and voluntarily accepting the Agreement.\n\nSignatures required from [Your Company Name] and [Contractor Name/Company].", + "This Loyalty Agreement is between [Company Name] and [Employee Full Name], where the company agrees to provide specialized training at no cost to the employee, who in turn commits to work for the company for a specified period. If the employee leaves the company within two years after completing training, they must pay $50,000 as compensation for training costs, payable within 30 days of termination. Exceptions to this repayment include termination without cause, resignation due to breach of agreement by the company, or other agreed upon circumstances. Any changes to this agreement must be in writing and signed by both parties, and the agreement will be governed by the laws of [State/Country]. This agreement is binding to all involved parties and their successors. Both the company and the employee sign to attest to these terms.", + "EMPLOYEE LOYALTY AGREEMENT\n\nThis agreement is entered into by [Company Name] and [Employee Name] to protect the company's business interests, goodwill, and confidential information, and affirm employee's loyalty. \n\n1. Non-disclosure: Employee agrees to not disclose or use company's confidential information, during or post-employment. \n\n2. 
Non-competition: Employee will not work for or establish a competitor within [e.g., \"50\"] miles from the company for [e.g., \"12\"] months post-employment.\n\n3. Non-solicitation: Employee will not solicit clients or employees of the company for [e.g., \"12\"] months post-employment.\n\n4. Return of Property: Employee will return all company property upon termination.\n\n5. Remedies: Company can seek injunction for a breach or potential breach of this agreement.\n\n6. Severability: If any provision of this agreement is held invalid, the remainder of the Agreement will continue.\n\n7. Governing Law: This agreement will be governed by the laws of [State, e.g., \"California\"].\n\n8. Agreement: This is the entire agreement and supersedes prior negotiations.\n\n9. Amendments: Any changes must be in writing and signed by both parties.\n\nSignatures of both parties indicate agreement to these terms.\n\n[Company Name] - Authorized Signatory [Employee Name]", + "This Loyalty Agreement is between [Company Name] and [Contractor Company Name]. The Agreement ensures the Contractor's loyalty and confidentiality towards the Company during and post engagement. Contractor agrees not to use or disclose the Company's confidential information, or engage in competing business or solicitation for a period of [e.g., \"12\"] months post termination. Contractor must return all Company property upon termination. In case of breach, Company can seek legal remedies including injunction. The Agreement remains valid even if a provision is held invalid. The Agreement follows [State, e.g., \"California\"] laws and replaces all previous understandings. It can be amended only in writing with both parties' signature.", + "B2B CONTRACTOR LOYALTY AGREEMENT\n\nThis Agreement is made on _____ day of ______, 20, between [Company Name], located at [Company Address] (\"Company\"), and [Contractor Company Name], located at [Contractor Address] (\"Contractor\").\n\n1. 
CONFIDENTIALITY\n\nContractor agrees not to disclose, use, or allow the use of the Company's confidential information during or after the relationship, except as required for their services to the Company.\n\n2. NON-COMPETITION\n\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\n\n3. NON-SOLICITATION\n\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\n\n4. RETURN OF PROPERTY\n\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property and data.\n\n5. PENALTY FOR BREACH\n\nIn the event of a breach of this Agreement, the Contractor shall pay the Company a penalty of $50,000.\n\n6. GOVERNING LAW\n\nThis Agreement is governed by [State, e.g., \"California\"] laws.\n\n7. ENTIRE AGREEMENT\n\nThis Agreement supersedes prior discussions and agreements between the parties.\n\nBy signing below, the parties agree to these terms.\n\n[Company Name] - Signatory [Contractor Company Name] - Signatory\nDate: _______________________ Date: _______________________", + "B2B CONTRACTOR LOYALTY AGREEMENT\n\nThis Agreement is made on _____ day of ______, 20, between [Company Name], located at [Company Address] (\"Company\"), and [Contractor Company Name], located at [Contractor Address] (\"Contractor\").\n\n1. DEFINITION OF CONFIDENTIAL INFORMATION\n\nFor the purposes of this Agreement, \"confidential information\" shall refer to research results, software created, devices produced by the Company, and any other information deemed proprietary or not generally known to the public.\n\n2. 
CONFIDENTIALITY\n\nContractor agrees not to disclose, use, or allow the use of the Company's confidential information, as defined herein, during or after the relationship, except as required for their services to the Company.\n\n3. NON-COMPETITION\n\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\n\n4. NON-SOLICITATION\n\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\n\n5. RETURN OF PROPERTY\n\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property, including all items containing or pertaining to confidential information.\n\n6. PENALTY FOR BREACH\n\nIn the event of a breach of this Agreement, the Contractor shall pay the Company a penalty of $50,000.\n\n7. GOVERNING LAW\n\nThis Agreement is governed by [State, e.g., \"California\"] laws.\n\n8. ENTIRE AGREEMENT\n\nThis Agreement supersedes prior discussions and agreements between the parties.\n\nBy signing below, the parties agree to these terms.\n\n[Company Name] - Signatory [Contractor Company Name] - Signatory", + "The Non-Disclosure Agreement (NDA) dated [Date] is between [Company], based in [Country/State], and [Supplier], also incorporated in [Country/State]. The Company intends to disclose confidential information to the Supplier for [purpose]. This confidential data can include business strategies, financial data, customer information, and product designs. The Supplier agrees to refrain from sharing this information, barring any legal requirements. Exceptions to this confidentiality are in cases where the information becomes public or was already known by the Supplier before the Company's disclosure. If the Supplier breaches this agreement, they face a financial penalty of [$]. 
The NDA is valid for [X years], unless the Company provides written termination. Upon the Company's request, the Supplier must return or destroy all copies of Confidential Information. This agreement supersedes previous agreements and can only be altered by a written document approved by both parties. The NDA is governed by the laws of [specific country/state].", + "NON-DISCLOSURE AND NON-COMPETE AGREEMENT\n\nEffective Date: [Date]\n\nPARTIES:\n\n Company: [Full Legal Name of Company], located at [Company Address].\n Supplier: [Full Legal Name of Supplier], located at [Supplier Address].\n\n1. CONFIDENTIALITY:\n\nSupplier shall not disclose Company's confidential information, which includes business strategies, financial data, and customer details, to any third party. This confidentiality obligation lasts for [X years, e.g., \"5 years\"] from the date of disclosure.\n\n2. NON-COMPETITION:\n\nFor [X years, e.g., \"3 years\"] following the termination of their business relationship, Supplier agrees not to engage in or start any business that directly competes with Company within a [X mile/km radius, e.g., \"50-mile radius\"] of Company's primary business location.\n\n3. PENALTY FOR BREACH:\n\nShould Supplier breach this Agreement, they shall pay Company a penalty of [specific amount, e.g., \"$50,000\"], in addition to any other legal remedies available to Company.\n\n4. RETURN OF INFORMATION:\n\nUpon request, Supplier shall return or destroy all of Company's confidential information and confirm its deletion in writing.\n\n5. 
GOVERNING LAW:\n\nThis Agreement is governed by the laws of [specific country/state, e.g., \"the State of New York\"].\n\nAGREEMENT ACKNOWLEDGEMENT:\n\n__________ [Company] __________ [Supplier]", + "DATA ANALYSIS EMPLOYEE AGREEMENT\n\nThis Agreement (\"Agreement\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \"corporation\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as the \"Company,\" and [Employee Name], an individual residing at [Employee Address], herein referred to as the \"Employee.\"\n\n Position and Duties:\n a. The Company hereby employs Employee in the capacity of Data Analyst.\n b. The Employee's primary duties will be to [specific data analysis tasks, e.g., \"analyze sales data, forecast trends, and produce reports for managerial review\"].\n\n Term: The Employee's engagement will commence on [Start Date] and will terminate on [End Date].\n\n Compensation: For the services rendered by the Employee under this Agreement, the Company will pay Employee a total sum of [specific amount, e.g., \"$5,000\"] payable on [payment schedule, e.g., \"a monthly basis\"].\n\n Confidentiality: The Employee agrees not to disclose or use, either during or after the term of employment, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of performing their duties for the Company.\n\n Intellectual Property: Any works, developments, or inventions created by the Employee in the course of this employment related to the Company's business will remain the sole property of the Company.\n\n Termination: Either party may terminate this Agreement with [e.g., \"30\"] days written notice. 
Upon termination, Employee agrees to return all company property and data.\n\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\n\n Amendments: This Agreement may only be amended in writing and signed by both parties.\n\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\n\nThe parties hereto have executed this Agreement as of the date first above written.\n\n[Company Name or Authorized [Employee Name]\nRepresentative Name, Title]", + "DATA ANALYSIS SERVICE AGREEMENT\n\nThis Agreement (\"Agreement\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \"corporation\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as the \"Company,\" and [Contractor Business Name], a [legal structure, e.g., \"limited liability company\"] organized under the laws of [State/Country], with its principal place of business at [Contractor Business Address], herein referred to as the \"Contractor.\"\n\n Scope of Work:\n a. The Contractor agrees to provide data analysis services to the Company.\n b. 
The specific services will include [specific data analysis tasks, e.g., \"analyzing sales data, forecasting trends, and producing reports for managerial review\"].\n\n Term: The Contractor's engagement will commence on [Start Date] and will terminate on [End Date].\n\n Compensation: For the services rendered by the Contractor under this Agreement, the Company will pay the Contractor a total sum of [specific amount, e.g., \"$5,000\"] payable on [payment schedule, e.g., \"a monthly basis\"].\n\n Confidentiality: The Contractor agrees not to disclose or use, either during or after the term of this Agreement, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of providing the services.\n\n Intellectual Property: Any works, developments, or inventions created by the Contractor in the course of providing the services related to the Company's business will remain the sole property of the Company.\n\n Termination: Either party may terminate this Agreement with [e.g., \"30\"] days written notice. Upon termination, Contractor agrees to return all company data and any other proprietary materials.\n\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\n\n Amendments: This Agreement may only be amended in writing and signed by both parties.\n\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\n\nThe parties hereto have executed this Agreement as of the date first above written.", + "NON-DISCLOSURE AGREEMENT (NDA)\n\nDate: [Insert Date]\n\nParties: [University Name], [University Address] (\"Disclosing Party\") and [Researcher's Full Name], [Researcher's Address] (\"Receiving Party\").\n\n1. Purpose: For the research of [Briefly Describe the Research or Project].\n\n2. 
Confidential Information: Includes data, studies, reports, patents, and other valuable business-related material.\n\n3. Obligations:\na. Confidential Information must remain secret.\nb. It's for the specified purpose only.\nc. No third-party disclosure without consent.\n\n4. Exceptions: Public knowledge, third-party shared info, or independently developed.\n\n5. Duration: Confidentiality lasts [X years, e.g., 2 years] from disclosure.\n\n6. Return: All Confidential Information must be returned or destroyed upon request.\n\n7. No Transfer: This doesn't grant property rights or licenses.\n\n8. Law: Governed by [State/Country] laws.\n\n9. Amendments: Only in writing and signed.\n\nAgreement: By signing, parties agree to the terms.", + "UNIVERSITY-BUSINESS COOPERATION AGREEMENT\n\nThis Cooperation Agreement (the \"Agreement\") is made and entered into on [Insert Date], by and between [Business Name], a [business type, e.g., \"corporation\"] located at [Business Address], hereinafter referred to as the \"Business\", and [University Name], a higher education institution located at [University Address], hereinafter referred to as the \"University\".\n\n1. Objective:\n\nThe purpose of this Agreement is to define the terms under which the Business and the University will cooperate to [Objective e.g., \"jointly conduct research, promote innovation, and facilitate the exchange of knowledge in the field of _________\"].\n\n2. Scope of Cooperation:\n\na. Research Collaborations: Joint research initiatives, sharing of resources, and publications.\n\nb. Internships and Placements: Facilitation of student internships, projects, and job placements.\n\nc. Seminars and Workshops: Organizing joint seminars, conferences, and workshops.\n\nd. Facilities and Resource Sharing: Providing access to labs, equipment, libraries, etc.\n\n3. 
Intellectual Property:\n\nAll intellectual property developed jointly will be shared, and a separate agreement will detail the rights, ownership, and any revenue distribution.\n\n4. Funding and Resources:\n\nBoth parties agree to jointly contribute [Specify Amount or Percentage], and additional fund sourcing details will be determined on a project-by-project basis.\n\n5. Confidentiality:\n\nBoth parties agree to maintain the confidentiality of shared proprietary information.\n\n6. Duration and Termination:\n\nThis Agreement will remain in effect for [e.g., \"three years\"] from the date of signing, unless terminated earlier by either party with [e.g., \"30 days\"] written notice.\n\n7. Dispute Resolution:\n\nAny disputes arising from this Agreement will first attempt resolution through mediation. If unresolved, disputes will be subject to the jurisdiction of [State/Country].\n\n8. Amendments:\n\nChanges to this Agreement must be in writing and signed by both parties.\n\n9. Liability:\n\nEach party assumes responsibility for its actions and is not liable for the actions of the other party.\n\n10. Governing Law:\n\nThis Agreement is governed by the laws of [State/Country].\n\nIN WITNESS WHEREOF, both parties have executed this Agreement as of the date first mentioned above.", + "SUPPLY AGREEMENT FOR UNIVERSITY LABORATORY\n\nThis Supply Agreement (the \"Agreement\"), made as of [Insert Date], is entered into by and between [Supplier Name], a [business entity type, e.g., \"corporation\"] having its principal office at [Supplier Address], hereinafter referred to as the \"Supplier\", and [University Name], a higher education institution located at [University Address], acting through its [specific department or laboratory, e.g., \"Department of Chemistry\"], hereinafter referred to as the \"University\".\n\n1. Purpose:\n\nThe Supplier agrees to provide specific products/materials/equipment, as detailed in Annex A, to the University for use in its laboratory.\n\n2. 
Terms of Supply:\n\na. Description of Goods: The goods to be supplied are detailed in Annex A attached herewith.\n\nb. Delivery: Goods will be delivered to [University Address or specific lab address] within [specific timeframe].\n\nc. Pricing: The price for the goods is set out in Annex A and includes all packaging, transportation, and delivery costs unless otherwise specified.\n\n3. Payment Terms:\n\nPayments will be made by the University within [e.g., \"30 days\"] of receiving the invoice from the Supplier.\n\n4. Warranty:\n\nThe Supplier warrants that all goods supplied under this Agreement will be free from defects for a period of [specific duration, e.g., \"12 months\"] from the date of delivery.\n\n5. No Disclosure Clause:\n\na. The University agrees not to disclose, reproduce, or distribute any proprietary information, trade secrets, or other confidential details related to the products/materials/equipment provided by the Supplier without the Supplier's prior written consent.\n\nb. This clause remains effective for a period of [e.g., \"5 years\"] from the date of the last delivery of the goods under this Agreement.\n\n6. Termination:\n\nEither party may terminate this Agreement with [e.g., \"30 days\"] written notice if the other party breaches any term of this Agreement and fails to remedy such breach within the notice period.\n\n7. Governing Law:\n\nThis Agreement shall be governed by and interpreted in accordance with the laws of [State/Country].\n\n8. Amendments:\n\nModifications to this Agreement must be in writing and signed by both parties.\n\nIN WITNESS WHEREOF, the parties hereto have executed this Supply Agreement as of the date first above written.", + "LABORATORY SUPPLY AGREEMENT\n\nDate: [Insert Date]\n\nParties:\n\n Supplier: [Supplier Name], [Supplier Address]\n University: [University Name], [University Address]\n\n1. Purpose: Supplier will provide goods as listed in Annex A to the University's laboratory.\n\n2. 
Delivery: Within [specific timeframe, e.g., \"30 days\"] to [specific lab address].\n\n3. Payment: University will pay within [e.g., \"30 days\"] of invoice receipt.\n\n4. Warranty: Goods are defect-free for [e.g., \"12 months\"] from delivery.\n\n5. Non-disclosure: University will not disclose Supplier\u2019s proprietary details for [e.g., \"5 years\"]. Breach will result in a penalty of [specific amount or formula, e.g., \"$5,000 per incident or actual damages, whichever is greater\"].\n\n6. Termination: [e.g., \"30 days\"] notice for breaches unresolved within said period.\n\n7. Law: Governed by [State/Country] laws.\n\n8. Amendments: Both parties must sign written changes.", + "FREELANCER AGREEMENT\n\nEffective Date: [Date]\n\nBETWEEN:\n\n Client: [Client Full Name or Company Name], located at [Client Address].\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\n\n1. SERVICES:\n\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \"web design, content creation, graphic design\"].\n\n2. PAYMENT TERMS:\n\nFor the services rendered, Client agrees to pay Freelancer a total of [Total Amount, e.g., \"$1,000\"]. Payments shall be made as follows: [Payment structure, e.g., \"50% upfront, 50% upon completion\"].\n\n3. DEADLINE:\n\nThe services will be completed by [End Date, e.g., \"December 31, 2023\"].\n\n4. CONFIDENTIALITY:\n\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\n\n5. TERMINATION:\n\nEither party may terminate this agreement with [X days, e.g., \"14 days\"] written notice. Upon termination, payments will be adjusted for work completed.\n\n6. INDEPENDENT CONTRACTOR:\n\nFreelancer is an independent contractor and not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\n\n7. 
GOVERNING LAW:\n\nThis Agreement is governed by the laws of [specific country/state, e.g., \"the State of New York\"].\n\n8. AMENDMENTS:\n\nAny changes to this agreement must be in writing and signed by both parties.", + "FREELANCER AGREEMENT\n\nEffective Date: [Date]\n\nBETWEEN:\n\n Client: [Client Full Name or Company Name], located at [Client Address].\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\n\n1. SERVICES:\n\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \"web design, content creation, graphic design\"].\n\n2. PAYMENT TERMS:\n\nFor the services rendered, Client agrees to pay Freelancer a total of [Total Amount, e.g., \"$1,000\"]. Payments shall be made as follows: [Payment structure, e.g., \"50% upfront, 50% upon completion\"].\n\n3. DEADLINE:\n\nThe services will be completed by [End Date, e.g., \"December 31, 2023\"].\n\n4. PENALTIES:\n\na. Late Delivery: If Freelancer fails to deliver the completed service by the specified deadline, a penalty of [specific amount, e.g., \"$50\"] per day will be deducted from the final payment until the service is delivered.\n\nb. Confidentiality Breach: Breaching the confidentiality clause will result in a penalty of [specific amount, e.g., \"$2,000\"].\n\n5. CONFIDENTIALITY:\n\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\n\n6. TERMINATION:\n\nEither party may terminate this agreement with [X days, e.g., \"14 days\"] written notice. Upon termination, payments will be adjusted for work completed.\n\n7. INDEPENDENT CONTRACTOR:\n\nFreelancer is an independent contractor and not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\n\n8. GOVERNING LAW:\n\nThis Agreement is governed by the laws of [specific country/state, e.g., \"the State of New York\"].\n\n9. 
AMENDMENTS:\n\nAny changes to this agreement must be in writing and signed by both parties.", + "This document outlines the terms of cooperation between Company A and Company B for a joint research project. The duties of each company are designated, with a detailed financial contribution outlined in Appendix A. Confidentiality is strictly enforced, and any intellectual property created will be jointly owned. All published findings will be reviewed by both parties for protection of proprietary information. Termination of this agreement requires 30 days' written notice, and each party assumes any risks or liabilities during this collaboration. Amendments must be in writing and signed by both parties. The duration of the agreement lasts from the start date to the end date, unless extended. By signing, both parties acknowledge and agree to these terms.", + "BUSINESS AGREEMENT\n\nEffective Date: [Date]\n\nBETWEEN:\n\n Business A: [Full Legal Name of Business A], located at [Business A Address].\n Business B: [Full Legal Name of Business B], located at [Business B Address].\n\n1. PURPOSE:\n\nThis Agreement outlines the terms of the collaboration/project/service between Business A and Business B regarding [Brief Description of the Collaboration/Project/Service].\n\n2. TERMS OF SERVICE:\n\n Business A agrees to: [Specific tasks/responsibilities, e.g., \"Supply 500 units of Product X monthly.\"].\n Business B agrees to: [Specific tasks/responsibilities, e.g., \"Pay $50 per unit of Product X within 30 days of delivery.\"].\n\n3. PAYMENT TERMS:\n\nPayments shall be made as follows: [Payment structure, e.g., \"Payment due within 30 days of invoice.\"].\n\n4. CONFIDENTIALITY:\n\nBoth parties commit to maintaining confidentiality regarding all proprietary information exchanged during this agreement.\n\n5. TERMINATION:\n\nEither party may terminate this Agreement with [X days, e.g., \"30 days\"] written notice. 
If breached, the aggrieved party may seek remedies as per governing laws.\n\n6. GOVERNING LAW:\n\nThis Agreement is governed by the laws of [specific country/state, e.g., \"the State of California\"].\n\n7. AMENDMENTS:\n\nModifications to this Agreement must be in writing and signed by both parties.\n\nACKNOWLEDGEMENT:\n\nBy signing below, both parties affirm their understanding and acceptance of this Business Agreement.", + "CONFIDENTIALITY:\n\n4.1. Confidential Information: For the purposes of this Agreement, \"Confidential Information\" refers to any data or information, regardless of its form, proprietary to or maintained as confidential by either party, which is not publicly known and which is disclosed during the term of this Agreement or in relation to the collaboration/project/service.\n\n4.2. Protection and Non-Disclosure: Both parties agree to use the Confidential Information solely for the purposes of the Agreement and will exert reasonable efforts to prevent the unauthorized disclosure or use of the Confidential Information. Neither party shall disclose, reproduce, or distribute any portion of the Confidential Information without the disclosing party's prior written consent.\n\n4.3. Exclusions: Confidential Information shall not include any data or information which:\n\n Is or becomes publicly known through no wrongful act of the receiving party;\n Is independently developed by the receiving party without the use of the Confidential Information;\n Is rightfully received from a third party without any obligation of confidentiality;\n Is disclosed under legal requirement or order.\n\n4.4. Return or Destruction: Upon the termination of this Agreement, or at the request of the disclosing party, the receiving party shall return all copies of the Confidential Information to the disclosing party or certify in writing that it has destroyed all such copies.\n\n4.5. 
Duration: The obligations set forth in this Confidentiality section shall survive the termination or expiration of this Agreement for a period of [specific time, e.g., \"five years\"].", + "LOYALTY AGREEMENT\n\nEffective Date: [Date]\n\nBETWEEN:\n\n Party A: [Full Legal Name of Party A], located at [Party A Address].\n Party B: [Full Legal Name of Party B], located at [Party B Address].\n\n1. LOYALTY COMMITMENT:\n\nBoth parties acknowledge the mutual value of their business relationship. They commit to work in good faith, ensuring a collaborative environment that prioritizes trust, loyalty, and shared objectives.\n\n2. NON-POACHING OF EMPLOYEES:\n\nFor the duration of this Agreement and [specific time after termination, e.g., \"for 12 months following its termination\"], neither Party A nor Party B shall, without the prior written consent of the other party:\n\na. Directly or indirectly solicit, induce, or encourage any employees of the other party to terminate their employment or to engage in employment or other services elsewhere.\nb. Hire, employ, or contract the services of any employee of the other party who has been employed by the said party within the last 12 months.\n\n3. BREACH:\n\nAny violation of the clauses in this Agreement will be deemed a material breach and may result in legal action or other remedies as available by law.\n\n4. GOVERNING LAW:\n\nThis Agreement is governed by the laws of [specific country/state, e.g., \"the State of California\"].\n\n5. AMENDMENTS:\n\nAny modifications to this Agreement must be in writing and signed by both parties.\n\nACKNOWLEDGEMENT:\n\nBy signing below, both parties affirm their understanding and acceptance of this Loyalty Agreement.", + "NON-COMPETE AND LOYALTY AGREEMENT\n\nEffective Date: [Date]\n\nBETWEEN:\n\n Business A: [Full Legal Name of Business A], located at [Business A Address].\n Business B: [Full Legal Name of Business B], located at [Business B Address].\n\n1. 
PURPOSE:\n\nThis Agreement is designed to protect the proprietary and business interests of both parties by ensuring loyalty and preventing competition during and after the period of collaboration or engagement.\n\n2. NON-COMPETE:\n\nFor the duration of this Agreement and [specific time after termination, e.g., \"for 24 months following its termination\"], neither party shall:\n\na. Engage in or support any venture that directly competes with the core business of the other party within [specific geographical region, e.g., \"the State of California\"].\nb. Invest in, partner with, or advise any business entity that competes directly with the other party.\n\n3. LOYALTY AND NON-POACHING:\n\nBoth parties pledge their commitment to a loyal business relationship. Specifically:\n\na. Neither party will, without the prior written consent of the other, solicit, induce, or encourage any employees or contractors of the other party to terminate their engagement or to join another business.\nb. Neither party shall disparage or encourage others to disparage the other party, its products, services, or its employees.\n\n4. CONFIDENTIALITY:\n\nBoth parties agree to maintain confidentiality regarding any proprietary or business-sensitive information exchanged during the course of this Agreement, ensuring that such information isn't disclosed without the explicit consent of the party owning that information.\n\n5. BREACH AND REMEDIES:\n\nA violation of any provision in this Agreement will be deemed a significant breach. The aggrieved party shall be entitled to seek injunctive relief, damages, or any other remedies available under the laws of [specific country/state, e.g., \"the State of California\"].\n\n6. GOVERNING LAW:\n\nThis Agreement shall be governed by and interpreted in accordance with the laws of [specific country/state, e.g., \"the State of California\"].\n\n7. 
AMENDMENTS:\n\nModifications or amendments to this Agreement must be in writing and duly signed by authorized representatives of both parties.\n\nACKNOWLEDGEMENT:\n\nBy signing below, representatives from both businesses affirm their understanding and acceptance of this Non-Compete and Loyalty Agreement.", + "AMENDMENT TO CONTRACT: LENGTH OF ENGAGEMENT\n\nThis Amendment is made on [Date], and amends the Non-Compete and Loyalty Agreement dated [Original Agreement Date] between:\n\n Business A: [Full Legal Name of Business A], located at [Business A Address].\n Business B: [Full Legal Name of Business B], located at [Business B Address].\n\nAMENDMENT:\n\nThe parties hereby agree to amend the Non-Compete and Loyalty Agreement as follows:\n\nSection [Specific Section Number, e.g., \"2\"] - Length of Engagement\n\nThe period of engagement between Business A and Business B as stipulated in the original Agreement is hereby extended/shortened/set to commence from [New Start Date] and conclude on [New End Date].\n\nGENERAL PROVISIONS:\n\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\n\nACKNOWLEDGEMENT:\n\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.", + "AMENDMENT TO CONTRACT: FEES FOR LATE DELIVERY\n\nThis Amendment is made on [Date], and amends the Agreement dated [Original Agreement Date] between:\n\n Business A: [Full Legal Name of Business A], located at [Business A Address].\n Business B: [Full Legal Name of Business B], located at [Business B Address].\n\nAMENDMENT:\n\nThe parties hereby agree to amend the Agreement as follows:\n\nSection [Specific Section Number, e.g., \"3\"] - Fees for Late Delivery\n\na. 
If Business A/B fails to deliver the products/services by the agreed-upon deadline, a late fee of [Specific Amount or Percentage, e.g., \"$100\" or \"5% of the total contract value\"] shall be applied for each [time period, e.g., \"day\"] of delay.\n\nb. The total late fees shall not exceed [Specific Maximum Amount or Percentage, e.g., \"$1,000\" or \"20% of the total contract value\"].\n\nc. The fees will be deducted from the final payment or invoiced separately, as deemed appropriate by the non-defaulting party.\n\nGENERAL PROVISIONS:\n\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\n\nACKNOWLEDGEMENT:\n\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.", + "AMENDMENT TO CONTRACT\n\nThis Amendment (the \"Amendment\") is entered into on [Date], between [Party One Name], hereinafter referred to as the \"First Party\", and [Party Two Name], hereinafter referred to as the \"Second Party\", collectively referred to as the \"Parties\".\n\nWHEREAS, the Parties entered into a contract dated [Original Contract Date], hereinafter referred to as the \"Original Contract\", for [Brief Description of the Original Contract, e.g., \"provision of IT services to First Party\"];\n\nWHEREAS, the Parties now wish to amend the Original Contract to add additional responsibilities pertaining to the maintenance of existing IT systems;\n\nNOW, THEREFORE, in consideration of the mutual covenants contained herein and for other good and valuable consideration, the receipt and sufficiency of which is hereby acknowledged, the Parties agree as follows:\n\n Additional Responsibility:\n\n The Second Party shall assume the responsibility of maintaining and ensuring the smooth functioning of the existing IT systems of the First Party. 
This responsibility includes, but is not limited to:\n\n a. Regular monitoring of the IT systems for any anomalies or issues.\n\n b. Prompt troubleshooting and rectification of any issues identified.\n\n c. Routine updates and patches to ensure the systems are secure and up-to-date.\n\n d. Any other related tasks as deemed necessary by the First Party.\n\n Compensation:\n\n As a result of this additional responsibility, the Parties agree to a revised compensation of [New Compensation Details, e.g., \"$XXX per month\"]. All other payment terms as outlined in the Original Contract shall remain unchanged.\n\n Duration and Termination:\n\n The duration and termination clauses from the Original Contract shall remain applicable to this Amendment unless otherwise agreed upon in writing by the Parties.\n\n Miscellaneous:\n\n All other terms and conditions of the Original Contract, which are not specifically amended by this Amendment, shall remain in full force and effect. In the event of a conflict between this Amendment and the Original Contract, the terms of this Amendment shall prevail.\n\n Entire Agreement:\n\n This Amendment, along with the Original Contract, constitutes the entire agreement between the Parties and supersedes any prior understandings, written or oral, relating to the subject matter of this Amendment.\n\n Governing Law:\n\n This Amendment shall be governed by the laws of [Jurisdiction, e.g., \"State of New York\"].", + "This appendix, part of the Contract between Party One and Party Two, sets milestones and deadlines for Party Two. \n\nMilestone 1 involves tasks such as gathering requirements, designing user interface etc. with the objective of developing initial prototype of a software application. Delivery Deadline: September 15, 2023. \n\nMilestone 2 involves tasks like incorporating feedback and conducting beta testing, aiming at finalizing and testing the software application. Delivery Deadline: October 15, 2023. 
\n\nEach milestone's completion will be reviewed by Party One; if requirements aren't met, Party Two may correct and resubmit. Payment terms and penalties are outlined in the main Contract. This appendix is executed adhering to main Contract's terms and conditions.", + "APPENDIX B \u2013 CHANGE IN TIME OF DELIVERY\n\nThis Appendix is an addendum to the contract (the \"Contract\") dated [Original Contract Date], entered into between [Party One Name], hereinafter referred to as the \"First Party\", and [Party Two Name], hereinafter referred to as the \"Second Party\". The purpose of this Appendix is to amend and modify the delivery time as specified in the original Contract.\n\n Original Delivery Time: As per the terms of the original Contract, the delivery time was set for [Original Delivery Date, e.g., \"September 15, 2023\"].\n\n Revised Delivery Time: The Parties, through mutual agreement, have now decided to amend the delivery time. The new delivery date shall be [Revised Delivery Date, e.g., \"October 10, 2023\"].\n\n Reason for Change: [Provide a brief explanation for the change in delivery time, e.g., \"Due to unforeseen challenges in the production process, additional time is required to ensure that the deliverables meet the agreed-upon quality standards.\"]\n\n Consequences of Delay: Unless otherwise stated in the main body of the Contract:\n\n a. If the Second Party fails to meet the revised delivery time, penalties or consequences as outlined in the original Contract for late delivery will apply from the revised delivery date.\n\n b. All other terms related to late delivery, including but not limited to penalties, refunds, or rights to terminate, remain effective and unchanged by this Appendix.\n\n Prevailing Terms: All other terms and conditions of the original Contract not specifically amended by this Appendix shall remain in full force and effect. 
In the event of any inconsistency or conflict between the original Contract and this Appendix, the terms of this Appendix shall prevail with respect to the change in the delivery time.\n\n Acknowledgment: By signing this Appendix, the Parties acknowledge and agree to the revised delivery time and any associated consequences of delays.\n\nThis Appendix is executed as an acknowledgment and agreement to the revised delivery time and shall be considered an integral part of the original Contract.", + "APPENDIX C \u2013 ADDITIONAL CONFIDENTIAL INFORMATION\n\nThis Appendix is an extension of the contract (the \"Contract\") dated [Original Contract Date], between [Party One Name] (\"First Party\") and [Party Two Name] (\"Second Party\"). It outlines additional categories of confidential information beyond those detailed in the Contract.\n\n Additional Confidential Information Includes:\n\n a. Non-public financial data.\n\n b. Unpublished marketing strategies and materials.\n\n c. Upcoming product or service details.\n\n d. Proprietary software codes and processes.\n\n e. Personnel records.\n\n f. Any data labeled as \"Confidential\" or \"Proprietary\" after the Contract\u2019s execution.\n\n Protection & Exclusions:\n\nBoth Parties shall extend the same protection to this Additional Confidential Information as previously agreed upon in the Contract. 
Information that becomes public, is received rightfully from a third party, is independently developed, or gets written release authorization is excluded from confidentiality obligations.\n\n Duration:\n\nThe confidentiality obligations for this Appendix shall persist as defined in the Contract or, if unspecified, for [e.g., \"five years\"] from the disclosure date.\n\n Prevailing Terms:\n\nIf there\u2019s any conflict between this Appendix and the Contract concerning confidentiality, this Appendix takes precedence concerning Additional Confidential Information.\n\nExecuted as an integral part of the Contract.", + "AMENDMENT TO NON-DISCLOSURE AGREEMENT\n\nThis Amendment (the \u201cAmendment\u201d) is made and entered into as of [Amendment Date], by and between [Party A Name], having an address at [Party A Address] (\u201cParty A\u201d), and [Party B Name], having an address at [Party B Address] (\u201cParty B\u201d), collectively referred to as the \u201cParties.\u201d\n\nRECITALS\n\nWHEREAS, the Parties entered into a Non-Disclosure Agreement dated [Original NDA Date] (the \u201cOriginal Agreement\u201d);\n\nWHEREAS, the Parties desire to amend the Original Agreement to extend the duration of certain restrictions therein;\n\nNOW, THEREFORE, in consideration of the mutual covenants and promises made by the Parties hereto, the Parties agree as follows:\n\n Extension of Time Restrictions: The time restriction set forth in Section [X] of the Original Agreement, currently stating a period of [Original Time, e.g., \"two (2) years\"], is hereby amended and extended to [New Time, e.g., \"five (5) years\"] from the date of disclosure of the Confidential Information.\n\n Full Force and Effect: Except as expressly modified by this Amendment, all terms, conditions, and provisions of the Original Agreement shall remain in full force and effect. 
In the event of any conflict between the terms of this Amendment and the Original Agreement, the terms of this Amendment shall govern.\n\n Counterparts: This Amendment may be executed in counterparts, each of which shall be deemed an original and all of which together shall constitute one and the same instrument.\n\n Governing Law: This Amendment shall be governed by and construed in accordance with the laws of [Governing State or Country, e.g., \"the State of California\"], without regard to its conflict of laws principles.\n\nIN WITNESS WHEREOF, the Parties hereto have executed this Amendment as of the date first above written.", + "BUSINESS COOPERATION AGREEMENT\n\nThis Agreement is between [Business A Name], at [Business A Address] (\"Business A\"), and [Business B Name], at [Business B Address] (\"Business B\"), effective [Day, Month, Year].\n\n1. Purpose:\nBoth businesses will cooperate in [brief description, e.g., \"joint marketing\"].\n\n2. Responsibilities:\n\n Business A will: [Key obligation, e.g., \"Promote Business B in newsletters.\"]\n Business B will: [Key obligation, e.g., \"Display Business A products.\"]\n\n3. Term:\nEffective from the above date for [e.g., \"12 months\"]. Either party can terminate with [e.g., \"30 days\"] notice.\n\n4. Confidentiality:\nConfidential information remains private, during and post-agreement.\n\n5. Governing Law:\nGoverning laws of [State/Country, e.g., \"California\"].\n\n6. Amendments:\nChanges must be written and signed by both parties.", + "APPENDIX TO BUSINESS COOPERATION AGREEMENT\n\nEXTENSION OF CONFIDENTIALITY CONDITIONS\n\nThis Appendix is made as of [Day, Month, Year], and is appended to the Business Cooperation Agreement dated [Original Agreement Date] (\"Original Agreement\") between [Business A Name], located at [Business A Address] (\"Business A\") and [Business B Name], located at [Business B Address] (\"Business B\").\n\n1. 
Extension of Confidentiality Period:\nThe confidentiality period stipulated in Section 4 (or the appropriate section number) of the Original Agreement is hereby extended. Previously set to expire [Original Expiry Date], it will now extend to [New Expiry Date].\n\n2. Continued Obligations:\nAll other confidentiality obligations and conditions outlined in the Original Agreement remain unchanged and in full effect.\n\n3. Entire Agreement:\nThis Appendix, in conjunction with the Original Agreement, constitutes the entire agreement between the parties regarding the subject matter herein.\n\n4. Governing Law:\nThis Appendix shall be governed by the laws of [State/Country, e.g., \"California\"].\n\nIN WITNESS WHEREOF, both parties hereto have executed this Appendix as of the date first above written.", + "APPENDIX: LOYALTY CLAUSE\n\nEffective [Day, Month, Year], attached to the Agreement dated [Original Agreement Date] between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Loyalty Commitment:\nFor one year from the Effective Date, both parties pledge loyalty by refraining from activities harmful or competitive to the other within the context of the Agreement.\n\n2. Consequences:\nBreaches may result in Agreement termination and legal action as per the original terms.\n\n3. Governing Law:\nGoverned by the laws of [State/Country, e.g., \"California\"].", + "APPENDIX: CONFIDENTIALITY CLAUSE\n\nThis Appendix is appended to the B2B Contractor Agreement (\"Agreement\") dated [Original Agreement Date, e.g., \"August 15, 2023\"] between [Company Name], hereinafter referred to as \"Company\", and [Contractor Name], hereinafter referred to as \"Contractor\".\n\n1. 
Confidentiality:\n\n1.1 Both Company and Contractor acknowledge that they may have access to or receive information during the term of the Agreement which is confidential to the disclosing party (\"Confidential Information\").\n\n1.2 Confidential Information shall not include information that:\n\n is or becomes public knowledge without breach of this clause;\n was known by the receiving party before receipt from the disclosing party;\n is received from a third party without breach of any obligation of confidentiality.\n\n1.3 The receiving party shall:\n\n use the Confidential Information only for performing under the Agreement;\n take all reasonable precautions to prevent any unauthorized disclosure of the Confidential Information;\n not disclose, reproduce, or distribute Confidential Information without the written consent of the disclosing party.\n\n2. Duration:\n\nThe obligations set forth in this Appendix shall continue for a period of [e.g., \"two years\"] from the date of termination or expiration of the Agreement.\n\n3. Return or Destruction:\n\nUpon the expiration or termination of the Agreement, or upon the disclosing party's request, the receiving party shall return or, if directed by the disclosing party, destroy all copies of the Confidential Information.\n\n4. Governing Law:\n\nThis Appendix shall be governed by the laws of [State/Country, e.g., \"California\"], consistent with the Agreement.", + "APPENDIX: CONFIDENTIALITY CLAUSE\n\nThis Appendix is part of the Agreement dated [Original Agreement Date, e.g., \"August 15, 2023\"] between [Company Name] (\"Company\") and [Contractor Name] (\"Contractor\").\n\n1. Confidential Information:\nBoth parties may access or receive the other's confidential information (\"Confidential Information\") during the Agreement term. Confidential Information excludes publicly known details, data known prior, or information obtained from third parties without confidentiality obligations.\n\n2. 
Obligations:\nThe recipient shall:\n\n Use the Confidential Information solely for the Agreement's purpose.\n Prevent unauthorized disclosures.\n Not disclose without prior written consent.\n\n3. Duration:\nObligations persist for [e.g., \"two years\"] post Agreement termination or expiration.\n\n4. Return/Destruction:\nUpon Agreement conclusion, or on request, all Confidential Information copies should be returned or destroyed.\n\n5. Governing Law:\nGoverned by [State/Country, e.g., \"California\"] laws.", + "NON-DISCLOSURE AGREEMENT (NDA)\n\nEffective [Effective Date, e.g., \"August 15, 2023\"], between [Tech Company Name], located at [Tech Company Address], (\"Company\") and [Contractor's Full Name], located at [Contractor Address], (\"Contractor\").\n\nPurpose:\nContractor will access Company's confidential information during their engagement.\n\n1. Definition:\n\"Confidential Information\" means proprietary data related to the Company\u2019s business, excluding publicly known details, prior known information, or data from third parties without confidentiality bounds.\n\n2. Obligation:\nContractor shall:\n\n Use Confidential Information solely for engagement purposes.\n Prevent unauthorized disclosure.\n\n3. Duration:\nObligations persist for [e.g., \"two years\"] from disclosure date.\n\n4. Return:\nContractor shall return all Confidential Information items upon engagement completion or Company's request, retaining no copies.\n\n5. Remedies:\nBreach may result in legal actions, damages, and costs.\n\n6. Governing Law:\nGoverned by [State/Country, e.g., \"California\"] laws.", + "APPENDIX: EXTENSION OF CONTRACT DURATION\n\nThis Appendix is a part of the Agreement initially dated [Original Agreement Date, e.g., \"August 15, 2021\"], between [Party A Name], located at [Party A Address] (\"Party A\") and [Party B Name], located at [Party B Address] (\"Party B\").\n\n1. 
Duration Extension:\nThe duration of the Agreement referenced above is hereby extended for an additional two (2) years from the original expiration date. With this extension, the new expiration date of the Agreement will be [New Expiration Date, e.g., \"August 15, 2025\"].\n\n2. All Other Terms Remain Unchanged:\nExcept for the extension of the contract duration as described herein, all other terms and conditions of the Agreement remain unchanged and in full effect.\n\n3. Entire Agreement:\nThis Appendix, in conjunction with the original Agreement, constitutes the entire agreement between Party A and Party B. Any previous understandings, written or oral, relating to the subject of this Appendix are superseded by the terms herein.\n\n4. Governing Law:\nThis Appendix shall be governed by the laws of [State/Country, e.g., \"California\"], consistent with the original Agreement.", + "CONFIDENTIALITY AGREEMENT\n\nEffective [Effective Date, e.g., \"August 15, 2023\"], between [Company Name], located at [Company Address] (\"Company\"), and [Supplier Name], located at [Supplier Address] (\"Supplier\").\n\n1. Definition:\n\"Confidential Information\" means proprietary data of the Company, excluding:\n\n Pre-disclosed or publicly known data.\n Info from third parties without confidentiality bounds.\n\n2. Obligations:\nSupplier will:\n\n Use Confidential Information solely for business purposes with the Company.\n Protect its secrecy and prevent unauthorized disclosure.\n Return or destroy all Confidential Information upon request or business completion.\n\n3. Duration:\nObligations last for [e.g., \"two years\"] from disclosure date.\n\n4. Remedies:\nBreaches may result in legal actions, damages, and costs by the Company.\n\n5. 
Governing Law:\nGoverned by [State/Country, e.g., \"California\"] laws.", + "APPENDIX: BREACH CONSEQUENCES\n\nRelated to the Agreement on [Original Agreement Date, e.g., \"August 15, 2023\"], between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Notification:\nSuspected breaches must be reported in writing by the non-breaching party.\n\n2. Rectification:\nThe breaching party has [e.g., \"14 days\"] from notification to rectify, unless irreparable.\n\n3. Fees:\nBreaches incur a penalty of [e.g., \"$10,000\"], aside from claimed damages.\n\n4. Legal Actions:\nUnresolved or damaging breaches may lead to legal actions, including injunctive relief, damages, and legal fees.\n\n5. Termination:\nRepeated or severe breaches can cause Agreement termination by the non-breaching party.\n\n6. Law:\nGoverned by [State/Country, e.g., \"California\"] laws.", + "APPENDIX: TERMS OF CONTRACT TERMINATION\n\nRelated to the Agreement on [Original Agreement Date, e.g., \"August 15, 2023\"], between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Termination for Breach:\nIf either party breaches any conditions of the Agreement, the non-breaching party may terminate the Agreement immediately upon written notice to the breaching party.\n\n2. Termination by Notice:\nEither party may terminate the Agreement for any reason by providing a written notice to the other party. The termination will become effective 30 days after the receipt of such notice.\n\n3. Obligations Upon Termination:\nUpon termination, all rights and obligations under the Agreement will cease, except for those which by their nature should survive termination (e.g., confidentiality, liability for prior breaches, etc.).\n\n4. 
Governing Law:\nThis Appendix, and any disputes arising from it, will be governed by the laws of [State/Country, e.g., \"California\"], consistent with the original Agreement.", + "APPENDIX: OBLIGATIONS UPON TERMINATION\n\nPertaining to the Agreement dated [Original Agreement Date, e.g., \"August 15, 2023\"], between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Return of Property:\nUpon termination, each party shall promptly return to the other all property, materials, and assets belonging to the other party, unless otherwise specified in the Agreement.\n\n2. Confidential Information:\nBoth parties shall continue to abide by any confidentiality obligations set forth in the Agreement. Any confidential information must be returned or destroyed, as instructed by the owning party.\n\n3. Outstanding Payments:\nAll due payments must be settled within [e.g., \"14 days\"] of termination, as per the terms of the original Agreement.\n\n4. Non-Disparagement:\nBoth parties agree not to make any derogatory or disparaging statements about the other party post-termination.\n\n5. Survival of Provisions:\nAny provisions in the Agreement that, by their nature, should persist beyond termination (e.g., indemnity, liability, confidentiality) will continue to be in effect.\n\n6. Notifications:\nEach party must inform their respective stakeholders, if necessary, about the termination in a manner that maintains the goodwill and reputation of both parties.\n\n7. Transition Assistance:\nTo ensure a smooth transition, both parties agree to cooperate, as reasonably requested by the other, for a period of [e.g., \"30 days\"] after termination.\n\n8. 
Governing Law:\nThis Appendix is governed by the laws of [State/Country, e.g., \"California\"], consistent with the original Agreement.", + "NON-DISCLOSURE AGREEMENT (NDA)\n\nEffective [Date, e.g., \"August 15, 2023\"], between [Client Name], (\"Client\") and [Business Name], (\"Business\").\n\nPurpose:\nProtection of confidential information exchanged due to potential collaboration.\n\n1. Confidentiality:\nBusiness agrees to keep secret all Confidential Information shared by Client.\n\n2. Definition:\n\"Confidential Information\" is non-public data shared by either party, excluding info that's publicly available, already known, or received without confidentiality constraints.\n\n3. Duration:\nObligations last [e.g., \"two years\"] from the date of disclosure.\n\n4. Return/Destruction:\nUpon Client's request, Business will return or destroy all Confidential Information.\n\n5. Remedies:\nUnauthorized disclosures may lead to legal action by Client, including damages.\n\n6. Law:\nGoverned by [State/Country, e.g., \"California\"] laws.", + "IT SERVICES AGREEMENT\n\nEffective Date: [Date, e.g., \"August 15, 2023\"]\n\nParties:\n\n [Client Name], located at [Client Address] (\"Client\")\n [Service Provider Name], located at [Service Provider Address] (\"Provider\")\n\nScope of Work:\nProvider agrees to offer IT services, including [e.g., \"network setup, software installation, and routine maintenance\"], as detailed in Attachment A.\n\nPayment:\nClient shall pay Provider [e.g., \"$1,000\"] per month. Invoices will be sent monthly and are due within [e.g., \"30 days\"].\n\nDuration:\nThis Agreement starts on [Start Date] and ends on [End Date], unless terminated earlier.\n\nTermination:\nEither party may terminate with [e.g., \"30 days\"] written notice. 
Upon termination, any unpaid fees for services rendered become immediately due.\n\nConfidentiality:\nBoth parties agree to keep all business and technical information confidential.\n\nLimitation of Liability:\nProvider's liability is limited to the amount paid by the Client for the specific service causing damage.\n\nGoverning Law:\nThis Agreement is governed by the laws of [State/Country, e.g., \"California\"].\n\nEntire Agreement:\nThis constitutes the full agreement between both parties.", + "CONFIDENTIALITY AMENDMENT TO NDA\n\nThis Amendment, effective [Date, e.g., \"August 15, 2023\"], modifies the NDA dated [Original Agreement Date] between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Responsibilities:\n\na) Protection: Parties must safeguard Confidential Information at least as they do their own.\n\nb) Access: Access is limited to those needing it who are also bound by confidentiality.\n\nc) Breach Notification: Parties must immediately inform the other of any breaches.\n\nd) Return/Destruction: Upon request or agreement end, parties must return or certify the destruction of Confidential Information.\n\ne) No Reverse Engineering: Receiving party shall not reverse engineer any provided items.\n\n2. Remedies:\nUnauthorized disclosures permit injunctive relief and other legal remedies.\n\n3. Original Agreement:\nExcept for this Amendment, the NDA remains unchanged.\n\nGoverning Law:\nAs per the NDA.", + "LOYALTY AGREEMENT\n\nThis Agreement (\"Agreement\") is made as of [Date, e.g., \"August 15, 2023\"], between:\n\n [Party A Name], with its principal office at [Party A Address] (\"Party A\"),\n [Party B Name], with its principal office at [Party B Address] (\"Party B\").\n\nPurpose:\nThe parties wish to collaborate and establish a loyal relationship in their joint business endeavors.\n\n1. 
Loyalty Commitment:\n\na) Both parties commit to act in good faith and refrain from engaging in any activity or partnership that might conflict with the interests of the other party during the term of this Agreement.\n\nb) Neither party shall assist, collaborate, or engage with third parties that may cause harm or disrepute to the other party.\n\nc) Each party shall prioritize the other's interests in situations where opportunities arise from their collaboration.\n\n2. Non-Solicitation:\nDuring the term of this Agreement, and for [e.g., \"one year\"] thereafter, neither party shall solicit or attempt to entice away any clients, customers, or employees of the other party.\n\n3. Duration:\nThis Agreement will begin on the Effective Date and remain in effect for [e.g., \"two years\"] unless terminated earlier by mutual consent.\n\n4. Termination:\nEither party may terminate this Agreement with [e.g., \"30 days\"] written notice if the other party breaches any term herein.\n\n5. Confidentiality:\nBoth parties agree to maintain the confidentiality of all proprietary or non-public information obtained during the collaboration.\n\n6. Governing Law:\nThis Agreement is governed by the laws of [State/Country, e.g., \"California\"].\n\n7. Entire Agreement:\nThis document constitutes the full understanding between both parties, superseding all prior discussions, agreements, or understandings.", + "BUSINESS CONSULTING CONTRACT\n\nThis Consulting Contract (\"Contract\") is made as of [Date, e.g., \"August 15, 2023\"], between:\n\n [Client Name], with its principal office at [Client Address] (\"Client\"),\n [Consultant Name], with its principal office at [Consultant Address] (\"Consultant\").\n\nPurpose:\nThe Consultant will provide professional consulting services to the Client as described below.\n\n1. 
Scope of Services:\nConsultant agrees to offer services including, but not limited to:\na) Business strategy development\nb) Market analysis\nc) [Other services as needed]\nAny additional services will require an amendment to this Contract.\n\n2. Compensation:\nFor services rendered, the Client shall pay the Consultant [e.g., \"$100\"] per hour. Invoices will be issued [e.g., \"monthly\"] and are due within [e.g., \"30 days\"] of receipt.\n\n3. Duration:\nThis Contract begins on [Start Date] and ends on [End Date], unless extended by mutual agreement or terminated earlier.\n\n4. Termination:\nEither party can terminate this Contract with [e.g., \"30 days\"] written notice. In case of termination, the Client will pay for services rendered up to the notice date.\n\n5. Confidentiality:\nThe Consultant shall maintain the confidentiality of all proprietary information received during the engagement, unless obligated by law to disclose.\n\n6. Non-compete:\nFor [e.g., \"six months\"] after Contract termination, the Consultant agrees not to provide similar services to any direct competitor of the Client within [e.g., \"50 miles\"] of the Client's primary location.\n\n7. Independent Contractor:\nThe Consultant is an independent contractor and not an employee of the Client.\n\n8. Governing Law:\nThis Contract shall be governed by and interpreted under the laws of [State/Country, e.g., \"California\"].\n\n9. Entire Agreement:\nThis Contract represents the entire understanding between both parties, superseding all prior negotiations, discussions, or agreements.", + "APPENDIX A: CONFIDENTIALITY BREACH FEES\n\nThis Appendix is attached to and made part of the Contract (\"Original Contract\") dated [Original Contract Date], between [Party A Name] (\"Party A\") and [Party B Name] (\"Party B\").\n\n1. Purpose:\nThis Appendix defines the fees and penalties associated with any breach of confidentiality as stipulated in the Original Contract.\n\n2. 
Confidentiality Breach Fee:\nIn the event of a breach of the confidentiality provisions in the Original Contract by either party:\n\na) The breaching party will be liable for an immediate penalty of [specific amount, e.g., \"$10,000\"].\n\nb) If the breach results in any direct financial loss to the non-breaching party, the breaching party shall additionally reimburse the non-breaching party for the full amount of such loss.\n\nc) The breaching party will also bear all costs, including legal fees, that the non-breaching party incurs while addressing or remedying the breach.\n\n3. Payment Terms:\nPayment of any penalty or reimbursement as defined above shall be made within [e.g., \"30 days\"] of written notification of the breach.\n\n4. Disputes:\nAny disputes related to this Appendix shall be resolved as stipulated in the dispute resolution clause of the Original Contract.\n\n5. Continuation of Original Contract:\nExcept as modified by this Appendix, the Original Contract remains in full force and effect.\n\n6. Governing Law:\nThis Appendix, consistent with the Original Contract, is governed by the laws of [State/Country, e.g., \"California\"].", + "APPENDIX A: STRICT CONFIDENTIALITY BREACH PENALTIES\n\nThis Appendix is annexed to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name].\n\n1. Breach Fees:\nIf a party breaches confidentiality:\n\na) Immediate penalty: [e.g., \"$50,000\"].\n\nb) For reputational harm or business loss: Additional [e.g., \"$100,000\"].\n\nc) Full reimbursement for direct financial losses caused by the breach.\n\nd) All associated legal and remedy costs borne by the breaching party.\n\n2. Remedial Actions:\nThe breaching party must swiftly rectify the breach, potentially including public apologies or recalling disclosed information.\n\n3. Payment:\nDue within [e.g., \"15 days\"] of breach notification.\n\n4. Termination:\nNon-breaching party can immediately terminate the main contract upon a breach.\n\n5. 
Governing Law:\nThis Appendix adheres to [State/Country, e.g., \"California\"] laws." +] \ No newline at end of file diff --git a/docs/src/examples/keyword_counting/README.md b/docs/src/examples/keyword_counting/README.md new file mode 100644 index 0000000..f923fdc --- /dev/null +++ b/docs/src/examples/keyword_counting/README.md @@ -0,0 +1,45 @@ +# Keyword Counting + +The use case in this directory computes the frequencies of occurring countries +in a long passage of text. We provide implementations of seven different approaches: +- IO +- Chain-of-Thought (CoT) +- Tree of Thought (ToT): + - ToT: wider tree, meaning more branches per level + - ToT2: tree with more levels, but fewer branches per level +- Graph of Thoughts (GoT): + - GoT4: split passage into 4 sub-passages + - GoT8: split passage into 8 sub-passages + - GoTx: split by sentences + +## Data + +We provide an input file with 100 samples: `countries.csv`. It is also possible to use +the data generator `dataset_gen_countries.py` to generate additional or +different samples (using GPT-4). The parameters can be updated on line 54 (number of samples to be generated). +Note that not every generated sample will be included in the dataset, as each sample is +additionally tested for validity (observe script output for details). + +## Execution + +The file to execute the use case is called +`keyword_counting.py`. In the main body, one can +select the specific samples to be run (variable samples) and the +approaches (variable approaches). It is also possible to set a budget in +dollars (variable budget). + +The Python scripts will create the directory `result`, if it is not +already present. In the `result` directory, another directory is created +for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`. +Inside each execution specific directory two files (`config.json`, +`log.log`) and a separate directory for each selected approach are +created. 
`config.json` contains the configuration of the run: input data, +selected approaches, name of the LLM, and the budget. `log.log` contains +the prompts and responses of the LLM as well as additional debug data. +The approach directories contain a separate json file for every sample +and the file contains the Graph Reasoning State (GRS) for that sample. + +## Plot Data + +Change the results directory in line 150 of `plot.py` and run `python3 +plot.py` to plot your data. diff --git a/docs/src/examples/keyword_counting/dataset_gen_countries.py b/docs/src/examples/keyword_counting/dataset_gen_countries.py new file mode 100644 index 0000000..8e1c315 --- /dev/null +++ b/docs/src/examples/keyword_counting/dataset_gen_countries.py @@ -0,0 +1,535 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Ales Kubicek + +import csv +from typing import List, Tuple +from graph_of_thoughts import controller + + +def find_country_indices(text: str, country: str) -> List[Tuple[int, str]]: + """ + Finds the indices of the occurrences of a given country in the input text. + + :param text: Input text. + :type text: str + :param country: Country to search for. + :type country: str + :return: List of tuples, where each tuple consists of index and country. 
+ :rtype: List[Tuple[int, str]] + """ + + indices = [] + index = text.find(country) + while index != -1: + indices.append(index) + index = text.find(country, index + 1) + return [(index, country) for index in indices] + + +primary_countries = [ + "Afghanistan", + "Argentina", + "Australia", + "Brazil", + "Canada", + "China", + "Colombia", + "Cuba", + "Egypt", + "France", + "Germany", + "Greece", + "India", + "Indonesia", + "Iran", + "Iraq", + "Ireland", + "Israel", + "Italy", + "Japan", + "Kenya", + "Mexico", + "Netherlands", + "New Zealand", + "Nigeria", + "North Korea", + "Pakistan", + "Peru", + "Philippines", + "Poland", + "Portugal", + "Russia", + "Saudi Arabia", + "South Africa", + "South Korea", + "Spain", + "Sweden", + "Switzerland", + "Thailand", + "Turkey", + "Ukraine", + "United Arab Emirates", + "United Kingdom", + "United States", + "Venezuela", + "Vietnam", + "Yemen", + "Zimbabwe", + "Belgium", + "Norway", +] +primary_adjectives = [ + "Afghan", + "Argentine ", + "Argentinean", + "Australian", + "Brazilian", + "Canadian", + "Chinese", + "Colombian", + "Cuban", + "Egyptian", + "French", + "German", + "Greek", + "Indian", + "Indonesian", + "Iranian", + "Iraqi", + "Irish", + "Israeli", + "Italian", + "Japanese", + "Kenyan", + "Mexican", + "Dutch", + "New Zealander ", + "Kiwi", + "Nigerian", + "North Korean", + "Pakistani", + "Peruvian", + "Filipino", + "Philippine", + "Polish", + "Portuguese", + "Russian", + "Saudi ", + "Saudi Arabian", + "South African", + "South Korean", + "Spanish", + "Swedish", + "Swiss", + "Thai", + "Turkish", + "Ukrainian", + "United Arab Emirates", + "Emirati", + "British", + "American", + "Venezuelan", + "Vietnamese", + "Yemeni", + "Zimbabwean", + "Belgian", + "Norwegian", +] +rest_countries = [ + "Albania", + "Algeria", + "Andorra", + "Angola", + "Antigua and Barbuda", + "Armenia", + "Austria", + "Azerbaijan", + "The Bahamas", + "Bahrain", + "Bangladesh", + "Barbados", + "Belarus", + "Belize", + "Benin", + "Bhutan", + "Bolivia", + 
"Bosnia and Herzegovina", + "Botswana", + "Brunei", + "Bulgaria", + "Burkina Faso", + "Burundi", + "Cabo Verde", + "Cambodia", + "Cameroon", + "Central African Republic", + "Chad", + "Chile", + "Comoros", + "Congo", + "Costa Rica", + "Côte d’Ivoire", + "Croatia", + "Cyprus", + "Czech Republic", + "Czechia", + "Denmark", + "Djibouti", + "Dominica", + "Dominican Republic", + "East Timor", + "Timor-Leste", + "Ecuador", + "El Salvador", + "Equatorial Guinea", + "Eritrea", + "Estonia", + "Eswatini", + "Ethiopia", + "Fiji", + "Finland", + "Gabon", + "The Gambia", + "Georgia", + "Ghana", + "Grenada", + "Guatemala", + "Guinea", + "Guinea-Bissau", + "Guyana", + "Haiti", + "Honduras", + "Hungary", + "Iceland", + "Jamaica", + "Jordan", + "Kazakhstan", + "Kiribati", + "Kosovo", + "Kuwait", + "Kyrgyzstan", + "Laos", + "Latvia", + "Lebanon", + "Lesotho", + "Liberia", + "Libya", + "Liechtenstein", + "Lithuania", + "Luxembourg", + "Madagascar", + "Malawi", + "Malaysia", + "Maldives", + "Mali", + "Malta", + "Marshall Islands", + "Mauritania", + "Mauritius", + "Micronesia", + "Moldova", + "Monaco", + "Mongolia", + "Montenegro", + "Morocco", + "Mozambique", + "Myanmar", + "Burma", + "Namibia", + "Nauru", + "Nepal", + "Nicaragua", + "Niger", + "North Macedonia", + "Oman", + "Palau", + "Panama", + "Papua New Guinea", + "Paraguay", + "Qatar", + "Romania", + "Rwanda", + "Saint Kitts and Nevis", + "Saint Lucia", + "Saint Vincent and the Grenadines", + "Samoa", + "San Marino", + "Sao Tome and Principe", + "Senegal", + "Serbia", + "Seychelles", + "Sierra Leone", + "Singapore", + "Slovakia", + "Slovenia", + "Solomon Islands", + "Somalia", + "Sri Lanka", + "Sudan", + "Suriname", + "Syria", + "Taiwan", + "Tajikistan", + "Tanzania", + "Togo", + "Tonga", + "Trinidad and Tobago", + "Tunisia", + "Turkmenistan", + "Tuvalu", + "Uganda", + "Uruguay", + "Uzbekistan", + "Vanuatu", + "Vatican City", + "Zambia", +] +rest_adjectives = [ + "Albanian", + "Algerian", + "Andorran", + "Angolan", + "Antiguan 
and Barbudan", + "Armenian", + "Austrian", + "Azerbaijani", + "Bahamian", + "Bahraini", + "Bangladeshi", + "Barbadian", + "Belarusian", + "Belizean", + "Beninese", + "Bhutanese", + "Bolivian", + "Bosnian and Herzegovinian", + "Botswanan", + "Bruneian", + "Bulgarian", + "Burkinabè", + "Burundian", + "Cape Verdean", + "Cambodian", + "Cameroonian", + "Central African", + "Chadian", + "Chilean", + "Comorian", + "Congolese", + "Costa Rican", + "Ivorian", + "Croatian", + "Cypriot", + "Czech", + "Czech", + "Danish", + "Djiboutian", + "Dominican", + "Dominican", + "East Timorese", + "Timorese", + "Ecuadorian", + "Salvadoran", + "Equatorial Guinean", + "Eritrean", + "Estonian", + "Swazi", + "Ethiopian", + "Fijian", + "Finnish", + "Gabonese", + "Gambian", + "Georgian", + "Ghanaian", + "Grenadian", + "Guatemalan", + "Guinean", + "Bissau-Guinean", + "Guyanese", + "Haitian", + "Honduran", + "Hungarian", + "Icelandic", + "Jamaican", + "Jordanian", + "Kazakh", + "I-Kiribati", + "Kosovar", + "Kuwaiti", + "Kyrgyz", + "Laotian", + "Latvian", + "Lebanese", + "Basotho", + "Liberian", + "Libyan", + "Liechtensteiner", + "Lithuanian", + "Luxembourger", + "Malagasy", + "Malawian", + "Malaysian", + "Maldivian", + "Malian", + "Maltese", + "Marshallese", + "Mauritanian", + "Mauritian", + "Micronesian", + "Moldovan", + "Monégasque", + "Mongolian", + "Montenegrin", + "Moroccan", + "Mozambican", + "Myanmarese", + "Burmese", + "Namibian", + "Nauruan", + "Nepali", + "Nicaraguan", + "Nigerien", + "Macedonian", + "Omani", + "Palauan", + "Panamanian", + "Papua New Guinean", + "Paraguayan", + "Qatari", + "Romanian", + "Rwandan", + "Kittitian", + "Nevisian", + "Saint Lucian", + "Vincentian", + "Samoan", + "Sammarinese", + "Santomean", + "Senegalese", + "Serbian", + "Seychellois", + "Sierra Leonean", + "Singaporean", + "Slovak", + "Slovenian", + "Solomon Islander", + "Somali", + "Sri Lankan", + "Sudanese", + "Surinamese", + "Syrian", + "Taiwanese", + "Tajik", + "Tanzanian", + "Togolese", + "Tongan", + 
"Trinidadian ", + "Tobagonian", + "Tunisian", + "Turkmen", + "Tuvaluan", + "Ugandan", + "Uruguayan", + "Uzbek", + "Ni-Vanuatu", + "Vatican", + "Zambian", +] + +lm = controller.ChatGPT( + "../../graph_of_thoughts/controller/config.json", model_name="chatgpt4" +) + +prompt = """ Generate a continuous passage (single paragraph) of 16 sentences following the provided restrictions precisely. + + +The following restrictions must apply to the generated text: +1. Single continuous passage of exactly 16 sentences without any paragraphs (line breaks). +2. Countries appearing in the passage must be only from the provided list. No other countries can be mentioned. +3. When a country is mentioned in the passage, it must be mentioned multiple times consecutively in the same or following sentences. +4. Passage should be creative and coherent. +5. Using adjectives of a country is NOT allowed (e.g., "Colombian coffee" should be "coffee from Colombia" instead) + + + +List of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway] +Passage: +While exploring the ancient ruins in Greece, Sam discovered manuscripts that hinted at the hidden treasures of Egypt. It seemed these treasures were once stolen from Egypt by rogue merchants and secretly moved to Greece, only to be buried under layers of time. Intrigued, he shared the findings with his friend Maya from India, who was an expert in decoding ancient languages. 
She pointed out that there was a similar legend in India about treasures from China that had somehow ended up in the southern parts of India, possibly through trade or conquest. She also recounted tales from China that spoke of incredible artifacts from Indonesia, suggesting a rich tapestry of cultural exchanges throughout history. Their conversation took an interesting turn when Sam mentioned a book he'd read about the mysterious connections between Argentina and Brazil. The book detailed how both Argentina and Brazil, despite their differences, shared tales of lost civilizations and forgotten cities deep within their jungles. Maya excitedly mentioned that she'd been to the Philippines and had heard local legends about ancient ties with Indonesia and how traders from the Philippines would journey to Indonesia in search of spices and other goods. Thinking of spices, Sam fondly recalled his trip to Spain, where he had learned about the country's historical links with Portugal. Spain and Portugal, both maritime giants of their time, had extensively explored unknown lands and established trade routes. Maya, remembering her travels, said that she had been to Belgium once and was fascinated by its connections with the Netherlands. Both Belgium and the Netherlands, she explained, had rich histories of art, trade, and diplomacy that intertwined them for centuries. They both sat back, marveling at the interconnectedness of the world and how countries from Greece to the Netherlands shared tales of adventure, discovery, and mystery. 
+ + +List of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway] +Passage: +""" + +num_samples = 100 +sample_id = 0 +result = [["ID", "Text", "Countries", "Sentences", "Characters"]] + +""" +Generate passages of text that contain country names to be used as input for the +keyword counting. + +Input(x) : Number of samples +Output(y) : Passages written to a file in the CSV format. + File contains the sample ID, the passage, the countries the passage + contains, the sentences of the passages, number of characters of the + passage. +""" + +# For x batches of y responses +for _ in range(num_samples): + response = lm.query(prompt, 1) + texts = lm.get_response_texts(response) + for text in texts: + # Clean paragraphs - single long passage + text = text.strip().replace("\n", "") + + # Get all occurrences of all primary permissible countries + occurrences = [] + for country in [country for country in primary_countries if country in text]: + occurrences.extend(find_country_indices(text, country)) + # Order exactly how they appear in the text + ordered_occurrences = [country[1] for country in sorted(occurrences)] + + # Check invalid countries and adjectives + invalid_primary_adjective = [ + adjective for adjective in primary_adjectives if adjective in text + ] + invalid_rest_country = [ + country for country in rest_countries if country in text + ] + invalid_rest_adjective = [ + adjective for adjective in rest_adjectives if adjective in text + ] + invalid_count = ( + len(invalid_primary_adjective) + + len(invalid_rest_country) + + 
def get_final_scores(results_complete):
    """
    Reduce every sample's operation log to a final summary row.

    For each sample, the ``ground_truth_evaluator`` operation supplies the
    score (minimum over candidate thoughts) and whether any thought solved
    the problem; the operation carrying a ``cost`` entry supplies token and
    cost totals. Later operations overwrite earlier ones.

    :param results_complete: Mapping of method name to a list of
        ``{"key": sample_id, "data": [operation, ...]}`` records.
    :return: Mapping of method name to rows of
        ``[key, score, solved, prompt_tokens, completion_tokens, cost]``,
        sorted by sample key.
    """
    scores = {}
    for method, results in results_complete.items():
        rows = []
        for entry in results:
            # Defaults used when no evaluator / cost operation is present.
            best_score, is_solved = 100, False
            total_cost, n_prompt, n_completion = 1, 0, 0
            for operation in entry["data"]:
                if operation.get("operation") == "ground_truth_evaluator":
                    try:
                        best_score = min(operation["scores"])
                        is_solved = any(operation["problem_solved"])
                    except:
                        # Malformed evaluator entry: keep the defaults.
                        continue
                if "cost" in operation:
                    total_cost = operation["cost"]
                    n_prompt = operation["prompt_tokens"]
                    n_completion = operation["completion_tokens"]
            rows.append(
                [entry["key"], best_score, is_solved, n_prompt, n_completion, total_cost]
            )
        scores[method] = sorted(rows, key=lambda row: row[0])
    return scores
y_upper + 1) + plt.yticks(fontsize=fig_fontsize) + if display_left_ylabel: + ax.set_ylabel(f"Number of errors; the lower the better", fontsize=fig_fontsize) + + ax.set_title(f"Keyword Counting") + + ax2 = ax.twinx() + ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)") + ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize) + ax2.set_ylim(0, cost_upper) + number_of_ticks = len(ax.get_yticks()) + tick_interval = cost_upper / (number_of_ticks) + ax2_ticks = [tick_interval * i for i in range(number_of_ticks)] + + ax2.set_yticks(ax2_ticks) + + if display_right_ylabel: + ax2.set_ylabel( + "Total Cost ($); the lower the better", + color="#1919ff", + fontsize=fig_fontsize, + ) + + if display_solved: + annotation_height = y_upper + annotation_offset + count = 1 + for method in methods_order: + if method not in results: + continue + solved = results[method]["solved"] + ax.text( + count, + annotation_height, + f"{solved}", + ha="center", + va="bottom", + fontsize=fig_fontsize, + ) + count += 1 + + model = model.replace(".", "").replace("-", "").lower() + fig.savefig(f"keyword_counting_{model}.pdf", bbox_inches="tight") + + +plot_results( + get_plotting_data("results/"), + display_solved=True, + annotation_offset=-0.3, + model="GPT-3.5", + y_upper=35, + display_left_ylabel=True, + display_right_ylabel=True, + cost_upper=9, +) diff --git a/docs/src/examples/set_intersection/README.md b/docs/src/examples/set_intersection/README.md new file mode 100644 index 0000000..a780dae --- /dev/null +++ b/docs/src/examples/set_intersection/README.md @@ -0,0 +1,52 @@ +# Set Intersection + +The use case in this directory computes the intersection of two input +sets. 
We provide implementations of five different approaches for 32, 64 +and 128 elements: +- IO +- Chain-of-Thought (CoT) +- Tree of Thought (ToT): + - ToT: wider tree, meaning more branches per level + - ToT2: tree with more levels, but fewer branches per level +- Graph of Thoughts (GoT) + +## Data + +We provide input files with 100 precomputed samples for each set length: +`set_intersection_.csv`. It is also possible to use +the data generator `dataset_gen_intersection.py` to generate additional or +different samples. The parameters can be updated in lines 24 to 28 of +the main body: +- set_size = 32 # size of the generated sets +- int_value_ubound = 64 # (exclusive) upper limit of generated numbers +- seed = 42 # seed of the random number generator +- num_sample = 100 # number of samples +- filename = 'set_intersection_032.csv' # output filename + +## Execution + +The files to execute the use case are called +`set_intersection_.py`. In the main body, one can +select the specific samples to be run (variable sample) and the +approaches (variable approaches). It is also possible to set a budget in +dollars (variable budget). +The input filename for the samples is currently hardcoded to +`set_intersection_.csv`, but can be updated in the +function `run`. + +The Python scripts will create the directory `result`, if it is not +already present. In the `result` directory, another directory is created +for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`. +Inside each execution specific directory two files (`config.json`, +`log.log`) and a separate directory for each selected approach are +created. `config.json` contains the configuration of the run: input data, +selected approaches, name of the LLM, and the budget. `log.log` contains +the prompts and responses of the LLM as well as additional debug data. +The approach directories contain a separate json file for every sample +and the file contains the Graph Reasoning State (GRS) for that sample. 
def scramble(array: np.ndarray, rng: np.random.Generator) -> None:
    """
    Shuffle the elements of ``array`` in place by a sequence of random swaps.

    One random partner index is drawn per position up front, so the
    resulting permutation is fully determined by the state of ``rng``;
    each position is then swapped with its partner in turn.

    :param array: Array to be scrambled in place.
    :type array: numpy.ndarray
    :param rng: Random number generator.
    :type rng: numpy.random.Generator
    """

    n = array.shape[0]
    partners = rng.integers(0, n, n)

    for i, j in enumerate(partners):
        # Tuple assignment evaluates the right side first, so this is a
        # swap even when i == j.
        array[i], array[j] = array[j], array[i]
+ """ + + set_size = 32 # size of the generated sets + int_value_ubound = 64 # (exclusive) upper limit of generated numbers + seed = 42 # seed of the random number generator + num_sample = 100 # number of samples + filename = "set_intersection_032.csv" # output filename + + assert 2 * set_size <= int_value_ubound + + rng = np.random.default_rng(seed) + + intersection_sizes = rng.integers(set_size // 4, 3 * set_size // 4, num_sample) + + np.set_printoptions( + linewidth=np.inf + ) # no wrapping in the array fields in the output file + + with open(filename, "w") as f: + fieldnames = ["ID", "SET1", "SET2", "INTERSECTION"] + writer = csv.DictWriter(f, delimiter=",", fieldnames=fieldnames) + writer.writeheader() + + for i in range(num_sample): + intersection_size = intersection_sizes[i] + + full_set = np.arange(0, int_value_ubound, dtype=np.int16) + + scramble(full_set, rng) + + intersection = full_set[:intersection_size].copy() + + sorted_intersection = np.sort(intersection) + + set1 = full_set[:set_size].copy() + set2 = np.concatenate( + [intersection, full_set[set_size : 2 * set_size - intersection_size]] + ) + + scramble(set1, rng) + scramble(set2, rng) + + writer.writerow( + { + "ID": i, + "SET1": set1.tolist(), + "SET2": set2.tolist(), + "INTERSECTION": sorted_intersection.tolist(), + } + ) diff --git a/docs/src/examples/set_intersection/plot.py b/docs/src/examples/set_intersection/plot.py new file mode 100644 index 0000000..55a3e7b --- /dev/null +++ b/docs/src/examples/set_intersection/plot.py @@ -0,0 +1,184 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
def get_final_scores(results_complete):
    """
    Reduce every sample's operation log to a final summary row.

    For each sample, the ``ground_truth_evaluator`` operation supplies the
    score (minimum over candidate thoughts) and whether any thought solved
    the problem; the operation carrying a ``cost`` entry supplies token and
    cost totals. Later operations overwrite earlier ones.

    :param results_complete: Mapping of method name to a list of
        ``{"key": sample_id, "data": [operation, ...]}`` records.
    :return: Mapping of method name to rows of
        ``[key, score, solved, prompt_tokens, completion_tokens, cost]``,
        sorted by sample key.
    """
    scores = {}
    for method, results in results_complete.items():
        rows = []
        for entry in results:
            # Defaults used when no evaluator / cost operation is present.
            best_score, is_solved = 100, False
            total_cost, n_prompt, n_completion = 1, 0, 0
            for operation in entry["data"]:
                if operation.get("operation") == "ground_truth_evaluator":
                    try:
                        best_score = min(operation["scores"])
                        is_solved = any(operation["problem_solved"])
                    except:
                        # Malformed evaluator entry: keep the defaults.
                        continue
                if "cost" in operation:
                    total_cost = operation["cost"]
                    n_prompt = operation["prompt_tokens"]
                    n_completion = operation["completion_tokens"]
            rows.append(
                [entry["key"], best_score, is_solved, n_prompt, n_completion, total_cost]
            )
        scores[method] = sorted(rows, key=lambda row: row[0])
    return scores
"cot", "tot", "tot2", "got"], + model="GPT-3.5", + length=32, + y_lower=0, + cost_upper=0.0, + display_solved=True, + annotation_offset=0, + display_left_ylabel=False, + display_right_ylabel=False, +): + methods_order = [method for method in methods_order if method in results] + # Extract scores based on the order + scores_ordered = [ + [score for score in results[method]["scores"] if score != 1000] + for method in methods_order + ] + total_costs = [sum(results[method]["costs"]) for method in methods_order] + + # Create figure and axis + fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5)) + + # Create boxplots + positions = range(1, len(methods_order) + 1) + ax.boxplot(scores_ordered, positions=positions) + + fig_fontsize = 12 + + # Set the ticks and labels + methods_labels = ["IO", "CoT", "ToT", "ToT2", "GoT"] + plt.yticks(fontsize=fig_fontsize) + ax.set_xticks(range(1, len(methods_order) + 1)) + ax.set_xticks(range(1, len(methods_order) + 1)) + ax.set_xticklabels(methods_labels, fontsize=fig_fontsize) + + y_upper = length + + range_increase = 1 + if display_solved: + if length < 48: + range_increase = 2 + elif length < 96: + range_increase = 4 + else: + range_increase = 8 + + ax.set_ylim(y_lower, y_upper + range_increase) + ax1_yticks = range( + y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8) + ) + ax.set_yticks(ax1_yticks) + if display_left_ylabel: + ax.set_ylabel( + f"#incorrect elements; the lower the better", fontsize=fig_fontsize + ) + + ax.set_title(f"{length} elements") + + ax2 = ax.twinx() + ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)") + ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize) + if cost_upper > 0: + ax2.set_ylim(0, cost_upper) + number_of_ticks = len(ax.get_yticks()) + tick_interval = cost_upper / (number_of_ticks) + ax2_ticks = [tick_interval * i for i in range(number_of_ticks)] + + # Set custom tick positions for ax2 + ax2.set_yticks(ax2_ticks) + + if 
def string_to_list(string: str) -> List[int]:
    """
    Helper function to convert a list encoded inside a string into a Python
    list object of integer elements.

    :param string: Input string containing a list, e.g. "[1, 2, 3]".
    :type string: str
    :return: List of integer elements.
    :rtype: List[int]
    :raise AssertionError: If input string does not contain a list.
    """

    assert string[0] == "[" and string[-1] == "]", "String is not a list."
    inner = string[1:-1].strip()
    if not inner:
        # "[]" is a valid encoding of the empty list; without this guard
        # int("") would raise ValueError.
        return []
    return [int(num) for num in inner.split(",")]
def num_errors(state: Dict) -> float:
    """
    Function to locally count the number of errors that serves as a score.

    Compares the sorted LLM answer against the true intersection of the two
    input sets with a linear merge, counting every element present on one
    side but not the other.

    :param state: Thought state to be scored.
    :type state: Dict
    :return: Number of errors, or 1000 if the state cannot be parsed.
    :rtype: float
    """

    try:
        left = string_to_set(state["set1"])
        right = string_to_set(state["set2"])
        # A non-empty "subset" restricts the comparison to that subset.
        subset = state.get("subset")
        if subset is not None and subset != "":
            right = string_to_set(subset)
        truth = sorted(left & right)
        answer = sorted(string_to_list(state["current"]))

        mismatches = 0
        t = 0
        a = 0
        while t < len(truth) and a < len(answer):
            if truth[t] == answer[a]:
                t += 1
                a += 1
            elif truth[t] < answer[a]:
                # Ground-truth element missing from the answer.
                t += 1
                mismatches += 1
            else:
                # Answer element not present in the ground truth.
                a += 1
                mismatches += 1
        # Whatever remains on either side is unmatched as well.
        mismatches += (len(truth) - t) + (len(answer) - a)
        return mismatches
    except:
        return 1000
+ +The Python scripts will create the directory `result`, if it is not +already present. In the 'result' directory, another directory is created +for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`. +Inside each execution specific directory two files (`config.json`, +`log.log`) and a separate directory for each selected approach are +created. `config.json` contains the configuration of the run: input data, +selected approaches, name of the LLM, and the budget. `log.log` contains +the prompts and responses of the LLM as well as additional debug data. +The approach directories contain a separate json file for every sample +and the file contains the Graph Reasoning State (GRS) for that sample. + +## Plot Data + +Change the results directory in line 171 of `plot.py` and update the +length parameter in the subsequent line and run `python3 plot.py` to +plot your data. diff --git a/docs/src/examples/sorting/plot.py b/docs/src/examples/sorting/plot.py new file mode 100644 index 0000000..f1dc5e6 --- /dev/null +++ b/docs/src/examples/sorting/plot.py @@ -0,0 +1,186 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
def get_final_scores(results_complete):
    """
    Reduce every sample's operation log to a final summary row.

    For each sample, the ``ground_truth_evaluator`` operation supplies the
    score (minimum over candidate thoughts) and whether any thought solved
    the problem; the operation carrying a ``cost`` entry supplies token and
    cost totals. Later operations overwrite earlier ones.

    :param results_complete: Mapping of method name to a list of
        ``{"key": sample_id, "data": [operation, ...]}`` records.
    :return: Mapping of method name to rows of
        ``[key, score, solved, prompt_tokens, completion_tokens, cost]``,
        sorted by sample key.
    """
    scores = {}
    for method, results in results_complete.items():
        rows = []
        for entry in results:
            # Defaults used when no evaluator / cost operation is present.
            best_score, is_solved = 100, False
            total_cost, n_prompt, n_completion = 1, 0, 0
            for operation in entry["data"]:
                if operation.get("operation") == "ground_truth_evaluator":
                    try:
                        best_score = min(operation["scores"])
                        is_solved = any(operation["problem_solved"])
                    except:
                        # Malformed evaluator entry: keep the defaults.
                        continue
                if "cost" in operation:
                    total_cost = operation["cost"]
                    n_prompt = operation["prompt_tokens"]
                    n_completion = operation["completion_tokens"]
            rows.append(
                [entry["key"], best_score, is_solved, n_prompt, n_completion, total_cost]
            )
        scores[method] = sorted(rows, key=lambda row: row[0])
    return scores
"cot", "tot", "tot2", "got"], + model="GPT-3.5", + length=32, + y_lower=0, + cost_upper=0.0, + display_solved=True, + annotation_offset=0, + display_left_ylabel=False, + display_right_ylabel=False, +): + methods_order = [method for method in methods_order if method in results] + # Extract scores based on the order + scores_ordered = [ + [ + min(score, length) + for score in results[method]["scores"] + if score != 100 and score != 300 + ] + for method in methods_order + ] + total_costs = [sum(results[method]["costs"]) for method in methods_order] + + # Create figure and axis + fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5)) + + # Create boxplots + positions = range(1, len(methods_order) + 1) + ax.boxplot(scores_ordered, positions=positions) + + fig_fontsize = 12 + + # Set the ticks and labels + method_labels = ["IO", "CoT", "ToT", "ToT2", "GoT"] + plt.yticks(fontsize=fig_fontsize) + ax.set_xticks(range(1, len(methods_order) + 1)) + ax.set_xticks(range(1, len(methods_order) + 1)) + ax.set_xticklabels(method_labels, fontsize=fig_fontsize) + + y_upper = length + + range_increase = 1 + if display_solved: + if length < 48: + range_increase = 2 + elif length < 96: + range_increase = 4 + else: + range_increase = 8 + + ax.set_ylim(y_lower, y_upper + range_increase) + ax1_yticks = range( + y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8) + ) + ax.set_yticks(ax1_yticks) + if display_left_ylabel: + ax.set_ylabel(f"#incorrectly sorted elements; the lower the better") + + ax.set_title(f"{length} elements") + + ax2 = ax.twinx() + ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)") + ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize) + if cost_upper > 0: + ax2.set_ylim(0, cost_upper) + number_of_ticks = len(ax.get_yticks()) + tick_interval = cost_upper / (number_of_ticks) + ax2_ticks = [tick_interval * i for i in range(number_of_ticks)] + + # Set custom tick positions for ax2 + ax2.set_yticks(ax2_ticks) + + if 
def string_to_list(string: str) -> List[int]:
    """
    Helper function to convert a list encoded inside a string into a Python
    list object of integer elements.

    :param string: Input string containing a list, e.g. "[0, 3, 1]".
    :type string: str
    :return: List of integer elements.
    :rtype: List[int]
    :raise AssertionError: If input string does not contain a list.
    """

    assert string[0] == "[" and string[-1] == "]", "String is not a list."
    inner = string[1:-1].strip()
    if not inner:
        # "[]" is a valid encoding of the empty list; without this guard
        # int("") would raise ValueError.
        return []
    return [int(num) for num in inner.split(",")]
+ :rtype: bool + """ + + try: + correct_list = sorted(string_to_list(state["original"])) + sorted_list = string_to_list(state["current"]) + return sorted_list == correct_list + except: + return False + + +def num_errors(state: Dict) -> float: + """ + Function to locally count the number of errors that serves as a score. + + :param state: Thought state to be scored. + :type state: Dict + :return: Number of errors. + :rtype: float + """ + + try: + unsorted_list = state["original"] + if ( + "unsorted_sublist" in state + and state["unsorted_sublist"] != "" + and state["unsorted_sublist"] is not None + and len(state["unsorted_sublist"]) < len(unsorted_list) - 5 + ): + unsorted_list = state["unsorted_sublist"] + correct_list = sorted(string_to_list(unsorted_list)) + current_list = string_to_list(state["current"]) + num_errors = 0 + for i in range(10): + num_errors += abs( + sum([1 for num in current_list if num == i]) + - sum([1 for num in correct_list if num == i]) + ) + num_errors += sum( + [1 for num1, num2 in zip(current_list, current_list[1:]) if num1 > num2] + ) + return num_errors + except: + return 300 diff --git a/docs/src/graph_of_thoughts/controller/README.md b/docs/src/graph_of_thoughts/controller/README.md new file mode 100644 index 0000000..ca05662 --- /dev/null +++ b/docs/src/graph_of_thoughts/controller/README.md @@ -0,0 +1,28 @@ +# Controller + +The Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts. +GoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). + +In order for a GoO to be executed, an instance of Large Language Model (LLM) must be supplied to the controller (along with other required objects). +Please refer to the [Language Models](../language_models/README.md) section for more information about LLMs. 
+
+The following section describes how to instantiate the Controller to run a defined GoO.
+
+## Controller Instantiation
+- Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately.
+- Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations.
+```
+lm = ...create <language model>...
+graph_of_operations = ...create <graph of operations>...
+
+executor = controller.Controller(
+    lm,
+    graph_of_operations,
+    <prompter>,
+    <parser>,
+    <problem parameters>,
+)
+executor.run()
+executor.output_graph("path/to/output.json")
+```
+- After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost.
diff --git a/docs/src/graph_of_thoughts/controller/__init__.py b/docs/src/graph_of_thoughts/controller/__init__.py
new file mode 100644
index 0000000..f41157f
--- /dev/null
+++ b/docs/src/graph_of_thoughts/controller/__init__.py
@@ -0,0 +1 @@
+from .controller import Controller
diff --git a/docs/src/graph_of_thoughts/controller/controller.py b/docs/src/graph_of_thoughts/controller/controller.py
new file mode 100644
index 0000000..821beac
--- /dev/null
+++ b/docs/src/graph_of_thoughts/controller/controller.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2023 ETH Zurich.
+# All rights reserved.
+#
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# main author: Nils Blach
+
+import json
+import logging
+from typing import List
+from graph_of_thoughts.language_models import AbstractLanguageModel
+from graph_of_thoughts.operations import GraphOfOperations, Thought
+from graph_of_thoughts.prompter import Prompter
+from graph_of_thoughts.parser import Parser
+
+
+class Controller:
+    """
+    Controller class to manage the execution flow of the Graph of Operations,
+    generating the Graph Reasoning State.
+ This involves language models, graph operations, prompting, and parsing. + """ + + def __init__( + self, + lm: AbstractLanguageModel, + graph: GraphOfOperations, + prompter: Prompter, + parser: Parser, + problem_parameters: dict, + ) -> None: + """ + Initialize the Controller instance with the language model, + operations graph, prompter, parser, and problem parameters. + + :param lm: An instance of the AbstractLanguageModel. + :type lm: AbstractLanguageModel + :param graph: The Graph of Operations to be executed. + :type graph: OperationsGraph + :param prompter: An instance of the Prompter class, used to generate prompts. + :type prompter: Prompter + :param parser: An instance of the Parser class, used to parse responses. + :type parser: Parser + :param problem_parameters: Initial parameters/state of the problem. + :type problem_parameters: dict + """ + self.logger = logging.getLogger(self.__class__.__module__) + self.lm = lm + self.graph = graph + self.prompter = prompter + self.parser = parser + self.problem_parameters = problem_parameters + self.run_executed = False + + def run(self) -> None: + """ + Run the controller and execute the operations from the Graph of + Operations based on their readiness. + Ensures the program is in a valid state before execution. + :raises AssertionError: If the Graph of Operation has no roots. + :raises AssertionError: If the successor of an operation is not in the Graph of Operations. 
+ """ + self.logger.debug("Checking that the program is in a valid state") + assert self.graph.roots is not None, "The operations graph has no root" + self.logger.debug("The program is in a valid state") + + execution_queue = [ + operation + for operation in self.graph.operations + if operation.can_be_executed() + ] + + while len(execution_queue) > 0: + current_operation = execution_queue.pop(0) + self.logger.info("Executing operation %s", current_operation.operation_type) + current_operation.execute( + self.lm, self.prompter, self.parser, **self.problem_parameters + ) + self.logger.info("Operation %s executed", current_operation.operation_type) + for operation in current_operation.successors: + assert ( + operation in self.graph.operations + ), "The successor of an operation is not in the operations graph" + if operation.can_be_executed(): + execution_queue.append(operation) + self.logger.info("All operations executed") + self.run_executed = True + + def get_final_thoughts(self) -> List[List[Thought]]: + """ + Retrieve the final thoughts after all operations have been executed. + + :return: List of thoughts for each operation in the graph's leaves. + :rtype: List[List[Thought]] + :raises AssertionError: If the `run` method hasn't been executed yet. + """ + assert self.run_executed, "The run method has not been executed" + return [operation.get_thoughts() for operation in self.graph.leaves] + + def output_graph(self, path: str) -> None: + """ + Serialize the state and results of the operations graph to a JSON file. + + :param path: The path to the output file. 
+ :type path: str + """ + output = [] + for operation in self.graph.operations: + operation_serialized = { + "operation": operation.operation_type.name, + "thoughts": [thought.state for thought in operation.get_thoughts()], + } + if any([thought.scored for thought in operation.get_thoughts()]): + operation_serialized["scored"] = [ + thought.scored for thought in operation.get_thoughts() + ] + operation_serialized["scores"] = [ + thought.score for thought in operation.get_thoughts() + ] + if any([thought.validated for thought in operation.get_thoughts()]): + operation_serialized["validated"] = [ + thought.validated for thought in operation.get_thoughts() + ] + operation_serialized["validity"] = [ + thought.valid for thought in operation.get_thoughts() + ] + if any( + [ + thought.compared_to_ground_truth + for thought in operation.get_thoughts() + ] + ): + operation_serialized["compared_to_ground_truth"] = [ + thought.compared_to_ground_truth + for thought in operation.get_thoughts() + ] + operation_serialized["problem_solved"] = [ + thought.solved for thought in operation.get_thoughts() + ] + output.append(operation_serialized) + + output.append( + { + "prompt_tokens": self.lm.prompt_tokens, + "completion_tokens": self.lm.completion_tokens, + "cost": self.lm.cost, + } + ) + + with open(path, "w") as file: + file.write(json.dumps(output, indent=2)) diff --git a/docs/src/graph_of_thoughts/language_models/README.md b/docs/src/graph_of_thoughts/language_models/README.md new file mode 100644 index 0000000..334d1bb --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/README.md @@ -0,0 +1,95 @@ +# Language Models + +The Language Models module is responsible for managing the large language models (LLMs) used by the Controller. 
+ +Currently, the framework supports the following LLMs: +- GPT-4 / GPT-3.5 (Remote - OpenAI API) +- Llama-2 (Local - HuggingFace Transformers) + +The following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework. + +## LLM Instantiation +- Create a copy of `config_template.json` named `config.json`. +- Fill configuration details based on the used model (below). + +### GPT-4 / GPT-3.5 +- Adjust predefined `chatgpt`, `chatgpt4` or create new configuration with an unique key. + +| Key | Value | +|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| model_id | Model name based on [OpenAI model overview](https://platform.openai.com/docs/models/overview). | +| prompt_token_cost | Price per 1000 prompt tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. | +| response_token_cost | Price per 1000 response tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. | +| temperature | Parameter of OpenAI models that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 2.0, default is 1.0. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature). | +| max_tokens | The maximum number of tokens to generate in the chat completion. Value depends on the maximum context size of the model specified in the [OpenAI model overview](https://platform.openai.com/docs/models/overview). 
More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens). | +| stop | String or array of strings specifying sequence of characters which if detected, stops further generation of tokens. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-stop). | +| organization | Organization to use for the API requests (may be empty). | +| api_key | Personal API key that will be used to access OpenAI API. | + +- Instantiate the language model based on the selected configuration key (predefined / custom). +``` +lm = controller.ChatGPT( + "path/to/config.json", + model_name= +) +``` + +### Llama-2 +- Requires local hardware to run inference and a HuggingFace account. +- Adjust predefined `llama7b-hf`, `llama13b-hf`, `llama70b-hf` or create a new configuration with an unique key. + +| Key | Value | +|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| model_id | Specifies HuggingFace Llama 2 model identifier (`meta-llama/`). | +| cache_dir | Local directory where model will be downloaded and accessed. | +| prompt_token_cost | Price per 1000 prompt tokens (currently not used - local model = no cost). | +| response_token_cost | Price per 1000 response tokens (currently not used - local model = no cost). | +| temperature | Parameter that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 1.0, default is 0.6. | +| top_k | Top-K sampling method described in [Transformers tutorial](https://huggingface.co/blog/how-to-generate). Default value is set to 10. | +| max_tokens | The maximum number of tokens to generate in the chat completion. More tokens require more memory. 
| + +- Instantiate the language model based on the selected configuration key (predefined / custom). +``` +lm = controller.Llama2HF( + "path/to/config.json", + model_name= +) +``` +- Request access to Llama-2 via the [Meta form](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) using the same email address as for the HuggingFace account. +- After the access is granted, go to [HuggingFace Llama-2 model card](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (_"You have been granted access to this model"_ message should appear). +- Generate HuggingFace access token. +- Log in from CLI with: `huggingface-cli login --token `. + +Note: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library). + +## Adding LLMs +More LLMs can be added by following these steps: +- Create new class as a subclass of `AbstractLanguageModel`. +- Use the constructor for loading configuration and instantiating the language model (if needed). +``` +class CustomLanguageModel(AbstractLanguageModel): + def __init__( + self, + config_path: str = "", + model_name: str = "llama7b-hf", + cache: bool = False + ) -> None: + super().__init__(config_path, model_name, cache) + self.config: Dict = self.config[model_name] + + # Load data from configuration into variables if needed + + # Instantiate LLM if needed +``` +- Implement `query` abstract method that is used to get a list of responses from the LLM (call to remote API or local model inference). 
+``` +def query(self, query: str, num_responses: int = 1) -> Any: + # Support caching + # Call LLM and retrieve list of responses - based on num_responses + # Return LLM response structure (not only raw strings) +``` +- Implement `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`. +``` +def get_response_texts(self, query_response: Union[List[Dict], Dict]) -> List[str]: + # Retrieve list of raw strings from the LLM response structure +``` diff --git a/docs/src/graph_of_thoughts/language_models/__init__.py b/docs/src/graph_of_thoughts/language_models/__init__.py new file mode 100644 index 0000000..076d62e --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/__init__.py @@ -0,0 +1,3 @@ +from .abstract_language_model import AbstractLanguageModel +from .chatgpt import ChatGPT +from .llamachat_hf import Llama2HF diff --git a/docs/src/graph_of_thoughts/language_models/abstract_language_model.py b/docs/src/graph_of_thoughts/language_models/abstract_language_model.py new file mode 100644 index 0000000..cead63c --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/abstract_language_model.py @@ -0,0 +1,92 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Nils Blach + +from abc import ABC, abstractmethod +from typing import List, Dict, Union, Any +import json +import os +import logging + + +class AbstractLanguageModel(ABC): + """ + Abstract base class that defines the interface for all language models. + """ + + def __init__( + self, config_path: str = "", model_name: str = "", cache: bool = False + ) -> None: + """ + Initialize the AbstractLanguageModel instance with configuration, model details, and caching options. + + :param config_path: Path to the config file. Defaults to "". 
+ :type config_path: str + :param model_name: Name of the language model. Defaults to "". + :type model_name: str + :param cache: Flag to determine whether to cache responses. Defaults to False. + :type cache: bool + """ + self.logger = logging.getLogger(self.__class__.__name__) + self.config: Dict = None + self.model_name: str = model_name + self.cache = cache + if self.cache: + self.respone_cache: Dict[str, List[Any]] = {} + self.load_config(config_path) + self.prompt_tokens: int = 0 + self.completion_tokens: int = 0 + self.cost: float = 0.0 + + def load_config(self, path: str) -> None: + """ + Load configuration from a specified path. + + :param path: Path to the config file. If an empty path provided, + default is `config.json` in the current directory. + :type path: str + """ + if path == "": + current_dir = os.path.dirname(os.path.abspath(__file__)) + path = os.path.join(current_dir, "config.json") + + with open(path, "r") as f: + self.config = json.load(f) + + self.logger.debug(f"Loaded config from {path} for {self.model_name}") + + def clear_cache(self) -> None: + """ + Clear the response cache. + """ + self.respone_cache.clear() + + @abstractmethod + def query(self, query: str, num_responses: int = 1) -> Any: + """ + Abstract method to query the language model. + + :param query: The query to be posed to the language model. + :type query: str + :param num_responses: The number of desired responses. + :type num_responses: int + :return: The language model's response(s). + :rtype: Any + """ + pass + + @abstractmethod + def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]: + """ + Abstract method to extract response texts from the language model's response(s). + + :param query_responses: The responses returned from the language model. + :type query_responses: Union[List[Any], Any] + :return: List of textual responses. 
+ :rtype: List[str] + """ + pass diff --git a/docs/src/graph_of_thoughts/language_models/chatgpt.py b/docs/src/graph_of_thoughts/language_models/chatgpt.py new file mode 100644 index 0000000..4f63d61 --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/chatgpt.py @@ -0,0 +1,157 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Nils Blach + +import backoff +import os +import random +import time +from typing import List, Dict, Union +from openai import OpenAI, OpenAIError +from openai.types.chat.chat_completion import ChatCompletion + +from .abstract_language_model import AbstractLanguageModel + + +class ChatGPT(AbstractLanguageModel): + """ + The ChatGPT class handles interactions with the OpenAI models using the provided configuration. + + Inherits from the AbstractLanguageModel and implements its abstract methods. + """ + + def __init__( + self, config_path: str = "", model_name: str = "chatgpt", cache: bool = False + ) -> None: + """ + Initialize the ChatGPT instance with configuration, model details, and caching options. + + :param config_path: Path to the configuration file. Defaults to "". + :type config_path: str + :param model_name: Name of the model, default is 'chatgpt'. Used to select the correct configuration. + :type model_name: str + :param cache: Flag to determine whether to cache responses. Defaults to False. + :type cache: bool + """ + super().__init__(config_path, model_name, cache) + self.config: Dict = self.config[model_name] + # The model_id is the id of the model that is used for chatgpt, i.e. gpt-4, gpt-3.5-turbo, etc. + self.model_id: str = self.config["model_id"] + # The prompt_token_cost and response_token_cost are the costs for 1000 prompt tokens and 1000 response tokens respectively. 
+ self.prompt_token_cost: float = self.config["prompt_token_cost"] + self.response_token_cost: float = self.config["response_token_cost"] + # The temperature of a model is defined as the randomness of the model's output. + self.temperature: float = self.config["temperature"] + # The maximum number of tokens to generate in the chat completion. + self.max_tokens: int = self.config["max_tokens"] + # The stop sequence is a sequence of tokens that the model will stop generating at (it will not generate the stop sequence). + self.stop: Union[str, List[str]] = self.config["stop"] + # The account organization is the organization that is used for chatgpt. + self.organization: str = self.config["organization"] + if self.organization == "": + self.logger.warning("OPENAI_ORGANIZATION is not set") + self.api_key: str = os.getenv("OPENAI_API_KEY", self.config["api_key"]) + if self.api_key == "": + raise ValueError("OPENAI_API_KEY is not set") + # Initialize the OpenAI Client + self.client = OpenAI(api_key=self.api_key, organization=self.organization) + + def query( + self, query: str, num_responses: int = 1 + ) -> Union[List[ChatCompletion], ChatCompletion]: + """ + Query the OpenAI model for responses. + + :param query: The query to be posed to the language model. + :type query: str + :param num_responses: Number of desired responses, default is 1. + :type num_responses: int + :return: Response(s) from the OpenAI model. 
+ :rtype: Dict + """ + if self.cache and query in self.respone_cache: + return self.respone_cache[query] + + if num_responses == 1: + response = self.chat([{"role": "user", "content": query}], num_responses) + else: + response = [] + next_try = num_responses + total_num_attempts = num_responses + while num_responses > 0 and total_num_attempts > 0: + try: + assert next_try > 0 + res = self.chat([{"role": "user", "content": query}], next_try) + response.append(res) + num_responses -= next_try + next_try = min(num_responses, next_try) + except Exception as e: + next_try = (next_try + 1) // 2 + self.logger.warning( + f"Error in chatgpt: {e}, trying again with {next_try} samples" + ) + time.sleep(random.randint(1, 3)) + total_num_attempts -= 1 + + if self.cache: + self.respone_cache[query] = response + return response + + @backoff.on_exception(backoff.expo, OpenAIError, max_time=10, max_tries=6) + def chat(self, messages: List[Dict], num_responses: int = 1) -> ChatCompletion: + """ + Send chat messages to the OpenAI model and retrieves the model's response. + Implements backoff on OpenAI error. + + :param messages: A list of message dictionaries for the chat. + :type messages: List[Dict] + :param num_responses: Number of desired responses, default is 1. + :type num_responses: int + :return: The OpenAI model's response. 
+ :rtype: ChatCompletion + """ + response = self.client.chat.completions.create( + model=self.model_id, + messages=messages, + temperature=self.temperature, + max_tokens=self.max_tokens, + n=num_responses, + stop=self.stop, + ) + + self.prompt_tokens += response.usage.prompt_tokens + self.completion_tokens += response.usage.completion_tokens + prompt_tokens_k = float(self.prompt_tokens) / 1000.0 + completion_tokens_k = float(self.completion_tokens) / 1000.0 + self.cost = ( + self.prompt_token_cost * prompt_tokens_k + + self.response_token_cost * completion_tokens_k + ) + self.logger.info( + f"This is the response from chatgpt: {response}" + f"\nThis is the cost of the response: {self.cost}" + ) + return response + + def get_response_texts( + self, query_response: Union[List[ChatCompletion], ChatCompletion] + ) -> List[str]: + """ + Extract the response texts from the query response. + + :param query_response: The response dictionary (or list of dictionaries) from the OpenAI model. + :type query_response: Union[List[ChatCompletion], ChatCompletion] + :return: List of response strings. 
+ :rtype: List[str] + """ + if not isinstance(query_response, List): + query_response = [query_response] + return [ + choice.message.content + for response in query_response + for choice in response.choices + ] diff --git a/docs/src/graph_of_thoughts/language_models/config_template.json b/docs/src/graph_of_thoughts/language_models/config_template.json new file mode 100644 index 0000000..3ec45ae --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/config_template.json @@ -0,0 +1,49 @@ +{ + "chatgpt" : { + "model_id": "gpt-3.5-turbo", + "prompt_token_cost": 0.0015, + "response_token_cost": 0.002, + "temperature": 1.0, + "max_tokens": 1536, + "stop": null, + "organization": "", + "api_key": "" + }, + "chatgpt4" : { + "model_id": "gpt-4", + "prompt_token_cost": 0.03, + "response_token_cost": 0.06, + "temperature": 1.0, + "max_tokens": 4096, + "stop": null, + "organization": "", + "api_key": "" + }, + "llama7b-hf" : { + "model_id": "Llama-2-7b-chat-hf", + "cache_dir": "/llama", + "prompt_token_cost": 0.0, + "response_token_cost": 0.0, + "temperature": 0.6, + "top_k": 10, + "max_tokens": 4096 + }, + "llama13b-hf" : { + "model_id": "Llama-2-13b-chat-hf", + "cache_dir": "/llama", + "prompt_token_cost": 0.0, + "response_token_cost": 0.0, + "temperature": 0.6, + "top_k": 10, + "max_tokens": 4096 + }, + "llama70b-hf" : { + "model_id": "Llama-2-70b-chat-hf", + "cache_dir": "/llama", + "prompt_token_cost": 0.0, + "response_token_cost": 0.0, + "temperature": 0.6, + "top_k": 10, + "max_tokens": 4096 + } +} diff --git a/docs/src/graph_of_thoughts/language_models/llamachat_hf.py b/docs/src/graph_of_thoughts/language_models/llamachat_hf.py new file mode 100644 index 0000000..d423a50 --- /dev/null +++ b/docs/src/graph_of_thoughts/language_models/llamachat_hf.py @@ -0,0 +1,119 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+# +# main author: Ales Kubicek + +import os +import torch +from typing import List, Dict, Union +from .abstract_language_model import AbstractLanguageModel + + +class Llama2HF(AbstractLanguageModel): + """ + An interface to use LLaMA 2 models through the HuggingFace library. + """ + + def __init__( + self, config_path: str = "", model_name: str = "llama7b-hf", cache: bool = False + ) -> None: + """ + Initialize an instance of the Llama2HF class with configuration, model details, and caching options. + + :param config_path: Path to the configuration file. Defaults to an empty string. + :type config_path: str + :param model_name: Specifies the name of the LLaMA model variant. Defaults to "llama7b-hf". + Used to select the correct configuration. + :type model_name: str + :param cache: Flag to determine whether to cache responses. Defaults to False. + :type cache: bool + """ + super().__init__(config_path, model_name, cache) + self.config: Dict = self.config[model_name] + # Detailed id of the used model. + self.model_id: str = self.config["model_id"] + # Costs for 1000 tokens. + self.prompt_token_cost: float = self.config["prompt_token_cost"] + self.response_token_cost: float = self.config["response_token_cost"] + # The temperature is defined as the randomness of the model's output. + self.temperature: float = self.config["temperature"] + # Top K sampling. + self.top_k: int = self.config["top_k"] + # The maximum number of tokens to generate in the chat completion. 
+ self.max_tokens: int = self.config["max_tokens"] + + # Important: must be done before importing transformers + os.environ["TRANSFORMERS_CACHE"] = self.config["cache_dir"] + import transformers + + hf_model_id = f"meta-llama/{self.model_id}" + model_config = transformers.AutoConfig.from_pretrained(hf_model_id) + bnb_config = transformers.BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id) + self.model = transformers.AutoModelForCausalLM.from_pretrained( + hf_model_id, + trust_remote_code=True, + config=model_config, + quantization_config=bnb_config, + device_map="auto", + ) + self.model.eval() + torch.no_grad() + + self.generate_text = transformers.pipeline( + model=self.model, tokenizer=self.tokenizer, task="text-generation" + ) + + def query(self, query: str, num_responses: int = 1) -> List[Dict]: + """ + Query the LLaMA 2 model for responses. + + :param query: The query to be posed to the language model. + :type query: str + :param num_responses: Number of desired responses, default is 1. + :type num_responses: int + :return: Response(s) from the LLaMA 2 model. + :rtype: List[Dict] + """ + if self.cache and query in self.respone_cache: + return self.respone_cache[query] + sequences = [] + query = f"<>You are a helpful assistant. 
Always follow the instructions precisely and output the response exactly in the requested format.<>\n\n[INST] {query} [/INST]"
+        for _ in range(num_responses):
+            sequences.extend(
+                self.generate_text(
+                    query,
+                    do_sample=True,
+                    top_k=self.top_k,
+                    num_return_sequences=1,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    max_length=self.max_tokens,
+                )
+            )
+        response = [
+            {"generated_text": sequence["generated_text"][len(query) :].strip()}
+            for sequence in sequences
+        ]
+        if self.cache:
+            self.respone_cache[query] = response
+        return response
+
+    def get_response_texts(self, query_responses: List[Dict]) -> List[str]:
+        """
+        Extract the response texts from the query response.
+
+        :param query_responses: The response list of dictionaries generated from the `query` method.
+        :type query_responses: List[Dict]
+        :return: List of response strings.
+        :rtype: List[str]
+        """
+        return [query_response["generated_text"] for query_response in query_responses]
diff --git a/docs/src/graph_of_thoughts/operations/README.md b/docs/src/graph_of_thoughts/operations/README.md
new file mode 100644
index 0000000..68745ff
--- /dev/null
+++ b/docs/src/graph_of_thoughts/operations/README.md
@@ -0,0 +1,70 @@
+# Operations
+
+The Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class.
+Operations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model.
+The [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State.
+ +## Graph of Operations +The [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here’s how you can construct and work with the Graph of Operations: + +### Initialization +Creating a new instance of GraphOfOperations: + +```python +from graph_of_thoughts.operations import GraphOfOperations + +graph = GraphOfOperations() +``` + +Upon initialization, the graph will be empty with no operations, roots, or leaves. + +### Adding Operations +**Append Operation:** You can append operations to the end of the graph using the append_operation method. This ensures that the operation becomes a successor to all current leaf operations in the graph. +```python +from graph_of_thoughts.operations import Generate + +operationA = Generate() +graph.append_operation(operationA) +``` +**Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method. +```python +operationB = Generate() +operationB.predecessors.append(operationA) +graph.add_operation(operationB) +``` +Remember to set up the predecessors (and optionally successors) for your operation before adding it to the graph. + +## Available Operations +The following operations are available in the module: + +**Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function. +- num_samples (Optional): The number of samples to use for scoring, defaults to 1. +- combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False. +- scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought. + +**ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it. +- num_samples (Optional): The number of samples to use for validation, defaults to 1. 
+- improve (Optional): Whether to improve the thought if it is invalid, defaults to True. +- num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3. +- validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid. + +**Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py). +- num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). Defaults to 1. +- num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1. + +**Improve:** Improve the current thoughts. This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them. + +**Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought. +- num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). Defaults to 1. + +**KeepBestN:** Keep the best N thoughts from the preceding thoughts. Assumes that the thoughts are already scored and throws an error if they are not. +- n: The number of thoughts to keep in order of score. +- higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True. + +**KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated, if not, it will be considered valid. + +**Selector:** Select a number of thoughts from the preceding thoughts using a selection function. This is useful if subsequent operations should only be applied to a subset of the preceding thoughts. 
+- selector: A function that takes in a list of thoughts and returns a list of thoughts to select. + +**GroundTruth:** Evaluates if the preceding/current thoughts solve the problem and equal the ground truth. This operation is useful for terminating the graph and checking if the final thoughts solve the problem, but is only useful if the ground truth is known. +- ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem. diff --git a/docs/src/graph_of_thoughts/operations/__init__.py b/docs/src/graph_of_thoughts/operations/__init__.py new file mode 100644 index 0000000..7b7c545 --- /dev/null +++ b/docs/src/graph_of_thoughts/operations/__init__.py @@ -0,0 +1,14 @@ +from .thought import Thought +from .graph_of_operations import GraphOfOperations +from .operations import ( + Operation, + Score, + ValidateAndImprove, + Generate, + Aggregate, + KeepBestN, + KeepValid, + Selector, + GroundTruth, + Improve, +) diff --git a/docs/src/graph_of_thoughts/operations/graph_of_operations.py b/docs/src/graph_of_thoughts/operations/graph_of_operations.py new file mode 100644 index 0000000..6efc90a --- /dev/null +++ b/docs/src/graph_of_thoughts/operations/graph_of_operations.py @@ -0,0 +1,69 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Nils Blach + +from __future__ import annotations +from typing import List + +from graph_of_thoughts.operations.operations import Operation + + +class GraphOfOperations: + """ + Represents the Graph of Operations, which prescribes the execution plan of thought operations. + """ + + def __init__(self) -> None: + """ + Initializes a new Graph of Operations instance with empty operations, roots, and leaves. + The roots are the entry points in the graph with no predecessors. + The leaves are the exit points in the graph with no successors. 
+ """ + self.operations: List[Operation] = [] + self.roots: List[Operation] = [] + self.leaves: List[Operation] = [] + + def append_operation(self, operation: Operation) -> None: + """ + Appends an operation to all leaves in the graph and updates the relationships. + + :param operation: The operation to append. + :type operation: Operation + """ + self.operations.append(operation) + + if len(self.roots) == 0: + self.roots = [operation] + else: + for leave in self.leaves: + leave.add_successor(operation) + + self.leaves = [operation] + + def add_operation(self, operation: Operation) -> None: + """ + Add an operation to the graph considering its predecessors and successors. + Adjust roots and leaves based on the added operation's position within the graph. + + :param operation: The operation to add. + :type operation: Operation + """ + self.operations.append(operation) + if len(self.roots) == 0: + self.roots = [operation] + self.leaves = [operation] + assert ( + len(operation.predecessors) == 0 + ), "First operation should have no predecessors" + else: + if len(operation.predecessors) == 0: + self.roots.append(operation) + for predecessor in operation.predecessors: + if predecessor in self.leaves: + self.leaves.remove(predecessor) + if len(operation.successors) == 0: + self.leaves.append(operation) diff --git a/docs/src/graph_of_thoughts/operations/operations.py b/docs/src/graph_of_thoughts/operations/operations.py new file mode 100644 index 0000000..bb9b31e --- /dev/null +++ b/docs/src/graph_of_thoughts/operations/operations.py @@ -0,0 +1,900 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+# +# main author: Nils Blach + +from __future__ import annotations +import logging +from enum import Enum +from typing import List, Iterator, Dict, Callable, Union +from abc import ABC, abstractmethod +import itertools + +from graph_of_thoughts.operations.thought import Thought +from graph_of_thoughts.language_models import AbstractLanguageModel +from graph_of_thoughts.prompter import Prompter +from graph_of_thoughts.parser import Parser + + +class OperationType(Enum): + """ + Enum to represent different operation types that can be used as unique identifiers. + """ + + score: int = 0 + validate_and_improve: int = 1 + generate: int = 2 + improve: int = 3 + aggregate: int = 4 + keep_best_n: int = 5 + keep_valid: int = 6 + ground_truth_evaluator: int = 7 + selector: int = 8 + + +class Operation(ABC): + """ + Abstract base class that defines the interface for all operations. + """ + + _ids: Iterator[int] = itertools.count(0) + + operation_type: OperationType = None + + def __init__(self) -> None: + """ + Initializes a new Operation instance with a unique id, and empty predecessors and successors. + """ + self.logger: logging.Logger = logging.getLogger(self.__class__.__name__) + self.id: int = next(Operation._ids) + self.predecessors: List[Operation] = [] + self.successors: List[Operation] = [] + self.executed: bool = False + + def can_be_executed(self) -> bool: + """ + Checks if the operation can be executed based on its predecessors. + + :return: True if all predecessors have been executed, False otherwise. + :rtype: bool + """ + return all(predecessor.executed for predecessor in self.predecessors) + + def get_previous_thoughts(self) -> List[Thought]: + """ + Iterates over all predecessors and aggregates their thoughts. + + :return: A list of all thoughts from the predecessors. 
+ :rtype: List[Thought] + """ + previous_thoughts: List[Thought] = [ + thought + for predecessor in self.predecessors + for thought in predecessor.get_thoughts() + ] + + return previous_thoughts + + def add_predecessor(self, operation: Operation) -> None: + """ + Add a preceding operation and update the relationships. + + :param operation: The operation to be set as a predecessor. + :type operation: Operation + """ + self.predecessors.append(operation) + operation.successors.append(self) + + def add_successor(self, operation: Operation) -> None: + """ + Add a succeeding operation and update the relationships. + + :param operation: The operation to be set as a successor. + :type operation: Operation + """ + self.successors.append(operation) + operation.predecessors.append(self) + + def execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Execute the operation, assuring that all predecessors have been executed. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If not all predecessors have been executed. + """ + assert self.can_be_executed(), "Not all predecessors have been executed" + self.logger.info( + "Executing operation %d of type %s", self.id, self.operation_type + ) + self._execute(lm, prompter, parser, **kwargs) + self.logger.debug("Operation %d executed", self.id) + self.executed = True + + @abstractmethod + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Abstract method for the actual execution of the operation. + This should be implemented in derived classes. + + :param lm: The language model to be used. 
+ :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + """ + pass + + @abstractmethod + def get_thoughts(self) -> List[Thought]: + """ + Abstract method to retrieve the thoughts associated with the operation. + This should be implemented in derived classes. + + :return: List of associated thoughts. + :rtype: List[Thought] + """ + pass + + +class Score(Operation): + """ + Operation to score thoughts. + """ + + operation_type: OperationType = OperationType.score + + def __init__( + self, + num_samples: int = 1, + combined_scoring: bool = False, + scoring_function: Callable[ + [Union[List[Dict], Dict]], Union[List[float], float] + ] = None, + ) -> None: + """ + Initializes a new Score operation. + + :param num_samples: Number of samples to use for scoring. Defaults to 1. + :type num_samples: int + :param combined_scoring: Whether to score all thoughts together or individually. Defaults to False. + :type combined_scoring: bool + :param scoring_function: A function to score thoughts (if not using LM). Defaults to None. + :type scoring_function: Takes a list of thought states or a single thought state and + returns a list of scores or a single score. + """ + super().__init__() + self.num_samples: int = num_samples + self.combined_scoring: bool = combined_scoring + self.thoughts: List[Thought] = [] + self.scoring_function: Callable[ + [Union[List[Dict], Dict]], Union[List[float], float] + ] = scoring_function + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts associated with the operation. + + :return: List of scored thoughts. 
+ :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the scoring operation by scoring the thoughts from the predecessors. + If combined scoring is used, the thoughts are scored together, otherwise individually. + If a scoring function is provided, it is used, otherwise the LM is prompted. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + assert ( + len(self.predecessors) > 0 + ), "Score operation needs at least one predecessor" + + if self.combined_scoring: + previous_thoughts_states = [thought.state for thought in previous_thoughts] + if self.scoring_function is not None: + self.logger.debug( + "Using scoring function %s to score states", self.scoring_function + ) + scores = self.scoring_function(previous_thoughts_states) + else: + prompt = prompter.score_prompt(previous_thoughts_states) + self.logger.debug("Prompt for LM: %s", prompt) + + responses = lm.get_response_texts( + lm.query(prompt, num_responses=self.num_samples) + ) + self.logger.debug("Responses from LM: %s", responses) + scores = parser.parse_score_answer(previous_thoughts_states, responses) + for thought, score in zip(previous_thoughts, scores): + new_thought = Thought.from_thought(thought) + new_thought.score = score + self.thoughts.append(new_thought) + else: + for thought in previous_thoughts: + new_thought = Thought.from_thought(thought) + if self.scoring_function is not None: + self.logger.debug( + "Using scoring function %s to score state", + self.scoring_function, + ) + score = 
self.scoring_function(thought.state) + else: + prompt = prompter.score_prompt([thought.state]) + self.logger.debug("Prompt for LM: %s", prompt) + + responses = lm.get_response_texts( + lm.query(prompt, num_responses=self.num_samples) + ) + self.logger.debug("Responses from LM: %s", responses) + score = parser.parse_score_answer([thought.state], responses)[0] + + new_thought.score = score + self.thoughts.append(new_thought) + + self.logger.info( + "Score operation %d scored %d thoughts", + self.id, + len(self.thoughts), + ) + + +class ValidateAndImprove(Operation): + """ + Operation to validate and improve thoughts. + """ + + operation_type: OperationType = OperationType.validate_and_improve + + def __init__( + self, + num_samples: int = 1, + improve: bool = True, + num_tries: int = 3, + validate_function: Callable[[Dict], bool] = None, + ) -> None: + """ + Initializes a new ValidateAndImprove operation. + + :param num_samples: Number of samples to use for validation. Defaults to 1. + :type num_samples: int + :param improve: Whether to improve the thought if it is not valid. Defaults to True. + :type improve: bool + :param num_tries: Number of tries to improve the thought before giving up. Defaults to 3. + :type num_tries: int + :param validate_function: A function to validate thoughts (if not using LM). Defaults to None. + :type validate_function: Takes a thought state and returns a boolean. + """ + super().__init__() + self.num_samples: int = num_samples + self.improve: bool = improve + self.num_tries: int = num_tries + self.validate_function: Callable[[Dict], bool] = validate_function + self.thoughts: List[List[Thought]] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the list of final thoughts, after validation and improvement. + + :return: List of final validated and improved thoughts. 
+ :rtype: List[Thought] + """ + return [thought_list[-1] for thought_list in self.thoughts] + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the ValidateAndImprove operation by validating and improving the predecessors' thoughts. + If a validation function is provided, it is used, otherwise the LM is prompted. + If improvement is enabled, the LM is prompted to improve the thought, if it is not valid. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + assert ( + len(self.predecessors) > 0 + ), "ValidateAndImprove operation needs at least one predecessor" + + for thought in previous_thoughts: + thought_list = [] + current_thought = Thought.from_thought(thought) + current_try = 0 + while True: + if self.validate_function is not None: + self.logger.debug( + "Using validate function %s to score states", + self.validate_function, + ) + valid = self.validate_function(current_thought.state) + else: + prompt = prompter.validation_prompt(**current_thought.state) + self.logger.debug("Prompt for LM: %s", prompt) + responses = lm.get_response_texts( + lm.query(prompt, num_responses=self.num_samples) + ) + self.logger.debug("Responses from LM: %s", responses) + + valid = parser.parse_validation_answer( + current_thought.state, responses + ) + current_thought.valid = valid + thought_list.append(current_thought) + if ( + not self.improve + or current_thought.valid + or current_try >= self.num_tries + ): + break + improve_prompt = prompter.improve_prompt(**current_thought.state) + self.logger.debug("Prompt for LM: %s", improve_prompt) + 
responses = lm.get_response_texts( + lm.query(improve_prompt, num_responses=1) + ) + self.logger.debug("Responses from LM: %s", responses) + state_update = parser.parse_improve_answer( + current_thought.state, responses + ) + current_thought = Thought({**current_thought.state, **state_update}) + current_try += 1 + self.thoughts.append(thought_list) + + self.logger.info( + "Validate and improve operation %d created %d valid thoughts from %d previous thoughts", + self.id, + len( + [ + thought_list[-1] + for thought_list in self.thoughts + if thought_list[-1].valid + ] + ), + len(previous_thoughts), + ) + + +class Generate(Operation): + """ + Operation to generate thoughts. + """ + + operation_type: OperationType = OperationType.generate + + def __init__( + self, num_branches_prompt: int = 1, num_branches_response: int = 1 + ) -> None: + """ + Initializes a new Generate operation. + + :param num_branches_prompt: Number of responses that each prompt should generate (passed to prompter). Defaults to 1. + :type num_branches_prompt: int + :param num_branches_response: Number of responses the LM should generate for each prompt. Defaults to 1. + :type num_branches_response: int + """ + super().__init__() + self.num_branches_prompt: int = num_branches_prompt + self.num_branches_response: int = num_branches_response + self.thoughts: List[Thought] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts associated with the operation. + + :return: List of generated thoughts. + :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the Generate operation by generating thoughts from the predecessors. + The thoughts are generated by prompting the LM with the predecessors' thought states. + If there are no predecessors, the kwargs are used as a base state. + + :param lm: The language model to be used. 
+ :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + if len(previous_thoughts) == 0 and len(self.predecessors) > 0: + return + + if len(previous_thoughts) == 0: + # no predecessors, use kwargs as base state + previous_thoughts = [Thought(state=kwargs)] + + for thought in previous_thoughts: + base_state = thought.state + prompt = prompter.generate_prompt(self.num_branches_prompt, **base_state) + self.logger.debug("Prompt for LM: %s", prompt) + responses = lm.get_response_texts( + lm.query(prompt, num_responses=self.num_branches_response) + ) + self.logger.debug("Responses from LM: %s", responses) + for new_state in parser.parse_generate_answer(base_state, responses): + new_state = {**base_state, **new_state} + self.thoughts.append(Thought(new_state)) + self.logger.debug( + "New thought %d created with state %s", + self.thoughts[-1].id, + self.thoughts[-1].state, + ) + if ( + len(self.thoughts) + > self.num_branches_prompt + * self.num_branches_response + * len(previous_thoughts) + and self.num_branches_prompt > 0 + ): + self.logger.warning( + "Generate operation %d created more thoughts than expected", + self.id, + ) + self.logger.info( + "Generate operation %d created %d new thoughts", self.id, len(self.thoughts) + ) + + +class Improve(Operation): + """ + Operation to improve thoughts. + """ + + operation_type: OperationType = OperationType.improve + + def __init__(self) -> None: + """ + Initializes a new Improve operation. + """ + super().__init__() + self.thoughts: List[Thought] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts associated with the operation after improvement. + + :return: List of improved thoughts. 
+ :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the Improve operation by improving the predecessors' thoughts. + The thoughts are improved by prompting the LM with the predecessors' thought states. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + assert len(self.predecessors) > 0, "Needs at least one predecessor" + + for thought in previous_thoughts: + improve_prompt = prompter.improve_prompt(**thought.state) + self.logger.debug("Prompt for LM: %s", improve_prompt) + responses = lm.get_response_texts(lm.query(improve_prompt, num_responses=1)) + self.logger.debug("Responses from LM: %s", responses) + state_update = parser.parse_improve_answer(thought.state, responses) + self.thoughts.append(Thought({**thought.state, **state_update})) + + self.logger.info( + "Improve operation %d improved %d thoughts", self.id, len(self.thoughts) + ) + + +class Aggregate(Operation): + """ + Operation to aggregate thoughts. + """ + + operation_type: OperationType = OperationType.aggregate + + def __init__(self, num_responses: int = 1) -> None: + """ + Initializes a new Aggregate operation. + + :param num_responses: Number of responses to use for aggregation. Defaults to 1. + :type num_responses: int + """ + super().__init__() + self.thoughts: List[Thought] = [] + self.num_responses: int = num_responses + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts associated with the operation after aggregation. + + :return: List of aggregated thoughts. 
+ :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the Aggregate operation by aggregating the predecessors' thoughts. + The thoughts are aggregated by prompting the LM with the predecessors' thought states. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. + """ + assert ( + len(self.predecessors) >= 1 + ), "Aggregate operation must have at least one predecessor" + + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + if len(previous_thoughts) == 0: + return + + # applied in order of score + base_state: Dict = {} + for thought in sorted(previous_thoughts, key=lambda thought: thought.score): + base_state = {**base_state, **thought.state} + + previous_thought_states = [thought.state for thought in previous_thoughts] + prompt = prompter.aggregation_prompt(previous_thought_states) + + self.logger.debug("Prompt for LM: %s", prompt) + + responses = lm.get_response_texts( + lm.query(prompt, num_responses=self.num_responses) + ) + + self.logger.debug("Responses from LM: %s", responses) + + parsed = parser.parse_aggregation_answer(previous_thought_states, responses) + + if isinstance(parsed, dict): + parsed = [parsed] + for new_state in parsed: + self.thoughts.append(Thought({**base_state, **new_state})) + + +class KeepBestN(Operation): + """ + Operation to keep the best N thoughts from predecessors based on their score. + """ + + operation_type: OperationType = OperationType.keep_best_n + + def __init__(self, n: int, higher_is_better: bool = True) -> None: + """ + Initializes a new KeepBestN operation. 
+ + :param n: Maximum number of thoughts to keep. + :type n: int + :param higher_is_better: Whether higher scores are better. Defaults to True. + :type higher_is_better: bool + :raises AssertionError: If `n` is not greater than zero. + """ + super().__init__() + self.n: int = n + assert self.n > 0, "KeepBestN operation must keep at least one thought" + self.higher_is_better: bool = higher_is_better + self.thoughts: List[Thought] = [] + + def get_best_n(self) -> List[Thought]: + """ + Returns the best N thoughts from the predecessors based on their score. + + :return: List of best N thoughts. + :rtype: List[Thought] + :raises AssertionError: If not all predecessors have been executed. + :raises AssertionError: If not all thoughts have been scored. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + assert all( + previous_thought.scored for previous_thought in previous_thoughts + ), "Not all thoughts have been scored" + + try: + return sorted( + previous_thoughts, + key=lambda thought: thought.score, + reverse=self.higher_is_better, + )[: self.n] + except: + self.logger.error("Error in KeepBestN operation") + self.logger.error( + "Previous operation: %s", [op.id for op in self.predecessors] + ) + self.logger.error("Previous thoughts: %s", previous_thoughts) + self.logger.error( + "Scores: %s", [thought.score for thought in previous_thoughts] + ) + return sorted( + [i for i in previous_thoughts if isinstance(i.score, float)], + key=lambda thought: thought.score, + reverse=self.higher_is_better, + )[: self.n] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts kept by the operation. + + :return: List of kept thoughts. + :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the KeepBestN operation by keeping the best N thoughts from the predecessors according to their score. 
+ + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. + :raises AssertionError: If not all predecessors have been executed. + :raises AssertionError: If not all thoughts have been scored. + """ + assert ( + len(self.predecessors) >= 1 + ), "KeepBestN operation must have at least one predecessor" + + self.thoughts = [Thought.from_thought(thought) for thought in self.get_best_n()] + + for thought in self.thoughts: + self.logger.debug( + "Thought %d with state %s kept", thought.id, thought.state + ) + + self.logger.info( + "KeepBestN operation %d kept %d thoughts", self.id, len(self.thoughts) + ) + + +class KeepValid(Operation): + """ + Operation to keep valid thoughts from predecessors. + """ + + operation_type: OperationType = OperationType.keep_valid + + def __init__(self) -> None: + """ + Initializes a new KeepValid operation. + """ + super().__init__() + self.thoughts: List[Thought] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts kept by the operation. + + :return: List of kept thoughts. + :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the KeepValid operation by keeping the valid thoughts from the predecessors. + Keeps unvalidated thoughts as well. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessors. 
+ """ + assert ( + len(self.predecessors) >= 1 + ), "KeepValid operation must have at least one predecessor" + + self.thoughts: List[Thought] = [ + Thought.from_thought(thought) + for thought in self.get_previous_thoughts() + if not thought.validated or thought.valid + ] + + if any(not thought.validated for thought in self.thoughts): + self.logger.warning( + "KeepValid operation %d has unvalidated thoughts", self.id + ) + + for thought in self.thoughts: + self.logger.debug( + "Thought %d with state %s kept", thought.id, thought.state + ) + + self.logger.info( + "KeepValid operation %d kept %d thoughts", self.id, len(self.thoughts) + ) + + +class GroundTruth(Operation): + """ + Operation to evaluate if thoughts correctly solve the problem, using a ground truth evaluator + """ + + operation_type: OperationType = OperationType.ground_truth_evaluator + + def __init__(self, ground_truth_evaluator: Callable[[Dict], bool]) -> None: + """ + Initializes a new GroundTruth operation. + + :param ground_truth_evaluator: A function to evaluate if a thought solves the problem. + :type ground_truth_evaluator: A function that takes a thought state and returns a boolean. + """ + super().__init__() + self.ground_truth_evaluator: Callable[[Dict], bool] = ground_truth_evaluator + self.thoughts: List[Thought] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts associated with the operation. + + :return: List of evaluated thoughts. + :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the GroundTruth operation by evaluating the predecessors' thoughts using the ground truth evaluator function. + + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. 
+ :type parser: Parser + :param kwargs: Additional parameters for execution. + :raises AssertionError: If operation has no predecessor. + """ + assert ( + len(self.predecessors) >= 1 + ), "GroundTruth operation must have at least one predecessor" + + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + for thought in previous_thoughts: + new_thought = Thought.from_thought(thought) + try: + new_thought.solved = self.ground_truth_evaluator(new_thought.state) + except: + new_thought.solved = False + self.thoughts.append(new_thought) + + self.logger.info( + "GroundTruth operation %d evaluated %d thoughts and %d solved the problem", + self.id, + len(self.thoughts), + len([thought for thought in self.thoughts if thought.solved]), + ) + + +class Selector(Operation): + """ + Operation to select thoughts from predecessors. + Useful for separating thoughts to perform different, subsequent operations on them. + """ + + operation_type: OperationType = OperationType.selector + + def __init__(self, selector: Callable[[List[Thought]], List[Thought]]) -> None: + """ + Initializes a new Selector operation. + + :param selector: A function to select thoughts from the predecessors' thoughts. + :type selector: A function that takes a list of thoughts and returns a list of thoughts. + """ + super().__init__() + self.selector: Callable[[List[Thought]], List[Thought]] = selector + self.thoughts: List[Thought] = [] + + def get_thoughts(self) -> List[Thought]: + """ + Returns the thoughts selected by the operation. + + :return: List of selected thoughts. + :rtype: List[Thought] + """ + return self.thoughts + + def _execute( + self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs + ) -> None: + """ + Executes the Selector operation by selecting thoughts from the predecessors using the selector function. + If the Selector has no predecessors, the selector function is called with a thought containing the kwargs as state. 
+ + :param lm: The language model to be used. + :type lm: AbstractLanguageModel + :param prompter: The prompter for crafting prompts. + :type prompter: Prompter + :param parser: The parser for parsing responses. + :type parser: Parser + :param kwargs: Additional parameters for execution. + """ + previous_thoughts: List[Thought] = self.get_previous_thoughts() + + if len(previous_thoughts) == 0: + previous_thoughts = [Thought(kwargs)] + + self.thoughts = [ + Thought.from_thought(thought) + for thought in self.selector(previous_thoughts) + ] + + for thought in self.thoughts: + self.logger.debug( + "Thought %d with state %s selected", thought.id, thought.state + ) + + self.logger.info( + "Selector operation %d selected %d thoughts", self.id, len(self.thoughts) + ) diff --git a/docs/src/graph_of_thoughts/operations/thought.py b/docs/src/graph_of_thoughts/operations/thought.py new file mode 100644 index 0000000..efd3bea --- /dev/null +++ b/docs/src/graph_of_thoughts/operations/thought.py @@ -0,0 +1,117 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main author: Nils Blach + +from __future__ import annotations +import logging +from typing import Iterator, Dict, Optional +import itertools + + +class Thought: + """ + Represents an LLM thought with its state, constructed by the parser, and various flags. + """ + + _ids: Iterator[int] = itertools.count(0) + + def __init__(self, state: Optional[Dict] = None) -> None: + """ + Initializes a new Thought instance with a state and various default flags. + + :param state: The state of the thought. Defaults to None. 
+ :type state: Optional[Dict] + """ + self.logger: logging.Logger = logging.getLogger(self.__class__.__name__) + self.id: int = next(Thought._ids) + self.state: Dict = state + self._score: float = 0.0 + self._valid: bool = False + self._solved: bool = False + self.scored: bool = False + self.validated: bool = False + self.compared_to_ground_truth: bool = False + + @staticmethod + def from_thought(thought: Thought) -> Thought: + """ + Creates a new thought from an existing one. + + :param thought: An instance of a Thought to clone. + :return: A new Thought instance with properties copied from the input thought. + """ + new_thought = Thought(thought.state) + new_thought.score = thought.score + new_thought.valid = thought.valid + new_thought.solved = thought.solved + new_thought.scored = thought.scored + new_thought.validated = thought.validated + new_thought.compared_to_ground_truth = thought.compared_to_ground_truth + return new_thought + + @property + def valid(self) -> bool: + """ + Returns the validity of the thought. + + :return: The validity of the thought. + :rtype: bool + """ + return self._valid + + @valid.setter + def valid(self, valid: bool) -> None: + """ + Sets the validity of the thought and the validated flag. + + :param valid: The validity of the thought. + :type valid: bool + """ + self.validated = True + self._valid = valid + + @property + def score(self) -> float: + """ + Returns the score of the thought. + + :return: The score of the thought. + :rtype: float + """ + return self._score + + @score.setter + def score(self, new_score: float) -> None: + """ + Sets the score of the thought and the scored flag. + + :param new_score: The score of the thought. + :type new_score: float + """ + self.scored = True + self._score = new_score + + @property + def solved(self) -> bool: + """ + Returns the solved flag of the thought. + + :return: The solved flag of the thought. 
+ :rtype: bool + """ + return self._solved + + @solved.setter + def solved(self, solved: bool) -> None: + """ + Sets the solved flag of the thought and the compared_to_ground_truth flag. + + :param solved: Whether the thought contains a solution to the problem. + :type solved: bool + """ + self.compared_to_ground_truth = True + self._solved = solved diff --git a/docs/src/graph_of_thoughts/parser/__init__.py b/docs/src/graph_of_thoughts/parser/__init__.py new file mode 100644 index 0000000..2a3855a --- /dev/null +++ b/docs/src/graph_of_thoughts/parser/__init__.py @@ -0,0 +1 @@ +from .parser import Parser diff --git a/docs/src/graph_of_thoughts/parser/parser.py b/docs/src/graph_of_thoughts/parser/parser.py new file mode 100644 index 0000000..6db8ed8 --- /dev/null +++ b/docs/src/graph_of_thoughts/parser/parser.py @@ -0,0 +1,90 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main authors: Robert Gerstenberger, Nils Blach + +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Dict, List, Union + + +class Parser(ABC): + """ + Abstract base class that defines the interface for all parsers. + Parsers are used to parse the responses from the language models. + """ + + @abstractmethod + def parse_aggregation_answer( + self, states: List[Dict], texts: List[str] + ) -> Union[Dict, List[Dict]]: + """ + Parse the response from the language model for a aggregation prompt. + + :param states: The thought states used to generate the prompt. + :type states: List[Dict] + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought states after parsing the response from the language model. 
+ :rtype: Union[Dict, List[Dict]] + """ + pass + + @abstractmethod + def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict: + """ + Parse the response from the language model for an improve prompt. + + :param state: The thought state used to generate the prompt. + :type state: Dict + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought state after parsing the response from the language model. + :rtype: Dict + """ + pass + + @abstractmethod + def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]: + """ + Parse the response from the language model for a generate prompt. + + :param state: The thought state used to generate the prompt. + :type state: Dict + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The new thought states after parsing the response from the language model. + :rtype: List[Dict] + """ + pass + + @abstractmethod + def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool: + """ + Parse the response from the language model for a validation prompt. + + :param state: The thought state used to generate the prompt. + :type state: Dict + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: Whether the thought state is valid or not. + :rtype: bool + """ + pass + + @abstractmethod + def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]: + """ + Parse the response from the language model for a score prompt. + + :param states: The thought states used to generate the prompt. + :type states: List[Dict] + :param texts: The responses to the prompt from the language model. + :type texts: List[str] + :return: The scores for the thought states. 
+ :rtype: List[float] + """ + pass diff --git a/docs/src/graph_of_thoughts/prompter/__init__.py b/docs/src/graph_of_thoughts/prompter/__init__.py new file mode 100644 index 0000000..51f2bf3 --- /dev/null +++ b/docs/src/graph_of_thoughts/prompter/__init__.py @@ -0,0 +1 @@ +from .prompter import Prompter diff --git a/docs/src/graph_of_thoughts/prompter/prompter.py b/docs/src/graph_of_thoughts/prompter/prompter.py new file mode 100644 index 0000000..7ff323f --- /dev/null +++ b/docs/src/graph_of_thoughts/prompter/prompter.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +# +# main authors: Robert Gerstenberger, Nils Blach + +from __future__ import annotations +from abc import ABC, abstractmethod +from typing import Dict, List + + +class Prompter(ABC): + """ + Abstract base class that defines the interface for all prompters. + Prompters are used to generate the prompts for the language models. + """ + + @abstractmethod + def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str: + """ + Generate a aggregation prompt for the language model. + + :param state_dicts: The thought states that should be aggregated. + :type state_dicts: List[Dict] + :param kwargs: Additional keyword arguments. + :return: The aggregation prompt. + :rtype: str + """ + pass + + @abstractmethod + def improve_prompt(self, **kwargs) -> str: + """ + Generate an improve prompt for the language model. + The thought state is unpacked to allow for additional keyword arguments + and concrete implementations to specify required arguments explicitly. + + :param kwargs: Additional keyword arguments. + :return: The improve prompt. + :rtype: str + """ + pass + + @abstractmethod + def generate_prompt(self, num_branches: int, **kwargs) -> str: + """ + Generate a generate prompt for the language model. 
+ The thought state is unpacked to allow for additional keyword arguments + and concrete implementations to specify required arguments explicitly. + + :param num_branches: The number of responses the prompt should ask the LM to generate. + :type num_branches: int + :param kwargs: Additional keyword arguments. + :return: The generate prompt. + :rtype: str + """ + pass + + @abstractmethod + def validation_prompt(self, **kwargs) -> str: + """ + Generate a validation prompt for the language model. + The thought state is unpacked to allow for additional keyword arguments + and concrete implementations to specify required arguments explicitly. + + :param kwargs: Additional keyword arguments. + :return: The validation prompt. + :rtype: str + """ + pass + + @abstractmethod + def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str: + """ + Generate a score prompt for the language model. + + :param state_dicts: The thought states that should be scored, + if more than one, they should be scored together. + :type state_dicts: List[Dict] + :param kwargs: Additional keyword arguments. + :return: The score prompt. + :rtype: str + """ + pass diff --git a/docs/src/paper/README.md b/docs/src/paper/README.md new file mode 100644 index 0000000..92fdbb2 --- /dev/null +++ b/docs/src/paper/README.md @@ -0,0 +1,5 @@ +## Plot Data + +The data used to create the figure of the arXiv preprint article can be +found in the `final_results_gpt35.tar.bz2` archive. Unpack the archive +and run the file `plots.py`. diff --git a/docs/src/paper/plots.py b/docs/src/paper/plots.py new file mode 100644 index 0000000..4086a6a --- /dev/null +++ b/docs/src/paper/plots.py @@ -0,0 +1,337 @@ +# Copyright (c) 2023 ETH Zurich. +# All rights reserved. +# +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+# +# main author: Nils Blach +# contributions: Robert Gerstenberger + +import json +import os +import matplotlib.pyplot as plt + + +def get_complete_results(base_directory): + results_complete = {} + for folder_name in os.listdir(base_directory): + folder_path = os.path.join(base_directory, folder_name) + if os.path.isdir(folder_path): + results_complete[folder_name] = [] + for file_name in os.listdir(folder_path): + if file_name.endswith(".json"): + file_path = os.path.join(folder_path, file_name) + with open(file_path, "r") as f: + data = json.load(f) + results_complete[folder_name].append( + {"key": int(file_name.split(".")[0]), "data": data} + ) + for key in results_complete.keys(): + results_complete[key] = sorted( + results_complete[key], key=lambda x: x["key"] + ) + return results_complete + + +def get_final_scores(results_complete): + scores = {} + for method in results_complete.keys(): + scores[method] = [] + for result in results_complete[method]: + score = 100 + solved = False + cost = 1 + prompt_tokens = 0 + completion_tokens = 0 + for op in result["data"]: + if "operation" in op and op["operation"] == "ground_truth_evaluator": + try: + score = min(op["scores"]) + solved = any(op["problem_solved"]) + except: + continue + if "cost" in op: + cost = op["cost"] + prompt_tokens = op["prompt_tokens"] + completion_tokens = op["completion_tokens"] + scores[method].append( + [result["key"], score, solved, prompt_tokens, completion_tokens, cost] + ) + scores[method] = sorted(scores[method], key=lambda x: x[0]) + return scores + + +def get_final_scores_doc_merge(results_complete): + scores = {} + for method in results_complete.keys(): + scores[method] = [] + for result in results_complete[method]: + score = 0 + solved = False + cost = 1 + prompt_tokens = 0 + completion_tokens = 0 + for op in reversed(result["data"]): + if "cost" in op: + cost = op["cost"] + prompt_tokens = op["prompt_tokens"] + completion_tokens = op["completion_tokens"] + if "operation" in op 
and op["operation"] == "score": + try: + score = max(op["scores"]) + break + except: + continue + scores[method].append( + [result["key"], score, solved, prompt_tokens, completion_tokens, cost] + ) + scores[method] = sorted(scores[method], key=lambda x: x[0]) + return scores + + +def get_plotting_data(base_directory, score_method): + results_complete = get_complete_results(base_directory) + scores = score_method(results_complete) + results_plotting = { + method: { + "scores": [x[1] for x in scores[method]], + "solved": sum([1 for x in scores[method] if x[2]]), + "costs": [x[5] for x in scores[method]], + } + for method in scores.keys() + } + return results_plotting + + +def plot_results( + name, + results, + methods_order=["io", "cot", "tot", "tot2", "tog"], + methods_labels=["IO", "CoT", "ToT", "ToT2", "GoT"], + model="GPT-3.5", + length=32, + y_lower=0, + y_upper=16, + cost_upper=1.8, + display_solved=True, + annotation_offset=1, + display_left_ylabel=False, + display_right_ylabel=False, +): + methods_order = [method for method in methods_order if method in results] + # Extract scores based on the order + if name == "set_intersection": + scores_ordered = [ + [min(score, length) for score in results[method]["scores"] if score != 1000] + for method in methods_order + ] + elif name == "sorting": + scores_ordered = [ + [ + min(score, length) + for score in results[method]["scores"] + if score != 100 and score != 300 + ] + for method in methods_order + ] + elif name == "keyword_counting": + scores_ordered = [ + [ + score + for score in results[method]["scores"] + if score != 100 and score != 300 + ] + for method in methods_order + ] + elif name == "document_merging": + scores_ordered = [ + [score for score in results[method]["scores"]] for method in methods_order + ] + total_costs = [sum(results[method]["costs"]) for method in methods_order] + + # Create figure and axis + if name == "keyword_counting" or name == "document_merging": + fig, ax = plt.subplots(dpi=150, 
figsize=(3.75, 5)) + else: + fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5)) + + # Create boxplots + positions = range(1, len(methods_order) + 1) + ax.boxplot(scores_ordered, positions=positions) + + fig_fontsize = 12 + + # Set the ticks and labels + plt.yticks(fontsize=fig_fontsize) + ax.set_xticks(range(1, len(methods_order) + 1)) + ax.set_xticks(range(1, len(methods_order) + 1)) + if name == "keyword_counting": + ax.set_xticklabels(methods_labels, fontsize=10) + else: + ax.set_xticklabels(methods_labels, fontsize=fig_fontsize) + + if name == "document_merging": + ax.set_ylim(y_lower, 12 if display_solved else 9.75) + else: + ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1) + + if name == "sorting" or name == "set_intersection": + ax1_yticks = range( + y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8) + ) + ax.set_yticks(ax1_yticks) + + if display_left_ylabel: + if name == "keyword_counting": + ax.set_ylabel( + f"Number of errors; the lower the better", fontsize=fig_fontsize + ) + elif name == "document_merging": + ax.set_ylabel( + f"Score (out of 10); the higher the better", fontsize=fig_fontsize + ) + else: + ax.set_ylabel( + f"#incorrect elements; the lower the better", fontsize=fig_fontsize + ) + + if name == "sorting" or name == "set_intersection": + ax.set_title(f"{length} elements") + + ax2 = ax.twinx() + ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)") + ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize) + ax2.set_ylim(0, cost_upper) + number_of_ticks = len(ax.get_yticks()) + tick_interval = cost_upper / (number_of_ticks) + ax2_ticks = [tick_interval * i for i in range(number_of_ticks)] + + # Set custom tick positions for ax2 + ax2.set_yticks(ax2_ticks) + + if display_right_ylabel: + ax2.set_ylabel( + "Total Cost ($); the lower the better", + color="#1919ff", + fontsize=fig_fontsize, + ) + + if display_solved: + annotation_height = y_upper + annotation_offset + 
count = 1 + for method in methods_order: + if method not in results: + continue + solved = results[method]["solved"] + ax.text( + count, + annotation_height, + f"{solved}", + ha="center", + va="bottom", + fontsize=fig_fontsize, + ) + count += 1 + + model = model.replace(".", "").replace("-", "").lower() + if name == "keyword_counting" or name == "document_merging": + fig.savefig(f"{name}_{model}.pdf", bbox_inches="tight") + else: + fig.savefig(f"{name}_{model}_{length}.pdf", bbox_inches="tight") + + +plot_results( + "set_intersection", + get_plotting_data("set_intersection_gpt35_032", get_final_scores), + methods_order=["io", "cot", "tot", "tot2", "tog2"], + length=32, + y_upper=19, + cost_upper=2, + display_solved=True, + annotation_offset=0.5, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "set_intersection", + get_plotting_data("set_intersection_gpt35_064", get_final_scores), + methods_order=["io", "cot", "tot", "tot2", "tog2"], + length=64, + y_upper=32, + cost_upper=5.4, + display_solved=True, + annotation_offset=0.2, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "set_intersection", + get_plotting_data("set_intersection_gpt35_128", get_final_scores), + methods_order=["io", "cot", "tot", "tot2", "tog2"], + length=128, + y_upper=94, + cost_upper=12, + display_solved=True, + annotation_offset=-3, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "sorting", + get_plotting_data("sorting_gpt35_032", get_final_scores), + length=32, + display_solved=False, + annotation_offset=0.5, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "sorting", + get_plotting_data("sorting_gpt35_064", get_final_scores), + length=64, + y_upper=64, + cost_upper=5.1, + display_solved=False, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "sorting", + get_plotting_data("sorting_gpt35_128", get_final_scores), + length=128, + y_upper=128, + 
cost_upper=17, + display_solved=False, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "keyword_counting", + get_plotting_data("keyword_counting_gpt35", get_final_scores), + methods_order=["io", "cot", "tot", "tot2", "gsp4", "gsp8", "gspx"], + methods_labels=["IO", "CoT", "ToT", "ToT2", "GoT4", "GoT8", "GoTx"], + y_upper=35, + cost_upper=9, + display_solved=True, + annotation_offset=-0.3, + display_left_ylabel=True, + display_right_ylabel=True, +) + +plot_results( + "document_merging", + get_plotting_data("document_merging_gpt35_16k", get_final_scores_doc_merge), + methods_order=["io", "cot", "tot", "gsp", "gsp2"], + methods_labels=["IO", "CoT", "ToT", "GoT", "GoT2"], + y_upper=10, + cost_upper=15, + display_solved=False, + display_left_ylabel=True, + display_right_ylabel=True, +) diff --git a/docs/src/pyproject.toml b/docs/src/pyproject.toml new file mode 100644 index 0000000..ecbf97c --- /dev/null +++ b/docs/src/pyproject.toml @@ -0,0 +1,39 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "graph_of_thoughts" +version = "0.0.3" +authors = [ + { name="Maciej Besta", email="maciej.besta@inf.ethz.ch" }, + { name="Nils Blach", email="nils.blach@inf.ethz.ch" }, + { name="Ales Kubicek", email="akubicek@student.ethz.ch" }, + { name="Robert Gerstenberger", email="gerstenberger.robert@gmail.com" }, +] +description = "Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models" +readme = "README.md" +license = {file = "LICENSE"} +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", +] +dependencies = [ + "backoff>=2.2.1,<3.0.0", + "openai>=1.0.0,<2.0.0", + "matplotlib>=3.7.1,<4.0.0", + "numpy>=1.24.3,<2.0.0", + "pandas>=2.0.3,<3.0.0", + "sympy>=1.12,<2.0", + "torch>=2.0.1,<3.0.0", + "transformers>=4.31.0,<5.0.0", + "accelerate>=0.21.0,<1.0.0", + "bitsandbytes>=0.41.0,<1.0.0", + 
"scipy>=1.10.1,<2.0.0", +] + +[project.urls] +Homepage = "https://github.com/spcl/graph-of-thoughts" + +[project.scripts] diff --git a/docs/tree.html b/docs/tree.html new file mode 100644 index 0000000..58a812a --- /dev/null +++ b/docs/tree.html @@ -0,0 +1,180 @@ + + + + + + + + + + + + + + +
    +

    Project Structure of: spcl/graph-of-thoughts

    +
      +
    • graph-of-thoughts Data analysis, visualization, and language modeling tools.
        +
      • README.md GoT framework: Python language model for sorting and JSON graphs
      • +
      • examples Interactive examples of data analysis and visualization.
          +
        • README.md Runnable examples for Graph of Thoughts package with standalone scripts and prompt files.
        • +
        • doc_merge Efficient NDA merging with language models and redundancy handling.
            +
          • README.md Document merging methods comparison with debug logs.
          • +
          • doc_merge.py Efficient NDA merging with language model and redundancy handling.
          • +
          • plot.py Plot script, imports, sorts, plots boxplots and bars.
          • +
          • pure_documents.json Company-supplier agreement discussions, covering various aspects.
          • +
          +
        • +
        • keyword_counting Count country occurrences in text, generate dataset, and plot results
            +
          • README.md Count frequency methods for countries in text
          • +
          • dataset_gen_countries.py Generate dataset for country occurrences using language model
          • +
          • plot.py Python script for plotting boxplots, bar charts from JSON data.
          • +
          +
        • +
        • set_intersection Set intersection data tools and visualizations +
        • +
        • sorting Sorting algorithms examples, Python, plotting, utilities.
            +
          • README.md Sorting algorithm examples in Python.
          • +
          • plot.py Sorts, plots, customizes boxplots for sorting algorithms.
          • +
          • utils.py Sorting utility functions, list conversion and testing.
          • +
          +
        • +
        +
      • +
      • graph_of_thoughts Graph-of-thoughts engine with language models and operations.
          +
        • controller Language model graph processing controller.
            +
          • README.md Controller class manages graph execution with LLM, custom prompter, parser.
          • +
          • init.py Imports Controller class from package's controller module.
          • +
          • controller.py Graph processing controller for language models.
          • +
          +
        • +
        • language_models Language model library with GPT-4, GPT-3.5, and Llama-2 support +
        • +
        • operations Manage graph-of-thought operations with language models and helper classes.
            +
          • README.md Manage thought operations with language models and helper classes.
          • +
          • init.py Imports classes for operations in graph-of-thoughts.
          • +
          • graph_of_operations.py Graph of Operations: Manages operation execution plans
          • +
          • operations.py Graph of Thoughts operations preservation.
          • +
          • thought.py Thought class: LLM operation, state, score.
          • +
          +
        • +
        • parser Python language model response parsing abstract class library.
            +
          • init.py Import Parser class from "parser" module for easier usage.
          • +
          • parser.py Abstract class with 3 parsing methods for language model responses using thought states and texts.
          • +
          +
        • +
        • prompter Generate prompt models for language.
            +
          • init.py Imports Prompter class from "prompter" module.
          • +
          • prompter.py Generate prompt language model.
          • +
          +
        • +
        +
      • +
      • paper ArXiv preprint data visualization scripts and plots.
          +
        • README.md Unpack, execute plots.py for arXiv preprint data visualization
        • +
        • plots.py Generate customized boxplots with Python and JSON data
        • +
        +
      • +
      • pyproject.toml Python package settings with Hatchling and entry point.
      • +
      +
    • +
    +
    + + + + \ No newline at end of file diff --git a/examples/doc_merge/.fdignore b/examples/doc_merge/.fdignore new file mode 100644 index 0000000..ee0d50f --- /dev/null +++ b/examples/doc_merge/.fdignore @@ -0,0 +1 @@ +example_prompts_document_merging.md \ No newline at end of file diff --git a/examples/keyword_counting/.fdignore b/examples/keyword_counting/.fdignore new file mode 100644 index 0000000..ca527e5 --- /dev/null +++ b/examples/keyword_counting/.fdignore @@ -0,0 +1,2 @@ +example_prompts_keyword_counting.md +keyword_counting.py \ No newline at end of file diff --git a/examples/set_intersection/.fdignore b/examples/set_intersection/.fdignore new file mode 100644 index 0000000..d9a6c09 --- /dev/null +++ b/examples/set_intersection/.fdignore @@ -0,0 +1,2 @@ +example_prompts_set_intersection_032.md +set* \ No newline at end of file diff --git a/examples/sorting/.fdignore b/examples/sorting/.fdignore new file mode 100644 index 0000000..15770fc --- /dev/null +++ b/examples/sorting/.fdignore @@ -0,0 +1,2 @@ +sorting* +example_prompts* \ No newline at end of file From 1f906023eb94709436126d668dc54d8d11f4cd0e Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 03:13:39 +0800 Subject: [PATCH 02/25] update --- docs/metadata.json | 18 +++++----- docs/tree.html | 84 +++++++++++++++++++++++----------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/metadata.json b/docs/metadata.json index 834b307..25f2140 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -7,7 +7,7 @@ "0": { "filepath": "/README.md", "entry_id": 0, - "language_id": "markdown" + "language_id": "plain-text" }, "1": { "filepath": "/pyproject.toml", @@ -17,12 +17,12 @@ "2": { "filepath": "/examples/README.md", "entry_id": 20, - "language_id": "markdown" + "language_id": "plain-text" }, "3": { "filepath": "/examples/doc_merge/README.md", "entry_id": 24, - "language_id": "markdown" + "language_id": "plain-text" }, "4": { "filepath": 
"/examples/doc_merge/doc_merge.py", @@ -42,7 +42,7 @@ "7": { "filepath": "/examples/keyword_counting/README.md", "entry_id": 244, - "language_id": "plain-text" + "language_id": "markdown" }, "8": { "filepath": "/examples/keyword_counting/dataset_gen_countries.py", @@ -57,7 +57,7 @@ "10": { "filepath": "/examples/set_intersection/README.md", "entry_id": 294, - "language_id": "markdown" + "language_id": "plain-text" }, "11": { "filepath": "/examples/set_intersection/dataset_gen_intersection.py", @@ -77,7 +77,7 @@ "14": { "filepath": "/examples/sorting/README.md", "entry_id": 332, - "language_id": "markdown" + "language_id": "plain-text" }, "15": { "filepath": "/examples/sorting/plot.py", @@ -92,7 +92,7 @@ "17": { "filepath": "/graph_of_thoughts/controller/README.md", "entry_id": 360, - "language_id": "markdown" + "language_id": "plain-text" }, "18": { "filepath": "/graph_of_thoughts/controller/__init__.py", @@ -107,7 +107,7 @@ "20": { "filepath": "/graph_of_thoughts/language_models/README.md", "entry_id": 384, - "language_id": "markdown" + "language_id": "plain-text" }, "21": { "filepath": "/graph_of_thoughts/language_models/__init__.py", @@ -137,7 +137,7 @@ "26": { "filepath": "/graph_of_thoughts/operations/README.md", "entry_id": 450, - "language_id": "markdown" + "language_id": "plain-text" }, "27": { "filepath": "/graph_of_thoughts/operations/__init__.py", diff --git a/docs/tree.html b/docs/tree.html index 58a812a..bb2f112 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -74,79 +74,79 @@

    Project Structure of: spcl/graph-of-thoughts

    • graph-of-thoughts Data analysis, visualization, and language modeling tools.
        -
      • README.md GoT framework: Python language model for sorting and JSON graphs
      • +
      • README.md GoT framework: Python language model for sorting and JSON graphs
      • examples Interactive examples of data analysis and visualization.
          -
        • README.md Runnable examples for Graph of Thoughts package with standalone scripts and prompt files.
        • -
        • doc_merge Efficient NDA merging with language models and redundancy handling.
            -
          • README.md Document merging methods comparison with debug logs.
          • -
          • doc_merge.py Efficient NDA merging with language model and redundancy handling.
          • -
          • plot.py Plot script, imports, sorts, plots boxplots and bars.
          • -
          • pure_documents.json Company-supplier agreement discussions, covering various aspects.
          • +
          • README.md Runnable examples for Graph of Thoughts package with standalone scripts and prompt files.
          • +
          • doc_merge Efficient NDA merging with language models and redundancy handling.
              +
            • README.md Document merging methods comparison with debug logs.
            • +
            • doc_merge.py Efficient NDA merging with language model and redundancy handling.
            • +
            • plot.py Plot script, imports, sorts, plots boxplots and bars.
            • +
            • pure_documents.json Company-supplier agreement discussions, covering various aspects.
          • -
          • keyword_counting Count country occurrences in text, generate dataset, and plot results
              -
            • README.md Count frequency methods for countries in text
            • -
            • dataset_gen_countries.py Generate dataset for country occurrences using language model
            • -
            • plot.py Python script for plotting boxplots, bar charts from JSON data.
            • +
            • keyword_counting Count country occurrences in text, generate dataset, and plot results
                +
              • README.md Count frequency methods for countries in text
              • +
              • dataset_gen_countries.py Generate dataset for country occurrences using language model
              • +
              • plot.py Python script for plotting boxplots, bar charts from JSON data.
            • -
            • set_intersection Set intersection data tools and visualizations
                -
              • README.md Set intersection data generator and visualizer.
              • -
              • dataset_gen_intersection.py Shuffles, generates sets, calculates intersections.
              • -
              • plot.py Boxplot generator for AI method results
              • -
              • utils.py Set intersection utility functions and test case.
              • +
              • set_intersection Set intersection data tools and visualizations
              • sorting Sorting algorithms examples, Python, plotting, utilities.
                  -
                • README.md Sorting algorithm examples in Python.
                • -
                • plot.py Sorts, plots, customizes boxplots for sorting algorithms.
                • -
                • utils.py Sorting utility functions, list conversion and testing.
                • +
                • README.md Sorting algorithm examples in Python.
                • +
                • plot.py Sorts, plots, customizes boxplots for sorting algorithms.
                • +
                • utils.py Sorting utility functions, list conversion and testing.
            • -
            • graph_of_thoughts Graph-of-thoughts engine with language models and operations.
                +
              • graph_of_thoughts Graph-of-thoughts engine with language models and operations.
                • controller Language model graph processing controller.
                    -
                  • README.md Controller class manages graph execution with LLM, custom prompter, parser.
                  • -
                  • init.py Imports Controller class from package's controller module.
                  • -
                  • controller.py Graph processing controller for language models.
                  • +
                  • README.md Controller class manages graph execution with LLM, custom prompter, parser.
                  • +
                  • __init__.py Imports Controller class from package's controller module.
                  • +
                  • controller.py Graph processing controller for language models.
                • -
                • language_models Language model library with GPT-4, GPT-3.5, and Llama-2 support
                    -
                  • README.md Language Models module for GPT-4, GPT-3.5, and Llama-2
                  • -
                  • init.py Imports language model classes from submodules
                  • -
                  • abstract_language_model.py Abstract Language Model Class Defined
                  • -
                  • chatgpt.py ChatGPT class for OpenAI's chat API, multi-response support.
                  • -
                  • config_template.json Language model configuration template
                  • -
                  • llamachat_hf.py LLaMA 2 model for text generation init.
                  • +
                  • language_models Language model library with GPT-4, GPT-3.5, and Llama-2 support
                  • operations Manage graph-of-thought operations with language models and helper classes.
                      -
                    • README.md Manage thought operations with language models and helper classes.
                    • -
                    • init.py Imports classes for operations in graph-of-thoughts.
                    • -
                    • graph_of_operations.py Graph of Operations: Manages operation execution plans
                    • -
                    • operations.py Graph of Thoughts operations preservation.
                    • -
                    • thought.py Thought class: LLM operation, state, score.
                    • +
                    • README.md Manage thought operations with language models and helper classes.
                    • +
                    • __init__.py Imports classes for operations in graph-of-thoughts.
                    • +
                    • graph_of_operations.py Graph of Operations: Manages operation execution plans
                    • +
                    • operations.py Graph of Thoughts operations preservation.
                    • +
                    • thought.py Thought class: LLM operation, state, score.
                  • parser Python language model response parsing abstract class library.
                      -
                    • init.py Import Parser class from "parser" module for easier usage.
                    • -
                    • parser.py Abstract class with 3 parsing methods for language model responses using thought states and texts.
                    • +
                    • __init__.py Import Parser class from "parser" module for easier usage.
                    • +
                    • parser.py Abstract class with 3 parsing methods for language model responses using thought states and texts.
                  • prompter Generate prompt models for language.
                      -
                    • init.py Imports Prompter class from "prompter" module.
                    • -
                    • prompter.py Generate prompt language model.
                    • +
                    • __init__.py Imports Prompter class from "prompter" module.
                    • +
                    • prompter.py Generate prompt language model.
                • paper ArXiv preprint data visualization scripts and plots.
                    -
                  • README.md Unpack, execute plots.py for arXiv preprint data visualization
                  • -
                  • plots.py Generate customized boxplots with Python and JSON data
                  • +
                  • README.md Unpack, execute plots.py for arXiv preprint data visualization
                  • +
                  • plots.py Generate customized boxplots with Python and JSON data
                • -
                • pyproject.toml Python package settings with Hatchling and entry point.
                • +
                • pyproject.toml Python package settings with Hatchling and entry point.
              From 3e8bebb735936398104f99c7cfb07feefa72766f Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 03:31:34 +0800 Subject: [PATCH 03/25] update --- docs/codeview.html | 10 ++++++++++ docs/metadata.json | 14 +++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index 7f30366..08ac6f1 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -432,7 +432,17 @@ var xhr = new XMLHttpRequest(); xhr.open('GET', code_path, false); // The third parameter is set to false for synchronous request xhr.send(null); + if (xhr.status == 200){ + code_elem.textContent = xhr.responseText;}else{ + + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = "https://raw.githubusercontent.com/James4Ever0/LLM_Tree_Search/main/docs/"+code_path + xhr.open('GET',newLink , false); // The third parameter is set to false for synchronous request + xhr.send(null); code_elem.textContent = xhr.responseText; + } + pre_elem.appendChild(code_elem); // pre_elem.setAttribute("data-src", code_path); section_elem.appendChild(pre_elem) diff --git a/docs/metadata.json b/docs/metadata.json index 25f2140..44eba53 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -17,7 +17,7 @@ "2": { "filepath": "/examples/README.md", "entry_id": 20, - "language_id": "plain-text" + "language_id": "markdown" }, "3": { "filepath": "/examples/doc_merge/README.md", @@ -42,7 +42,7 @@ "7": { "filepath": "/examples/keyword_counting/README.md", "entry_id": 244, - "language_id": "markdown" + "language_id": "plain-text" }, "8": { "filepath": "/examples/keyword_counting/dataset_gen_countries.py", @@ -77,7 +77,7 @@ "14": { "filepath": "/examples/sorting/README.md", "entry_id": 332, - "language_id": "plain-text" + "language_id": "markdown" }, "15": { "filepath": "/examples/sorting/plot.py", @@ -92,7 +92,7 @@ "17": { "filepath": 
"/graph_of_thoughts/controller/README.md", "entry_id": 360, - "language_id": "plain-text" + "language_id": "markdown" }, "18": { "filepath": "/graph_of_thoughts/controller/__init__.py", @@ -107,7 +107,7 @@ "20": { "filepath": "/graph_of_thoughts/language_models/README.md", "entry_id": 384, - "language_id": "plain-text" + "language_id": "markdown" }, "21": { "filepath": "/graph_of_thoughts/language_models/__init__.py", @@ -137,7 +137,7 @@ "26": { "filepath": "/graph_of_thoughts/operations/README.md", "entry_id": 450, - "language_id": "plain-text" + "language_id": "markdown" }, "27": { "filepath": "/graph_of_thoughts/operations/__init__.py", @@ -182,7 +182,7 @@ "35": { "filepath": "/paper/README.md", "entry_id": 576, - "language_id": "markdown" + "language_id": "plain-text" }, "36": { "filepath": "/paper/plots.py", From a4091416237351eb062995cf178302ec2580400a Mon Sep 17 00:00:00 2001 From: GitJournal Date: Thu, 11 Jan 2024 03:34:09 +0800 Subject: [PATCH 04/25] update --- docs/codeview.html | 41 +++++++++++++++++++++++++++++------------ docs/metadata.json | 16 ++++++++-------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index 08ac6f1..10bbd48 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -432,15 +432,30 @@ var xhr = new XMLHttpRequest(); xhr.open('GET', code_path, false); // The third parameter is set to false for synchronous request xhr.send(null); - if (xhr.status == 200){ - code_elem.textContent = xhr.responseText;}else{ + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { - var xhr = new XMLHttpRequest(); - // TODO: mitigate this evil hack by passing more info of the original project. 
- var newLink = "https://raw.githubusercontent.com/James4Ever0/LLM_Tree_Search/main/docs/"+code_path - xhr.open('GET',newLink , false); // The third parameter is set to false for synchronous request - xhr.send(null); - code_elem.textContent = xhr.responseText; + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/main/docs/` + code_path + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/master/docs/` + code_path + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { code_elem.textContent = "Failed to load code." } + } } pre_elem.appendChild(code_elem); @@ -594,11 +609,13 @@

              Code Preview

              -
              -
              - -
              + + \ No newline at end of file diff --git a/docs/metadata.json b/docs/metadata.json index 44eba53..237aaa5 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -17,7 +17,7 @@ "2": { "filepath": "/examples/README.md", "entry_id": 20, - "language_id": "markdown" + "language_id": "plain-text" }, "3": { "filepath": "/examples/doc_merge/README.md", @@ -42,7 +42,7 @@ "7": { "filepath": "/examples/keyword_counting/README.md", "entry_id": 244, - "language_id": "plain-text" + "language_id": "markdown" }, "8": { "filepath": "/examples/keyword_counting/dataset_gen_countries.py", @@ -57,7 +57,7 @@ "10": { "filepath": "/examples/set_intersection/README.md", "entry_id": 294, - "language_id": "plain-text" + "language_id": "markdown" }, "11": { "filepath": "/examples/set_intersection/dataset_gen_intersection.py", @@ -77,7 +77,7 @@ "14": { "filepath": "/examples/sorting/README.md", "entry_id": 332, - "language_id": "markdown" + "language_id": "plain-text" }, "15": { "filepath": "/examples/sorting/plot.py", @@ -92,7 +92,7 @@ "17": { "filepath": "/graph_of_thoughts/controller/README.md", "entry_id": 360, - "language_id": "markdown" + "language_id": "plain-text" }, "18": { "filepath": "/graph_of_thoughts/controller/__init__.py", @@ -107,7 +107,7 @@ "20": { "filepath": "/graph_of_thoughts/language_models/README.md", "entry_id": 384, - "language_id": "markdown" + "language_id": "plain-text" }, "21": { "filepath": "/graph_of_thoughts/language_models/__init__.py", @@ -137,7 +137,7 @@ "26": { "filepath": "/graph_of_thoughts/operations/README.md", "entry_id": 450, - "language_id": "markdown" + "language_id": "plain-text" }, "27": { "filepath": "/graph_of_thoughts/operations/__init__.py", @@ -182,7 +182,7 @@ "35": { "filepath": "/paper/README.md", "entry_id": 576, - "language_id": "plain-text" + "language_id": "markdown" }, "36": { "filepath": "/paper/plots.py", From 63d295e9571ed86d74c8686adc7e3963695b019a Mon Sep 17 00:00:00 2001 From: GitJournal Date: Sat, 13 Jan 
2024 18:29:53 +0800 Subject: [PATCH 05/25] update --- docs/cache_title.json | 1 + docs/codeview.html | 40 +- docs/data/0.json | 368 ++++++------ docs/data/1.json | 439 +++++++------- docs/data/2.json | 437 +++++++------- docs/data/3.json | 436 +++++++------- docs/data/4.json | 435 +++++++------- docs/data/5.json | 439 +++++++------- docs/data/6.json | 12 +- docs/data/titles/0.json | 293 ++++++++++ docs/index.html | 1177 +++++++++++++++++++++++++------------- docs/metadata.json | 162 +++--- docs/metadata_title.json | 1 + docs/tree.html | 94 +-- 14 files changed, 2525 insertions(+), 1809 deletions(-) create mode 100644 docs/cache_title.json create mode 100644 docs/data/titles/0.json create mode 100644 docs/metadata_title.json diff --git a/docs/cache_title.json b/docs/cache_title.json new file mode 100644 index 0000000..aa86069 --- /dev/null +++ b/docs/cache_title.json @@ -0,0 +1 @@ +{"_default": {"1": {"path": "/README.md", "hash": "c0f45483538b7f8a477a1d7ee847b5a5", "title": "GoT Framework: Efficient Python 3.8+ Language Model"}, "2": {"path": "/README.md:1-20", "hash": "48cc35e1a0fe33814e8d55bd06174e50", "title": "Installing Graph of Thoughts Framework"}, "3": {"path": "/README.md:21-48", "hash": "c33313be8a7d1da183d34df9d4fadc29", "title": "Graph of Thoughts LLM Installation and Example"}, "4": {"path": "/README.md:49-83", "hash": "f14cf27fb727e66c0aa07d8db63b40aa", "title": "Generate Graph of Thoughts using GoT"}, "5": {"path": "/README.md:85-116", "hash": "b62bef062974665e6d7c2e97313114a2", "title": "Code for Framework Execution and Error Counts"}, "6": {"path": "/README.md:116-133", "hash": "50393cd135f1bebd10da58d6faa8dff6", "title": "Framework Tutorial and Examples"}, "7": {"path": "/README.md:134-150", "hash": "abbb46f70138b34920e51809d094432c", "title": "Accessing and Citing Project Results"}, "8": {"path": "/examples/README.md", "hash": "3dca5b56faa278658d6a3882a6fdb22a", "title": "Graph of Thoughts Examples"}, "9": {"path": 
"/examples/doc_merge/README.md", "hash": "6d22d392c8c972e4bda2c2d6baf4099f", "title": "Document Merging with Multiple Methods"}, "10": {"path": "/examples/doc_merge/README.md:1-28", "hash": "d45e3daa3b348ae8550c7fdf5b5a4a9c", "title": "Document Merging Use Cases"}, "11": {"path": "/examples/doc_merge/README.md:29-38", "hash": "ce7c5bfc581627cd07c18a9a6494503d", "title": "Configure and Log Code Generator"}, "12": {"path": "/examples/doc_merge/doc_merge.py", "hash": "447dd0390bc88b69bf1cd1e028acaf50", "title": "Efficient NDA Merging with Language Model"}, "13": {"path": "/examples/doc_merge/doc_merge.py:1-31", "hash": "07b06a9758a0241e2cc2485d7294a119", "title": "Merge NDA Documents Prompter Class"}, "14": {"path": "/examples/doc_merge/doc_merge.py:32-54", "hash": "2e7164fbb45c7d8144f78cd87df11ac8", "title": "NDA Document Merging and Improvement Tool"}, "15": {"path": "/examples/doc_merge/doc_merge.py:55-71", "hash": "f3a85cace1b9d765551b3f206c8cb5b0", "title": "Redundancy-Aware NDA Merge Prompts"}, "16": {"path": "/examples/doc_merge/doc_merge.py:73-112", "hash": "85c6a197c977a50818ddee4135704bf4", "title": "NDA Merge and Summarize Tool"}, "17": {"path": "/examples/doc_merge/doc_merge.py:113-143", "hash": "81f1a48f0bde1750013de605aa4f67c8", "title": "Merged NDAs Prompt Generator"}, "18": {"path": "/examples/doc_merge/doc_merge.py:144-174", "hash": "039965ea908f5003fc2f01b885e75695", "title": "Generate Prompt Class"}, "19": {"path": "/examples/doc_merge/doc_merge.py:176-198", "hash": "10a0331be129ed69e24f59d61d1a6cca", "title": "Generate Prompt for Document Merging"}, "20": {"path": "/examples/doc_merge/doc_merge.py:199-221", "hash": "64b1d92f9418ad969ef50eae97fc8c71", "title": "Dynamic Prompt for Document Merging"}, "21": {"path": "/examples/doc_merge/doc_merge.py:222-245", "hash": "a862dcf0aecaf6600e40f32eb75bef34", "title": "Document Merge and Improvement Prompt"}, "22": {"path": "/examples/doc_merge/doc_merge.py:246-274", "hash": 
"f0945f49ceae8fb9f1146e2020119326", "title": "Generate Score Prompt for Language Model"}, "23": {"path": "/examples/doc_merge/doc_merge.py:275-315", "hash": "4c3b117416d8e173237b44c4317d3f9f", "title": "DocMergeParser: Parsing Doc Merge Language Model Responses"}, "24": {"path": "/examples/doc_merge/doc_merge.py:316-342", "hash": "3aa0d9fac6148994c5486de02237b5f9", "title": "Remove Tags from Text"}, "25": {"path": "/examples/doc_merge/doc_merge.py:343-369", "hash": "f358a0ad1bc198b494ae9cb01d8e1e80", "title": "Aggregation Prompt Parser"}, "26": {"path": "/examples/doc_merge/doc_merge.py:370-393", "hash": "0cb022d5e3cf8d873a077f1df750bb6c", "title": "Aggregating Thought States from Multiple Sources"}, "27": {"path": "/examples/doc_merge/doc_merge.py:394-420", "hash": "7c1a74e0ab4fda010b7c72fcd495ff08", "title": "Parse Thought States and Scores"}, "28": {"path": "/examples/doc_merge/doc_merge.py:421-441", "hash": "34963b231d89ace7a8a0a63b28459150", "title": "Redundancy Extraction with Regex"}, "29": {"path": "/examples/doc_merge/doc_merge.py:442-464", "hash": "886b145de38d526cc5acd6c7527a275c", "title": "Language Model Improve Prompt Parsing Function"}, "30": {"path": "/examples/doc_merge/doc_merge.py:465-497", "hash": "c911d4c80cab99bd02c1f84b761d613b", "title": "Functions for Thought State Management and IO Operations"}, "31": {"path": "/examples/doc_merge/doc_merge.py:500-533", "hash": "c2dc3a9282efd67ca0d0fbbb5bccc87e", "title": "CoT and ToT Operation Graphs"}, "32": {"path": "/examples/doc_merge/doc_merge.py:534-561", "hash": "327e0cd161317159ed1589b628d3e5f1", "title": "Document Merge Graph Generation Code"}, "33": {"path": "/examples/doc_merge/doc_merge.py:562-593", "hash": "814abff74894c109269a6fb66e9a2d87", "title": "Graph Operations Code for GoT2 Merge"}, "34": {"path": "/examples/doc_merge/doc_merge.py:594-619", "hash": "b47867d5588f7933a5aaed6a162a6a29", "title": "Document Merge Operations Graph Creation"}, "35": {"path": 
"/examples/doc_merge/doc_merge.py:620-649", "hash": "9452df9a9d0734c9f9bc4b23a3057840", "title": "Operations Graph for Language Model Inference"}, "36": {"path": "/examples/doc_merge/doc_merge.py:650-679", "hash": "7605ef6f25a51dc1d1f579e1fff7a885", "title": "Budgeted Language Model Folder Creation"}, "37": {"path": "/examples/doc_merge/doc_merge.py:680-712", "hash": "cb8d10460ccfd0495d963fefff34b808", "title": "Budget-Controlled Data Merging"}, "38": {"path": "/examples/doc_merge/doc_merge.py:713-741", "hash": "a518c37a0499b8127c8a27e287f26e86", "title": "Initialize Language Model and Run Executor"}, "39": {"path": "/examples/doc_merge/doc_merge.py:742-767", "hash": "61f446ba9a41281212356d09ffcfc360", "title": "Combine, Evaluate, and Score NDAs with LLM"}, "40": {"path": "/examples/doc_merge/plot.py", "hash": "e77e13e691133b13a914c80bb6c889d9", "title": "DocMerge Plotting Functionality"}, "41": {"path": "/examples/doc_merge/plot.py:1-29", "hash": "d03a12ad8efb164423972fefb2a5ee80", "title": "JSON Data Merging and Sorting"}, "42": {"path": "/examples/doc_merge/plot.py:30-59", "hash": "b66f53102e65555a9050b6fc51874240", "title": "Sorting Final Scores"}, "43": {"path": "/examples/doc_merge/plot.py:60-96", "hash": "c2edd7ffd727dcd29e11077a4163bb84", "title": "Plotting Results from Method Data"}, "44": {"path": "/examples/doc_merge/plot.py:97-132", "hash": "932839fa33d6cf5db52a854a3cc7652a", "title": "Boxplot and Bar Plot Creation with Customized Axes"}, "45": {"path": "/examples/doc_merge/plot.py:133-168", "hash": "8d60946aa763035d6c328d3af7021fe6", "title": "Custom Tick Positions and Labels for Y-Axis Plotting"}, "46": {"path": "/examples/doc_merge/plot.py:169-170", "hash": "d5422c023b16aea73c0e6e1e95f16284", "title": "Initializing DocMerge with Cost Upper Limit"}, "47": {"path": "/examples/doc_merge/pure_documents.json", "hash": "2b7a391f62cc98289ef451b62d10b449", "title": "Company-Supplier Agreement Aspects"}, "48": {"path": 
"/examples/doc_merge/pure_documents.json:1-3", "hash": "f5f71ef2d705f4106d741b93942532a6", "title": "JSON NDA Examples: Confidential Information Sharing Agreements"}, "49": {"path": "/examples/doc_merge/pure_documents.json:3-3", "hash": "6698107ddbd62c55e7cba3d82b931b73", "title": "Confidentiality Agreement Outline"}, "50": {"path": "/examples/doc_merge/pure_documents.json:3-4", "hash": "a6e683b199d42f96dffa97001271afd7", "title": "Non-Disclosure Pact: AquaBlue & PineTree"}, "51": {"path": "/examples/doc_merge/pure_documents.json:4-5", "hash": "be4fd50a7bc6aad17f2005c5a3320f97", "title": "AquaBlue-PineTree NDA Terms"}, "52": {"path": "/examples/doc_merge/pure_documents.json:5-6", "hash": "b0a2e6f78a0cee85ca3b68555213f19f", "title": "NDA & Non-Compete Agreement: AquaBlue-PineTree"}, "53": {"path": "/examples/doc_merge/pure_documents.json:6-6", "hash": "629cda25ac6a0692457e5a44eb60067e", "title": "Non-Disclosure & Non-Compete Agreement"}, "54": {"path": "/examples/doc_merge/pure_documents.json:7-8", "hash": "355e30a407d9048047bd41f1b5c631fd", "title": "Training Loyalty Agreement"}, "55": {"path": "/examples/doc_merge/pure_documents.json:8-8", "hash": "6228ccca3b3ce1bbe9e151586620270b", "title": "Employment Agreement Terms and Conditions"}, "56": {"path": "/examples/doc_merge/pure_documents.json:8-10", "hash": "8872bdb94a76c1b547889a83d89baefa", "title": "Loyalty Agreement Template"}, "57": {"path": "/examples/doc_merge/pure_documents.json:10-10", "hash": "d0cff2c036acd47ad3b2a792d305655f", "title": "Non-Disclosure Contract"}, "58": {"path": "/examples/doc_merge/pure_documents.json:10-11", "hash": "d7004468554739b01aa6a27818d25ee3", "title": "B2B Contractor Loyalty Agreement"}, "59": {"path": "/examples/doc_merge/pure_documents.json:11-11", "hash": "96695154f013e22097fd0ac55a20aaa1", "title": "Comprehensive Contract Template"}, "60": {"path": "/examples/doc_merge/pure_documents.json:11-12", "hash": "b0f310d29a0e1e5180c37f2dd7f8d538", "title": "Non-Disclosure Agreement 
between Company and Supplier"}, "61": {"path": "/examples/doc_merge/pure_documents.json:12-13", "hash": "5f7ef3aed383dd8d0817ed29ad17341f", "title": "Non-Disclosure Non-Compete Agreement"}, "62": {"path": "/examples/doc_merge/pure_documents.json:13-14", "hash": "09930153749b081df922b921d9341b0e", "title": "Supplier Contract Template"}, "63": {"path": "/examples/doc_merge/pure_documents.json:14-14", "hash": "8484f226fffb1152788032aa973e2278", "title": "Data Analyst Employment Agreement Outline"}, "64": {"path": "/examples/doc_merge/pure_documents.json:14-15", "hash": "eefd35265169ac0f3b01bccd9f78af55", "title": "Code of Employment"}, "65": {"path": "/examples/doc_merge/pure_documents.json:15-15", "hash": "60baeed7b29c78ff2a48c0abf01d1839", "title": "Data Analysis Contract Snippet"}, "66": {"path": "/examples/doc_merge/pure_documents.json:15-16", "hash": "29f9b6fc5a6e8b213cfc4e5aa639e95a", "title": "Researcher-University NDA"}, "67": {"path": "/examples/doc_merge/pure_documents.json:16-17", "hash": "7df1a7101fbbe2af1fc300a4ec39f32b", "title": "Business-University Cooperation Agreement"}, "68": {"path": "/examples/doc_merge/pure_documents.json:17-17", "hash": "d0d880ecbb35285358591caebec4e177", "title": "Research Collaboration Agreement"}, "69": {"path": "/examples/doc_merge/pure_documents.json:17-18", "hash": "1e353ec78e1b04bd7157c21addb63686", "title": "University Lab Supply Agreement Sample"}, "70": {"path": "/examples/doc_merge/pure_documents.json:18-18", "hash": "46c46ea1daa1a268aa152500a115a7eb", "title": "Supplier-University Equipment Agreement"}, "71": {"path": "/examples/doc_merge/pure_documents.json:18-19", "hash": "ef2529715d147da125ca96f456e6548a", "title": "Laboratory Supply Agreement Template"}, "72": {"path": "/examples/doc_merge/pure_documents.json:19-20", "hash": "0f4879a63ae4c653365711b6512b1b6c", "title": "Freelance Agreement Template"}, "73": {"path": "/examples/doc_merge/pure_documents.json:20-21", "hash": "67eb4764e09c3e336facc63a2985421d", 
"title": "Freelance Contract Terms"}, "74": {"path": "/examples/doc_merge/pure_documents.json:21-21", "hash": "e64246dfeada01ab3418f8ea638744c2", "title": "Freelancer Legal Agreement Template"}, "75": {"path": "/examples/doc_merge/pure_documents.json:21-22", "hash": "637773cdc37b1abeae911ccec70d05f7", "title": "Joint Research Agreement Outline"}, "76": {"path": "/examples/doc_merge/pure_documents.json:22-23", "hash": "035981cece4860fdef9e3a6594bb7452", "title": "Business Agreement Template"}, "77": {"path": "/examples/doc_merge/pure_documents.json:23-24", "hash": "88625a4bb181df525642d660c95c3cde", "title": "Business Agreement Template: Confidentiality & Termination"}, "78": {"path": "/examples/doc_merge/pure_documents.json:24-24", "hash": "3eb6a59644504b24369dd3e0a230e078", "title": "Non-Disclosure Agreement Clause"}, "79": {"path": "/examples/doc_merge/pure_documents.json:24-25", "hash": "f17fe125855ef42b00e7652870b017e9", "title": "Comprehensive Confidentiality and Loyalty Agreement"}, "80": {"path": "/examples/doc_merge/pure_documents.json:25-26", "hash": "8cb207a8359e179cf24c9ee992701f0f", "title": "Non-Compete and Loyalty Agreement: Terms and Consequences"}, "81": {"path": "/examples/doc_merge/pure_documents.json:26-26", "hash": "495634a0af425eab47dda5b54ac1c79a", "title": "Non-Compete Loyalty Agreement"}, "82": {"path": "/examples/doc_merge/pure_documents.json:26-27", "hash": "feed894b15625c22bd7d2659fa6a6b74", "title": "Non-Compete Amendment: Duration Update"}, "83": {"path": "/examples/doc_merge/pure_documents.json:27-28", "hash": "21bb6c73a643039e21ea87e1fe05f1dd", "title": "Late Fee Adjustment Amendment"}, "84": {"path": "/examples/doc_merge/pure_documents.json:28-29", "hash": "4664ed4b63d1d8ca74af207dde638c17", "title": "Late Fee Amendment Contract"}, "85": {"path": "/examples/doc_merge/pure_documents.json:29-29", "hash": "089accf2464c7076970d616b0e5e7757", "title": "Code Amendment for IT Maintenance"}, "86": {"path": 
"/examples/doc_merge/pure_documents.json:29-30", "hash": "86917164f07da29e1e05f5c02dabc812", "title": "Legal Amendment for Software Development"}, "87": {"path": "/examples/doc_merge/pure_documents.json:30-31", "hash": "86424204c41c82e6c5a4974740bcb27d", "title": "Contract Extension: New Delivery Dates"}, "88": {"path": "/examples/doc_merge/pure_documents.json:31-31", "hash": "ef777d0270f43cbc4412a4c3812a81d7", "title": "Delayed Contract Terms and Consequences"}, "89": {"path": "/examples/doc_merge/pure_documents.json:31-32", "hash": "aabdebd497b44af46b5ebce40d668c42", "title": "Contract Appendices Extraction"}, "90": {"path": "/examples/doc_merge/pure_documents.json:32-33", "hash": "ff054534047a6cd26f97c415e7428b43", "title": "Confidentiality Addendum"}, "91": {"path": "/examples/doc_merge/pure_documents.json:33-33", "hash": "f6b50c6e3e2d5f0e67850677fe145ba9", "title": "Amended NDA Time Restriction Extension"}, "92": {"path": "/examples/doc_merge/pure_documents.json:33-34", "hash": "9568513ead565cd0efd3503d5d21178a", "title": "Business Agreement Amendment: Conflict Resolution and Execution"}, "93": {"path": "/examples/doc_merge/pure_documents.json:34-35", "hash": "b91b0c5f9dc99be183ea776a5dcb448a", "title": "Business Cooperation Agreement with Confidentiality Extension"}, "94": {"path": "/examples/doc_merge/pure_documents.json:35-36", "hash": "1b584c0d4f9a9a00c1bb6171ce7e1d73", "title": "Confidentiality Period Extension"}, "95": {"path": "/examples/doc_merge/pure_documents.json:36-37", "hash": "3ffe3a523becab413e8bc49dfa0be1de", "title": "Legal Document Template: Commitment, Consequences, Governing Law"}, "96": {"path": "/examples/doc_merge/pure_documents.json:37-37", "hash": "8c3be3b589b892ba0cc0962de38ea3dc", "title": "Confidentiality Agreement Clause"}, "97": {"path": "/examples/doc_merge/pure_documents.json:37-38", "hash": "d6ac6f55f27a7ec9e2f1437178befe89", "title": "Confidentiality Agreement Outline"}, "98": {"path": 
"/examples/doc_merge/pure_documents.json:38-39", "hash": "734b4871343d5227f0e263784c8d9902", "title": "Confidentiality Agreement for Tech Company and Contractor"}, "99": {"path": "/examples/doc_merge/pure_documents.json:39-40", "hash": "5b15b13fcebeab95b6389c71dcbd6364", "title": "Confidentiality Terms and Remedies"}, "100": {"path": "/examples/doc_merge/pure_documents.json:40-41", "hash": "126a9b4db347c8bd69b667541fbdc223", "title": "Confidentiality Agreement Summary"}, "101": {"path": "/examples/doc_merge/pure_documents.json:41-42", "hash": "f75109080c706ae4dc7e46b762b2ad18", "title": "Legal Agreement Clause"}, "102": {"path": "/examples/doc_merge/pure_documents.json:42-43", "hash": "c977b190abf0bf04f7411b086d0fe708", "title": "Termination and Obligations"}, "103": {"path": "/examples/doc_merge/pure_documents.json:43-44", "hash": "86f5fae661456d77b57026e2d3e35fd9", "title": "Termination Obligations: Party A & B"}, "104": {"path": "/examples/doc_merge/pure_documents.json:44-45", "hash": "9524f5179c00ee87106d120804ef4e72", "title": "Comprehensive NDA Clauses"}, "105": {"path": "/examples/doc_merge/pure_documents.json:45-46", "hash": "208b5ff207f92308369913c75b891f6a", "title": "Confidentiality Agreement Template"}, "106": {"path": "/examples/doc_merge/pure_documents.json:46-46", "hash": "af83b879d400ccdf9059aeda353eba44", "title": "Service Agreement Template: Structure and Clauses"}, "107": {"path": "/examples/doc_merge/pure_documents.json:47-48", "hash": "81c3375ae51e91f0efdd427577290ec9", "title": "Legal Document Collection"}, "108": {"path": "/examples/doc_merge/pure_documents.json:48-48", "hash": "8b27385e5d5824c74b06108fcd1329d6", "title": "Collaboration and Loyalty Agreement"}, "109": {"path": "/examples/doc_merge/pure_documents.json:48-49", "hash": "bb0b4991941b1a85161f5627852b92a2", "title": "Business Consulting Contract Terms"}, "110": {"path": "/examples/doc_merge/pure_documents.json:49-49", "hash": "9372088ecd5eb9c5f0072c75127d95d9", "title": "Consulting 
Contract Terms"}, "111": {"path": "/examples/doc_merge/pure_documents.json:49-50", "hash": "fd86a9c58e0bbe9763bd47a4f00effa3", "title": "Confidentiality and Non-Compete Agreement"}, "112": {"path": "/examples/doc_merge/pure_documents.json:50-50", "hash": "56bd5de50bc7bee323983a40ae0b7149", "title": "Confidentiality Fee and Penalty Structure"}, "113": {"path": "/examples/doc_merge/pure_documents.json:50-51", "hash": "344e796fc101ad28f40e9eeef66ef1df", "title": "Confidentiality Breach Penalties Appendix"}, "114": {"path": "/examples/doc_merge/pure_documents.json:51-52", "hash": "be503130af310302bace91a4e6e2a035", "title": "Breach Notification and Termination Clause"}, "115": {"path": "/examples/keyword_counting/README.md", "hash": "39cd6c21ba95a74ed10979e0912ba3c2", "title": "Multi-Approach Country Frequency Computation"}, "116": {"path": "/examples/keyword_counting/README.md:1-26", "hash": "1137a42977ddd62b2d2caa0c45e61cff", "title": "Country Frequency Analysis Algorithms"}, "117": {"path": "/examples/keyword_counting/README.md:27-45", "hash": "04f6aa22c67f13fa038ff16f1542d3a3", "title": "Code for Sample Processing and Plotting"}, "118": {"path": "/examples/keyword_counting/dataset_gen_countries.py", "hash": "2331a403cc5dfbca7d3c6181d6e27762", "title": "Country-Based Language Model Dataset Generation"}, "119": {"path": "/examples/keyword_counting/dataset_gen_countries.py:1-43", "hash": "2abc8ef0578d7eafe9bec6bc9b956ffd", "title": "Country Indexing Function"}, "120": {"path": "/examples/keyword_counting/dataset_gen_countries.py:44-112", "hash": "969f9d600b0346c884efa48893f2232c", "title": "Country-Adjective Dataset for Keyword Counting"}, "121": {"path": "/examples/keyword_counting/dataset_gen_countries.py:113-176", "hash": "28746e55021a494c53bbf4dff3c3cfa4", "title": "Country Data Organization in Code"}, "122": {"path": "/examples/keyword_counting/dataset_gen_countries.py:177-243", "hash": "8061ea1bde37cbeeaca7ce15def663f8", "title": "Alphabetical Country List"}, 
"123": {"path": "/examples/keyword_counting/dataset_gen_countries.py:244-306", "hash": "6f4c354782b96308b69fbd6319aab88f", "title": "Country Adjective Dataset Generator"}, "124": {"path": "/examples/keyword_counting/dataset_gen_countries.py:307-370", "hash": "96602f2a58153759c4c8966b5eb25354", "title": "Country-Adjective Listing Code"}, "125": {"path": "/examples/keyword_counting/dataset_gen_countries.py:371-433", "hash": "3a0c39aff2dc6449214fd050c8f395f8", "title": "Country Adjectives for Keyword Counting"}, "126": {"path": "/examples/keyword_counting/dataset_gen_countries.py:434-460", "hash": "fe8200307756f42340e04b18145e8f62", "title": "Generate Coherent Text with Country Constraints"}, "127": {"path": "/examples/keyword_counting/dataset_gen_countries.py:460-462", "hash": "dcb40891e94e7bd6fcc846fc060ac5ed", "title": "Country Name List in Data Handling"}, "128": {"path": "/examples/keyword_counting/dataset_gen_countries.py:462-462", "hash": "b785c34028027b41f6b34fbecbe648f5", "title": "Trade and Cultural Exchanges: A Conversation"}, "129": {"path": "/examples/keyword_counting/dataset_gen_countries.py:462-471", "hash": "a455f46872b59ce549c966b920e9643b", "title": "Country Sample Dataset Generator"}, "130": {"path": "/examples/keyword_counting/dataset_gen_countries.py:473-499", "hash": "3d7ca8aa7ddab3a8749330b2267460c7", "title": "Country Name Generator for Keyword Counting"}, "131": {"path": "/examples/keyword_counting/dataset_gen_countries.py:500-532", "hash": "efba65101b8fc04cb8a26eecb353f153", "title": "Invalid Adjective Counter"}, "132": {"path": "/examples/keyword_counting/dataset_gen_countries.py:533-535", "hash": "d17c9d2b9e6b13a6de796ae649a0e076", "title": "Writing Results to CSV File"}, "133": {"path": "/examples/keyword_counting/plot.py", "hash": "d82823a44e5662ed352af67e408a1d3c", "title": "Customizable JSON Boxplots & Bar Charts"}, "134": {"path": "/examples/keyword_counting/plot.py:1-29", "hash": "a07f0549f9cbf395ddb2632abf06b2d3", "title": "Collect 
JSON Data from Directory"}, "135": {"path": "/examples/keyword_counting/plot.py:30-58", "hash": "5583d32dfa413bb150f55eb3fd18c0b6", "title": "Sorting Results by Key"}, "136": {"path": "/examples/keyword_counting/plot.py:59-93", "hash": "6b7dc8aa1c374a96605a431708575429", "title": "Plot Keyword Counting Results"}, "137": {"path": "/examples/keyword_counting/plot.py:94-122", "hash": "f52b680738ec8f31b36505e90828d9a0", "title": "Boxplot and Bar Chart of Keyword Counts with Customization"}, "138": {"path": "/examples/keyword_counting/plot.py:123-158", "hash": "f4862ef3422e2080f8b4909015bdf670", "title": "Plotting Graph with Y-Axis Limits and Annotations"}, "139": {"path": "/examples/keyword_counting/plot.py:159-167", "hash": "8b64ee71742cc98d86cdb9a8c24cf147", "title": "GPT-3.5 Plot Generator"}, "140": {"path": "/examples/set_intersection/README.md", "hash": "13f99534292d94b948414148d83f51b0", "title": "Set Intersection Data Generator"}, "141": {"path": "/examples/set_intersection/README.md:1-29", "hash": "5319becd65b7b78fb045dd76d7140177", "title": "Set Intersection Approaches and Data Generator"}, "142": {"path": "/examples/set_intersection/README.md:30-46", "hash": "dbccdbeb077eb1a401bbad56fbc1c24b", "title": "Budget-Controlled Sample Selection"}, "143": {"path": "/examples/set_intersection/README.md:48-52", "hash": "3ff0b7b040edd5842d5f369157d69d67", "title": "Modify Results Directory"}, "144": {"path": "/examples/set_intersection/dataset_gen_intersection.py", "hash": "9ea298f6793fe51e7d4e099c246d71a3", "title": "Random Set Intersection Generator"}, "145": {"path": "/examples/set_intersection/dataset_gen_intersection.py:1-39", "hash": "86aeae6deeaaba4d9e9eae9b0133ef91", "title": "Randomized Set Shuffle Dataset Generator"}, "146": {"path": "/examples/set_intersection/dataset_gen_intersection.py:40-67", "hash": "4fc947b7050fed894926b0bd7cf3a963", "title": "Random Set Intersection Generator"}, "147": {"path": 
"/examples/set_intersection/dataset_gen_intersection.py:69-92", "hash": "8f12c6e01aaeb385ba4c6820936aa71d", "title": "Scramble, Split, Intersect, and Save"}, "148": {"path": "/examples/set_intersection/plot.py", "hash": "ed5416303475cf80fc4a36561105fe72", "title": "AI Method Results Visualization and Analysis"}, "149": {"path": "/examples/set_intersection/plot.py:1-29", "hash": "0b123ef4288260090f04e9c2106d8e24", "title": "Traverse and Aggregate JSON Files"}, "150": {"path": "/examples/set_intersection/plot.py:30-58", "hash": "500f0d6d7fc080448f1f19483fe8b784", "title": "AI Results Organizer and Analyzer"}, "151": {"path": "/examples/set_intersection/plot.py:59-94", "hash": "f891aacd7ec2b792c3e49b347a8450a1", "title": "Method-based Score Plotter"}, "152": {"path": "/examples/set_intersection/plot.py:95-130", "hash": "1c854adc6cc7f878b2e014b66fe8a726", "title": "Customizable Boxplot for Method Results"}, "153": {"path": "/examples/set_intersection/plot.py:131-162", "hash": "b5143e4df0f1fa3de9b3232c6613ff43", "title": "Customizable Y-axis Bar Plot with Twin Axis and Conditional Annotations"}, "154": {"path": "/examples/set_intersection/plot.py:163-184", "hash": "303f7d054f2b0794a51a21292ccfd290", "title": "Text Annotations and Count Increment in Plotting"}, "155": {"path": "/examples/set_intersection/utils.py", "hash": "3cf6e693a65713c2b13513ce507820ac", "title": "Error-Counting Set Intersection Utilities"}, "156": {"path": "/examples/set_intersection/utils.py:1-36", "hash": "77c96220f3537c30d11c50b6b2bde9d6", "title": "String to List and Set Functions"}, "157": {"path": "/examples/set_intersection/utils.py:37-72", "hash": "23ebd71095a0fb875474938369a34e6b", "title": "Set Intersection Utilities"}, "158": {"path": "/examples/set_intersection/utils.py:73-99", "hash": "a8f8bb3c3271ab902a0a00788e82f6b2", "title": "Set Intersection Error Counter"}, "159": {"path": "/examples/sorting/README.md", "hash": "6963d93c8bc70688f3dbe8782c097ea5", "title": "Sorting Algorithm 
Examples and Visualization"}, "160": {"path": "/examples/sorting/README.md:1-31", "hash": "b84c200b16831331b7a4d4b5264ba3f2", "title": "Sorting Algorithms Examples and Implementations"}, "161": {"path": "/examples/sorting/README.md:32-46", "hash": "23262945ab86d6167d3451e7b23e979d", "title": "Organized Results by LLM and Run Details"}, "162": {"path": "/examples/sorting/plot.py", "hash": "3dbf2b531618c7a9772cd98e6c76bc31", "title": "Sorting Algorithm Performance Visualizer"}, "163": {"path": "/examples/sorting/plot.py:1-29", "hash": "f711e1050d5f0127386910738894ce56", "title": "JSON Data Collector and Organizer"}, "164": {"path": "/examples/sorting/plot.py:30-58", "hash": "3545440148d6adb644b788f6e769d82e", "title": "Sorting Results to Calculate Scores"}, "165": {"path": "/examples/sorting/plot.py:59-95", "hash": "ce903796ab6f98d8b5aab980bd7e8e49", "title": "Plotting Data for Sorting Algorithms"}, "166": {"path": "/examples/sorting/plot.py:96-131", "hash": "3d3f5ac0c93ba004991e31b8ba92a2d4", "title": "Visualizing Scores with Boxplot in Python"}, "167": {"path": "/examples/sorting/plot.py:132-163", "hash": "94139c4a2e1e4c1fa41e5f1af722a49a", "title": "Customizing ax2 Properties and Annotations"}, "168": {"path": "/examples/sorting/plot.py:164-186", "hash": "1491dd16d132473dbbdb001c3f839be6", "title": "Sorting Algorithm Performance Plotter"}, "169": {"path": "/examples/sorting/utils.py", "hash": "c97d98c9d76f8855a34ccd099cae37a3", "title": "String-to-Int List Converter and Sorter"}, "170": {"path": "/examples/sorting/utils.py:1-35", "hash": "078aed793a374326b4acee71eaaec661", "title": "String List Conversion Function"}, "171": {"path": "/examples/sorting/utils.py:36-70", "hash": "84f6df05b9eabb882d3e55278e832faf", "title": "Sorting Errors Counter"}, "172": {"path": "/examples/sorting/utils.py:71-78", "hash": "76a45a835ef0436bf48beb22da093f79", "title": "Error Count in Sorted Lists"}, "173": {"path": "/graph_of_thoughts/controller/README.md", "hash": 
"09dd4f1336d65d86e36ff875ec2f7246", "title": "Controller Class for Executing LLM Graphs"}, "174": {"path": "/graph_of_thoughts/controller/README.md:1-16", "hash": "93519872fc3814f7d76e5f38747fc4ae", "title": "Controller Class for GoO Execution"}, "175": {"path": "/graph_of_thoughts/controller/README.md:18-28", "hash": "c2a5c883226205070ed3dc6f6a312ee9", "title": "Controller Class Initialization"}, "176": {"path": "/graph_of_thoughts/controller/__init__.py", "hash": "18470c3ece7027fd5a483454fed3c1e8", "title": "Module Initialization - Controller Import"}, "177": {"path": "/graph_of_thoughts/controller/controller.py", "hash": "870c70fc883c73922e451f7a3c0754b5", "title": "JSON Graph Execution Flow Controller"}, "178": {"path": "/graph_of_thoughts/controller/controller.py:1-35", "hash": "28ef9a7a63f7ebf36f6ecd791212f71e", "title": "Controller Class for Graph of Operations"}, "179": {"path": "/graph_of_thoughts/controller/controller.py:37-60", "hash": "a34bc785ac005f2870b4521f157dcb29", "title": "Controller Initialization and Execution"}, "180": {"path": "/graph_of_thoughts/controller/controller.py:61-82", "hash": "7d41cc070d7de0d49ec88548b2223e37", "title": "Graph-Based State Validation and Execution"}, "181": {"path": "/graph_of_thoughts/controller/controller.py:83-106", "hash": "212aa5fb98985f3b36a941369b5b20c3", "title": "Graph Operations Executor and Serializer"}, "182": {"path": "/graph_of_thoughts/controller/controller.py:107-128", "hash": "2a52bb682862fb75a3a7b69ef242ecbc", "title": "Graph Operations Serializer"}, "183": {"path": "/graph_of_thoughts/controller/controller.py:129-152", "hash": "9ab73486e992061ef6eb527c9d6f737c", "title": "JSON-ifying Thoughts: Controller Analysis"}, "184": {"path": "/graph_of_thoughts/language_models/README.md", "hash": "09eef3d730f43038d04c444137703e25", "title": "Language Models: Instantiate, Add, and Query"}, "185": {"path": "/graph_of_thoughts/language_models/README.md:1-18", "hash": "df4a25e6ae41dd6fe9dab191e6ef0f88", 
"title": "Language Models Module Introduction"}, "186": {"path": "/graph_of_thoughts/language_models/README.md:19-21", "hash": "caf8534a0edcce53879d972259ed9fe8", "title": "OpenAI Model Cost Calculator"}, "187": {"path": "/graph_of_thoughts/language_models/README.md:21-24", "hash": "3f0ab73729461f227ced65879177f92d", "title": "OpenAI Pricing and Model Configurations"}, "188": {"path": "/graph_of_thoughts/language_models/README.md:24-26", "hash": "d2f7a58cd7b0200cbc4e6932a84b55ec", "title": "OpenAI Chat Creation Parameters"}, "189": {"path": "/graph_of_thoughts/language_models/README.md:27-42", "hash": "36c5e6a3f9a601f5d50e84b63e574325", "title": "Initializing Language Model Controller"}, "190": {"path": "/graph_of_thoughts/language_models/README.md:42-47", "hash": "bba037316e38f42a49a766f6190ab082", "title": "Defining Language Model Parameters"}, "191": {"path": "/graph_of_thoughts/language_models/README.md:47-59", "hash": "26c86c52eff9cf309ceb2abc2e5e55d8", "title": "Llama-2 Model Setup Guide"}, "192": {"path": "/graph_of_thoughts/language_models/README.md:59-76", "hash": "1a746c432034aaf008fb76703dcefb3b", "title": "Adding a New LLM to Existing Model"}, "193": {"path": "/graph_of_thoughts/language_models/README.md:77-95", "hash": "d52024fc2afa327e96e95769332f04b2", "title": "Abstract Language Model Base Class"}, "194": {"path": "/graph_of_thoughts/language_models/__init__.py", "hash": "3aa1f453312e2f3fd365c38dd861deb8", "title": "Imports Necessary Language Model Classes"}, "195": {"path": "/graph_of_thoughts/language_models/abstract_language_model.py", "hash": "82e9e2c48d8f51f0b02b5f8121ce2140", "title": "Abstract Language Model Class and Configuration"}, "196": {"path": "/graph_of_thoughts/language_models/abstract_language_model.py:1-34", "hash": "f4ba39698a687618124c696dec75c972", "title": "Abstract Language Model Base Class Initialization"}, "197": {"path": "/graph_of_thoughts/language_models/abstract_language_model.py:35-66", "hash": 
"8d728348e4246333e374f3c7fd60dbd2", "title": "Abstract Language Model Initialization"}, "198": {"path": "/graph_of_thoughts/language_models/abstract_language_model.py:68-92", "hash": "05460f4d2b6eb8f36d13157d6a9fb621", "title": "Abstract Language Model: Query and Get Methods"}, "199": {"path": "/graph_of_thoughts/language_models/chatgpt.py", "hash": "f0a9e7183cf21113d4c08062b939c6dc", "title": "ChatGPT Class for OpenAI Chat API"}, "200": {"path": "/graph_of_thoughts/language_models/chatgpt.py:1-35", "hash": "e4b946f25a4e806bd85852c0ec903709", "title": "Initializing ChatGPT Language Model"}, "201": {"path": "/graph_of_thoughts/language_models/chatgpt.py:35-49", "hash": "ed7f0352af8af625e21149dd701af6e8", "title": "Model Initialization and Configuration"}, "202": {"path": "/graph_of_thoughts/language_models/chatgpt.py:50-69", "hash": "69787abe2ce88bc3747d7bade9c179e2", "title": "Initialize ChatGPT Model and Query Method"}, "203": {"path": "/graph_of_thoughts/language_models/chatgpt.py:70-94", "hash": "d852b7882c869e6b87a3217078220921", "title": "ChatGPT Cache Function"}, "204": {"path": "/graph_of_thoughts/language_models/chatgpt.py:95-119", "hash": "1fb88d02570e6b08c7e61b5666f415d2", "title": "OpenAI Chat Class with Error Backoff and Caching"}, "205": {"path": "/graph_of_thoughts/language_models/chatgpt.py:120-146", "hash": "3cfa3830a553a4e3aa1f27cb5728cb26", "title": "ChatGPT Response Logger and Cost Tracker"}, "206": {"path": "/graph_of_thoughts/language_models/chatgpt.py:147-157", "hash": "a21baf3c3e51bf3c617d490910e8d3e5", "title": "Converting Chats to Strings"}, "207": {"path": "/graph_of_thoughts/language_models/config_template.json", "hash": "1a7b6a03d3e21cf5442daebefab3a3a4", "title": "User-Specific Language Model Config Template"}, "208": {"path": "/graph_of_thoughts/language_models/config_template.json:1-41", "hash": "40fab445e51736d8b6c3cde7cefc128e", "title": "Language Model Configuration Template"}, "209": {"path": 
"/graph_of_thoughts/language_models/config_template.json:42-49", "hash": "91b4f3174f813d898ed20ba4d48e59e4", "title": "Optimal Language Model Config"}, "210": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py", "hash": "4047e547fd0fccd086eafb32d823f827", "title": "LLaMA Chat Model Implementation"}, "211": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py:1-31", "hash": "3e63981b6650b6dc1cae95b3e09d673d", "title": "LLaMA 2 HF Interface"}, "212": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py:32-53", "hash": "b9ecfa01cff6b600d11bc9aa0378508e", "title": "Initializing Language Model Attributes"}, "213": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py:54-82", "hash": "d87a421407cdfc07b5f74309fe1fe7a5", "title": "LLaMA Text Generation Pipeline"}, "214": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py:83-107", "hash": "e1436394a33310e28d6526bcddfc3b58", "title": "LLaMA Chat Cache Responses"}, "215": {"path": "/graph_of_thoughts/language_models/llamachat_hf.py:108-119", "hash": "08cf8301010494783c9512a5c5ec4e5e", "title": "Extracting Generated Texts"}, "216": {"path": "/graph_of_thoughts/operations/README.md", "hash": "cff0b612678d5716f2c2996634fa252a", "title": "Thought Manipulation Operations"}, "217": {"path": "/graph_of_thoughts/operations/README.md:1-14", "hash": "1cea32cc0240fed3ef893408210db596", "title": "Operations in Graph of Thoughts"}, "218": {"path": "/graph_of_thoughts/operations/README.md:16-40", "hash": "a7cfb85179ef81ccb2401c4dd296badb", "title": "Graph-Based Thought Scoring"}, "219": {"path": "/graph_of_thoughts/operations/README.md:41-51", "hash": "782e3eebd715997b6059253e0567a8f9", "title": "Thought Processing System Operations"}, "220": {"path": "/graph_of_thoughts/operations/README.md:52-62", "hash": "d94bd20ff7b9695fef3164a88103974e", "title": "Thought Operations and Parameters"}, "221": {"path": "/graph_of_thoughts/operations/README.md:64-70", "hash": "7da50dfa59f65ee38b6061180797fccb", 
"title": "Thought Operations: KeepValid, Selector, GroundTruth"}, "222": {"path": "/graph_of_thoughts/operations/__init__.py", "hash": "bd37a88c53dc9da2c0b40d1caf56e2bf", "title": "Graph of Thoughts Operations"}, "223": {"path": "/graph_of_thoughts/operations/graph_of_operations.py", "hash": "5304a6d11c9bae8f009dd74d1fa40c95", "title": "Operation Plan Executor"}, "224": {"path": "/graph_of_thoughts/operations/graph_of_operations.py:1-32", "hash": "4e7d16802dab76adca24cd82485e5c42", "title": "Graph of Operations Class"}, "225": {"path": "/graph_of_thoughts/operations/graph_of_operations.py:34-64", "hash": "d1193f4eb96c268ad37175e4aa89836f", "title": "Operations Appending in Graphs"}, "226": {"path": "/graph_of_thoughts/operations/graph_of_operations.py:65-69", "hash": "dde7954a27bd6d8df095f0dc7c2d837a", "title": "Trimming Predecessors and Leaves"}, "227": {"path": "/graph_of_thoughts/operations/operations.py", "hash": "2a6bba2ce168f404208fd2a4b21dac38", "title": "Abstract Base Class for Graph of Thoughts Operations"}, "228": {"path": "/graph_of_thoughts/operations/operations.py:1-40", "hash": "e14ebbf6cd3a6edf12df1edab70b9a68", "title": "Abstract Base Class for Graph of Thoughts Operations"}, "229": {"path": "/graph_of_thoughts/operations/operations.py:41-71", "hash": "fb285ca37461ffa6c57781380ad90a7e", "title": "Operation Initialization and Execution"}, "230": {"path": "/graph_of_thoughts/operations/operations.py:72-105", "hash": "4adc5ff47935bb22b344371ae1191dfa", "title": "Operation Execution and Relationships"}, "231": {"path": "/graph_of_thoughts/operations/operations.py:107-130", "hash": "a8773694e5f1c9ed734f051a4a1ddbc8", "title": "Abstract Execution Class in Operations"}, "232": {"path": "/graph_of_thoughts/operations/operations.py:132-168", "hash": "2136cbd429efcd6af04eeadc33803406", "title": "Score Class in Graph of Thoughts Operations"}, "233": {"path": "/graph_of_thoughts/operations/operations.py:169-192", "hash": "fcbec9cf4bfc21bac7b03b985b98af8a", 
"title": "Score Operation Class"}, "234": {"path": "/graph_of_thoughts/operations/operations.py:193-218", "hash": "486d9ef6bd0e733bfa1646e3ea247d8e", "title": "Scoring Predecessor Thoughts Method"}, "235": {"path": "/graph_of_thoughts/operations/operations.py:220-239", "hash": "a8f204464943dfa89e5739d9fddf54fa", "title": "Scoring Thoughts with Functions or Prompts"}, "236": {"path": "/graph_of_thoughts/operations/operations.py:240-263", "hash": "27e0aa1535c2728f52b965406db4c1f2", "title": "Scoring Thoughts with LM or User Input"}, "237": {"path": "/graph_of_thoughts/operations/operations.py:264-293", "hash": "be256ca8b5401b62cb0d413f1ec5fb39", "title": "Validate and Improve Thoughts Operation"}, "238": {"path": "/graph_of_thoughts/operations/operations.py:294-319", "hash": "515403f16e8f3a450ae064906e18e400", "title": "Validate and Improve Class Definition"}, "239": {"path": "/graph_of_thoughts/operations/operations.py:320-344", "hash": "e3a0facb34e3fe6f5e85499006ebc0c9", "title": "Iterating Through Previous Thoughts"}, "240": {"path": "/graph_of_thoughts/operations/operations.py:345-366", "hash": "e399814adf073440c8f24f7f6625c1f3", "title": "Validate Thoughts via Language Model"}, "241": {"path": "/graph_of_thoughts/operations/operations.py:367-399", "hash": "243144e20764b359cdf251732ff5b7a7", "title": "Generative Thought Validation Algorithm"}, "242": {"path": "/graph_of_thoughts/operations/operations.py:400-427", "hash": "21e7c94b970df63f90922cb70d121671", "title": "Thoughts Generator Class"}, "243": {"path": "/graph_of_thoughts/operations/operations.py:428-451", "hash": "979fd878243eecdf462844959da936b2", "title": "Generate Thoughts with Language Model"}, "244": {"path": "/graph_of_thoughts/operations/operations.py:452-476", "hash": "6667dce6dee977a4261696a8f81e5435", "title": "Generating Responses and Parsing Thoughts"}, "245": {"path": "/graph_of_thoughts/operations/operations.py:477-513", "hash": "7ec716ae96347da565f2197a0c1921c5", "title": "Enhancing 
Thoughts with Improve Class"}, "246": {"path": "/graph_of_thoughts/operations/operations.py:514-537", "hash": "537ccf12119d25638ceb7e645c39759a", "title": "Improve and Aggregate Operations"}, "247": {"path": "/graph_of_thoughts/operations/operations.py:538-568", "hash": "e0f71ea8efb836e2fc1150d14258b98a", "title": "Aggregate Operation Class"}, "248": {"path": "/graph_of_thoughts/operations/operations.py:570-594", "hash": "8b061313974062b4c67eebd4c04fda18", "title": "Operation Predecessor Check and Prompt Construction"}, "249": {"path": "/graph_of_thoughts/operations/operations.py:596-627", "hash": "1e8f1d9981c6729be04042c05e765109", "title": "Keep Best N Thoughts Operation"}, "250": {"path": "/graph_of_thoughts/operations/operations.py:628-655", "hash": "72548e2c1d39bc7e4439aaff3bc99f97", "title": "KeepBestN: Initialize, Check N Thoughts"}, "251": {"path": "/graph_of_thoughts/operations/operations.py:656-683", "hash": "7067e602115d3d0633424f3504dfad6f", "title": "Keep Best N Thoughts"}, "252": {"path": "/graph_of_thoughts/operations/operations.py:685-708", "hash": "9e3cef1ea2372583336b4d7f1d002db1", "title": "KeepBestN Operation Definition"}, "253": {"path": "/graph_of_thoughts/operations/operations.py:709-746", "hash": "b29fc4d209019fbbd7480c450b81b156", "title": "KeepValid: Preserving Valid Thoughts"}, "254": {"path": "/graph_of_thoughts/operations/operations.py:747-778", "hash": "4957ea27908a08872dd63e9a2e6fbcc8", "title": "Operations: KeepValid and GroundTruth Classes"}, "255": {"path": "/graph_of_thoughts/operations/operations.py:779-807", "hash": "b5b889ac8ddecd3c2f2337e208c4e6d6", "title": "GroundTruth Operation for Thoughts Evaluation"}, "256": {"path": "/graph_of_thoughts/operations/operations.py:809-833", "hash": "2036af900561b2ddeec55fe8d9f87f70", "title": "GroundTruth Operation Evaluation"}, "257": {"path": "/graph_of_thoughts/operations/operations.py:834-864", "hash": "7f766716b28c696ed4a6e73c8e2a0e7b", "title": "Graph of Thoughts Selector Operation"}, 
"258": {"path": "/graph_of_thoughts/operations/operations.py:865-890", "hash": "b711783c489f79747b3da949ad19bdd4", "title": "Selector Operation: Thought Selection from Predecessors"}, "259": {"path": "/graph_of_thoughts/operations/operations.py:891-900", "hash": "979c7510ea5cbb02062f8d94ce5d139c", "title": "Thoughts Selector Logging"}, "260": {"path": "/graph_of_thoughts/operations/thought.py", "hash": "2e70f2bc1891443c8d1637f2412ba1dc", "title": "Thought Class Operations"}, "261": {"path": "/graph_of_thoughts/operations/thought.py:1-35", "hash": "e4995594663c83f7d7fb6980edafe0f7", "title": "Thought Class Definition"}, "262": {"path": "/graph_of_thoughts/operations/thought.py:36-68", "hash": "9acc43ecfe3da235880869f7df772176", "title": "Thought Class Definition"}, "263": {"path": "/graph_of_thoughts/operations/thought.py:69-111", "hash": "4efeef21fc0211bca0031f3579473765", "title": "Thought Class Definition and Operations"}, "264": {"path": "/graph_of_thoughts/operations/thought.py:113-117", "hash": "8462435edc91d42599b7e14b798cd449", "title": "Solve Thought Method Definition"}, "265": {"path": "/graph_of_thoughts/parser/__init__.py", "hash": "2fe4436ba77dabd44ccf4257e2764a1c", "title": "Efficient Parser Imports"}, "266": {"path": "/graph_of_thoughts/parser/parser.py", "hash": "567ce025206b94a36f0e4a8cbbc5867d", "title": "Abstract Parser for Thought States and Text Inputs"}, "267": {"path": "/graph_of_thoughts/parser/parser.py:1-31", "hash": "151a077f19a9c1dcc8185beb3e987e21", "title": "Language Model Response Parser"}, "268": {"path": "/graph_of_thoughts/parser/parser.py:32-59", "hash": "b28aeeff7332291682b6d206b227132d", "title": "Language Model Parser Methods"}, "269": {"path": "/graph_of_thoughts/parser/parser.py:60-89", "hash": "8f70d72ca35fcba9db9e2c3a93349308", "title": "Thought Parser Methods"}, "270": {"path": "/graph_of_thoughts/parser/parser.py:90-90", "hash": "21c74b484d452085101883611dd360a4", "title": "Understanding 'pass': A Placeholder in Code"}, 
"271": {"path": "/graph_of_thoughts/prompter/__init__.py", "hash": "d35543d3c3cec018abb43e0145cf3bbb", "title": "Importing Prompter Class"}, "272": {"path": "/graph_of_thoughts/prompter/prompter.py", "hash": "6322f0fbd377499b405c385d82ab6bee", "title": "Generating Prompts with Prompter Class"}, "273": {"path": "/graph_of_thoughts/prompter/prompter.py:1-36", "hash": "5b4e35715b43ae4ae20cbe9b580a09e4", "title": "Abstract Base Class for Language Model Prompt Generation"}, "274": {"path": "/graph_of_thoughts/prompter/prompter.py:37-65", "hash": "83924ce2e7a05fff42a6d33ef7ba128a", "title": "Abstract Base Class for Prompt and Validation Prompt Generation"}, "275": {"path": "/graph_of_thoughts/prompter/prompter.py:66-86", "hash": "1e175ca945635a218a9eac9162b69c8a", "title": "Abstract Class for Prompt Generation"}, "276": {"path": "/paper/README.md", "hash": "44b6127cfb7ade6deb97fd41fb189062", "title": "Access and Visualize ArXiv Data"}, "277": {"path": "/paper/plots.py", "hash": "5effb09a46a7a59e1c1314bf5b8f78a6", "title": "Python Scripts for Data Visualization"}, "278": {"path": "/paper/plots.py:1-29", "hash": "53fff646d1ede7ce47ebc062851be43d", "title": "Python JSON Data Organizer"}, "279": {"path": "/paper/plots.py:30-58", "hash": "bd4b1046e11d9bd756fdcd0225619855", "title": "Results Sorting and Scoring Algorithm"}, "280": {"path": "/paper/plots.py:59-87", "hash": "fdf12510c3c69a001dec59e847e7ba82", "title": "Final Scores Calculation"}, "281": {"path": "/paper/plots.py:88-123", "hash": "b650a38ee0edf907ce03a92d4615154b", "title": "Plotting Functions for Scores and Results"}, "282": {"path": "/paper/plots.py:124-153", "hash": "a7f764dd40b9c3d738b34b3b227dd2b2", "title": "Task-Based Score and Cost Filtering"}, "283": {"path": "/paper/plots.py:154-184", "hash": "542ae26fb5cc859245a53c7087a1dac8", "title": "Boxplot Creation for Sorted Scores"}, "284": {"path": "/paper/plots.py:185-212", "hash": "9ef8302b9a7c521702dcdbbce1cb8e2c", "title": "Plot Customization for 
Multi-Functionality"}, "285": {"path": "/paper/plots.py:213-246", "hash": "54c79748a48612171efa22bd8ba534e1", "title": "Creating Y-Label, Saving Plots with Method and Model Names"}, "286": {"path": "/paper/plots.py:247-289", "hash": "6a999201249956f7ed3699c104555641", "title": "Customizable Model Data Visualization with 'plot_results'"}, "287": {"path": "/paper/plots.py:290-330", "hash": "37f8423e922f2495b873b9b2877cae34", "title": "Task-Specific Data Plots in Python"}, "288": {"path": "/paper/plots.py:331-337", "hash": "87cccac40631b2074bc168de8dee867b", "title": "Plot Parameters and Display Options"}, "289": {"path": "/pyproject.toml", "hash": "f8970cb349e805d9d8495b180aaa3fdf", "title": "Python Package Setup with Hatchling"}, "290": {"path": "/pyproject.toml:1-37", "hash": "c303f2fd64b49ed3616bc29cfdbc82e7", "title": "Python Package Settings with Hatchling"}, "291": {"path": "/pyproject.toml:39-39", "hash": "6dab0ebe904e0b744e6acbf1618ac7d4", "title": "TOML Project Script Entry Point"}}} \ No newline at end of file diff --git a/docs/codeview.html b/docs/codeview.html index 10bbd48..d0da4af 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -430,6 +430,8 @@ const code_elem = document.createElement('code'); code_elem.className = `language-${language}` var xhr = new XMLHttpRequest(); + console.log("trying: " + code_path) + xhr.open('GET', code_path, false); // The third parameter is set to false for synchronous request xhr.send(null); if (xhr.status == 200) { @@ -438,7 +440,10 @@ var xhr = new XMLHttpRequest(); // TODO: mitigate this evil hack by passing more info of the original project. + // the reason is that github does not allow accessing file with '_' as prefix. + // is that disabled for security reasons? 
var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}/main/docs/` + code_path + console.log("trying: " + newLink) xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request xhr.send(null); @@ -454,7 +459,34 @@ if (xhr.status == 200) { code_elem.textContent = xhr.responseText; - } else { code_elem.textContent = "Failed to load code." } + } else { + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}_doc/master/` + code_path + console.log("trying: " + newLink) + + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + var xhr = new XMLHttpRequest(); + // TODO: mitigate this evil hack by passing more info of the original project. + var newLink = `https://raw.githubusercontent.com/James4Ever0/${project_name}_doc/main/` + code_path + console.log("trying: " + newLink) + + xhr.open('GET', newLink, false); // The third parameter is set to false for synchronous request + xhr.send(null); + + if (xhr.status == 200) { + code_elem.textContent = xhr.responseText; + } else { + + code_elem.textContent = "Failed to load code." 
+ } + } + } } } @@ -528,14 +560,14 @@ /* Provide spacing to accommodate the fixed header */ /* Ensure the section fills the remaining viewport height */ overflow-y: auto; + /* overflow-x:hidden; */ /* white-space: pre-wrap; */ /* Enable vertical scrolling if content exceeds viewport height */ } - .line-highlight { + /* .line-highlight { background-color: rgba(228, 239, 12, 0.07) !important; - /* z-index:-10; */ - } + } */ /* do this after jump */ /* .line-highlight { diff --git a/docs/data/0.json b/docs/data/0.json index badd127..6d66d2b 100644 --- a/docs/data/0.json +++ b/docs/data/0.json @@ -77,470 +77,470 @@ }, "14": { "file_id": 1, - "content": "/pyproject.toml", + "content": "/examples/README.md", "type": "filepath" }, "15": { "file_id": 1, - "content": "The code uses Hatchling to define project settings for the Python package \"graph_of_thoughts,\" including package details, dependencies, and URLs. It also includes a TOML configuration file setting up an entry point for executable scripts under the project's namespace within the \"scripts\" section of the \"project\" block.", + "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. 
Individual example directories provide more information on specific examples.", "type": "summary" }, "16": { "file_id": 1, - "content": "[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n[project]\nname = \"graph_of_thoughts\"\nversion = \"0.0.3\"\nauthors = [\n { name=\"Maciej Besta\", email=\"maciej.besta@inf.ethz.ch\" },\n { name=\"Nils Blach\", email=\"nils.blach@inf.ethz.ch\" },\n { name=\"Ales Kubicek\", email=\"akubicek@student.ethz.ch\" },\n { name=\"Robert Gerstenberger\", email=\"gerstenberger.robert@gmail.com\" },\n]\ndescription = \"Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models\"\nreadme = \"README.md\"\nlicense = {file = \"LICENSE\"}\nrequires-python = \">=3.8\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"Operating System :: OS Independent\",\n]\ndependencies = [\n \"backoff>=2.2.1,<3.0.0\",\n \"openai>=1.0.0,<2.0.0\",\n \"matplotlib>=3.7.1,<4.0.0\",\n \"numpy>=1.24.3,<2.0.0\",\n \"pandas>=2.0.3,<3.0.0\",\n \"sympy>=1.12,<2.0\",\n \"torch>=2.0.1,<3.0.0\",\n \"transformers>=4.31.0,<5.0.0\",\n \"accelerate>=0.21.0,<1.0.0\",\n \"bitsandbytes>=0.41.0,<1.0.0\",\n \"scipy>=1.10.1,<2.0.0\",\n]\n[project.urls]\nHomepage = \"https://github.com/spcl/graph-of-thoughts\"", + "content": "# Examples\nThis directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example.\nWe further include prompt files for each example that can be used to test prompts manually in a console.\nPlease refer to the individual example directories for more information on the specific example.", "type": "code", - "location": "/pyproject.toml:1-37" + "location": "/examples/README.md:1-7" }, "17": { "file_id": 1, - "content": "This code defines the project settings for a Python package called \"graph_of_thoughts\" using Hatchling as the build system. 
It specifies the package name, version, authors, description, dependencies, and URLs for further information.", + "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", "type": "comment" }, "18": { - "file_id": 1, - "content": "[project.scripts]", - "type": "code", - "location": "/pyproject.toml:39-39" + "file_id": 2, + "content": "/examples/doc_merge/README.md", + "type": "filepath" }, "19": { - "file_id": 1, - "content": "The code snippet is a part of a TOML configuration file, specifically defining the \"scripts\" section within the \"project\" block. It sets up an entry point for executable scripts under the project's namespace.", - "type": "comment" + "file_id": 2, + "content": "The code showcases a document merging approach using various methods like IO, CoT, ToT, and GoT. It takes 50 sample documents from `documents.csv`, applies chosen techniques, and outputs results in an LLM-named directory with debug logs and separate JSON files for each approach.", + "type": "summary" }, "20": { "file_id": 2, - "content": "/examples/README.md", - "type": "filepath" + "content": "# Document Merging\nThe use case in this directory generates new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents. \nWe provide implementations of five different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT)\n- Graph of Thoughts (GoT):\n - GoT: aggregation of fully merged NDAs\n - GoT2: aggregation of partially merged NDAs\n## Data\nWe provide an input file with 50 samples: `documents.csv`.\n## Execution\nThe file to execute the use case is called\n`doc_merge.py`. 
In the main body, one can\nselect the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are", + "type": "code", + "location": "/examples/doc_merge/README.md:1-28" }, "21": { "file_id": 2, - "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", - "type": "summary" + "content": "This code demonstrates a document merging use case using different approaches, including IO, Chain-of-Thought (CoT), Tree of Thought (ToT), and Graph of Thoughts (GoT). It uses 50 sample documents from `documents.csv`, executes the selected samples with chosen approaches, and saves results in a directory named by the LLM, approaches, day, and start time.", + "type": "comment" }, "22": { "file_id": 2, - "content": "# Examples\nThis directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example.\nWe further include prompt files for each example that can be used to test prompts manually in a console.\nPlease refer to the individual example directories for more information on the specific example.", + "content": "created. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. 
`log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 158 of `plot.py` and run `python3\nplot.py` to plot your data.", "type": "code", - "location": "/examples/README.md:1-7" + "location": "/examples/doc_merge/README.md:29-38" }, "23": { "file_id": 2, - "content": "This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example, with prompt files available to test prompts manually in a console. Individual example directories provide more information on specific examples.", + "content": "This code generates a configuration file named `config.json` that contains input data, selected approaches, LLM name, and budget information. Additionally, it logs prompts, responses, and debug data in `log.log`. Each approach directory holds separate JSON files for every sample with the Graph Reasoning State (GRS) included. To plot the data, change the results directory at line 158 of `plot.py` and run `python3 plot.py`.", "type": "comment" }, "24": { "file_id": 3, - "content": "/examples/doc_merge/README.md", + "content": "/examples/doc_merge/doc_merge.py", "type": "filepath" }, "25": { "file_id": 3, - "content": "The code showcases a document merging approach using various methods like IO, CoT, ToT, and GoT. It takes 50 sample documents from `documents.csv`, applies chosen techniques, and outputs results in an LLM-named directory with debug logs and separate JSON files for each approach.", + "content": "The code develops an efficient NDA merging class with language model prompts and redundancy handling, generating a graph for document merge and language model inference within budget limits. 
It utilizes input data from \"documents.csv\", manages exceptions, and scores output based on coverage.", "type": "summary" }, "26": { "file_id": 3, - "content": "# Document Merging\nThe use case in this directory generates new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents. \nWe provide implementations of five different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT)\n- Graph of Thoughts (GoT):\n - GoT: aggregation of fully merged NDAs\n - GoT2: aggregation of partially merged NDAs\n## Data\nWe provide an input file with 50 samples: `documents.csv`.\n## Execution\nThe file to execute the use case is called\n`doc_merge.py`. In the main body, one can\nselect the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are", + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport os\nimport re\nimport logging\nimport datetime\nimport json\nimport csv\nfrom statistics import fmean\nfrom typing import Dict, List, Callable, Set, Union\nfrom graph_of_thoughts import controller, language_models, operations, prompter, parser\nclass DocMergePrompter(prompter.Prompter):\n \"\"\"\n DocMergePrompter provides the generation of prompts specific to the document\n merge example for the language models.\n Inherits from the Prompter class and implements its abstract methods.\n \"\"\"\n 
merge_doc_prompt_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. Output only the created NDA between the tags and , without any additional text.\nHere are NDAs - \n\"\"\"\n merge_doc_prompt_block = \"\"\"", "type": "code", - "location": "/examples/doc_merge/README.md:1-28" + "location": "/examples/doc_merge/doc_merge.py:1-31" }, "27": { "file_id": 3, - "content": "This code demonstrates a document merging use case using different approaches, including IO, Chain-of-Thought (CoT), Tree of Thought (ToT), and Graph of Thoughts (GoT). It uses 50 sample documents from `documents.csv`, executes the selected samples with chosen approaches, and saves results in a directory named by the LLM, approaches, day, and start time.", + "content": "This code defines a class DocMergePrompter that inherits from Prompter and provides prompts for merging NDA documents. It includes a merge_doc_prompt_start string for generating the prompt and a merge_doc_prompt_block string for displaying NDAs to be merged. The goal is to create a single NDA by maximizing information retention and minimizing redundancy, with the output between and .", "type": "comment" }, "28": { "file_id": 3, - "content": "created. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. 
`log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 158 of `plot.py` and run `python3\nplot.py` to plot your data.", - "type": "code", - "location": "/examples/doc_merge/README.md:29-38" - }, - "29": { - "file_id": 3, - "content": "This code generates a configuration file named `config.json` that contains input data, selected approaches, LLM name, and budget information. Additionally, it logs prompts, responses, and debug data in `log.log`. Each approach directory holds separate JSON files for every sample with the Graph Reasoning State (GRS) included. To plot the data, change the results directory at line 158 of `plot.py` and run `python3 plot.py`.", - "type": "comment" - }, - "30": { - "file_id": 4, - "content": "/examples/doc_merge/doc_merge.py", - "type": "filepath" - }, - "31": { - "file_id": 4, - "content": "The code develops an efficient NDA merging class with language model prompts and redundancy handling, generating a graph for document merge and language model inference within budget limits. 
It utilizes input data from \"documents.csv\", manages exceptions, and scores output based on coverage.", - "type": "summary" - }, - "32": { - "file_id": 4, - "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport os\nimport re\nimport logging\nimport datetime\nimport json\nimport csv\nfrom statistics import fmean\nfrom typing import Dict, List, Callable, Set, Union\nfrom graph_of_thoughts import controller, language_models, operations, prompter, parser\nclass DocMergePrompter(prompter.Prompter):\n \"\"\"\n DocMergePrompter provides the generation of prompts specific to the document\n merge example for the language models.\n Inherits from the Prompter class and implements its abstract methods.\n \"\"\"\n merge_doc_prompt_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy. Output only the created NDA between the tags and , without any additional text.\nHere are NDAs - \n\"\"\"\n merge_doc_prompt_block = \"\"\"", - "type": "code", - "location": "/examples/doc_merge/doc_merge.py:1-31" - }, - "33": { - "file_id": 4, - "content": "This code defines a class DocMergePrompter that inherits from Prompter and provides prompts for merging NDA documents. It includes a merge_doc_prompt_start string for generating the prompt and a merge_doc_prompt_block string for displaying NDAs to be merged. 
The goal is to create a single NDA by maximizing information retention and minimizing redundancy, with the output between and .", - "type": "comment" - }, - "34": { - "file_id": 4, "content": "\n{document}\n\n\"\"\"\n merge_doc_prompt_cot_start = \"\"\"Merge the following {num} NDA documents - into a single NDA, maximizing retained information and minimizing redundancy.\nYou can generate any intermediate thoughts and documents you want, but the final output should be the merged NDA, placed between the two tags and .\nFor instance you might want to follow this approach:\n1. Split each NDA into their logical subparts.\n2. Merge the subparts of the {num} NDAs.\n3. Combine the merged subparts into a single NDA.\n4. Place the merged NDA between the tags and .\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_start = \"\"\"The following NDA merges initial NDAs - .\nPlease improve the summary NDA by adding more information and removing redundancy. Output only the improved NDA, placed between the two tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n improve_summary_prompt_block = \"\"\"", "type": "code", "location": "/examples/doc_merge/doc_merge.py:32-54" }, - "35": { - "file_id": 4, + "29": { + "file_id": 3, "content": "The code defines two prompts for merging and improving NDA documents. The first prompt instructs to merge the provided NDAs into a single one, preserving information and minimizing redundancy. It also provides an example approach. The second prompt asks to improve the merged document by adding more information and removing redundancies, with output placed between specific tags. 
Both prompts include the input NDAs as \"Doc1\" to \"Doc{num}\".", "type": "comment" }, - "36": { - "file_id": 4, + "30": { + "file_id": 3, "content": "\n{document}\n\n\"\"\"\n improve_summary_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n score_prompt_base = \"\"\"The following NDA merges NDAs - .\nPlease score the merged NDA in terms of how much redundant information is contained, independent of the original NDAs, as well as how much information is retained from the original NDAs.\nA score of 10 for redundancy implies that absolutely no information is redundant, while a score of 0 implies that at least half of the information is redundant (so everything is at least mentioned twice).\nA score of 10 for retained information implies that all information from the original NDAs is retained, while a score of 0 implies that no information is retained.\nYou may provide reasoning for your scoring, but the final score for redundancy should be between the tags and , and the final score for retained information should be between the tags and , without any additional text within any of those tags.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:55-71" }, - "37": { - "file_id": 4, + "31": { + "file_id": 3, "content": "This code contains various prompts for different tasks, such as improving summaries and scoring merged documents. 
The prompts are designed to assist in the task of merging NDAs while considering redundancy and retained information scores, with specific tags provided for clarity.", "type": "comment" }, - "38": { - "file_id": 4, + "32": { + "file_id": 3, "content": "Here are NDAs - :\n\"\"\"\n score_prompt_block = \"\"\"\n\n{document}\n\n\"\"\"\n score_prompt_end = \"\"\"\nHere is the summary NDA :\n\n{summary}\n\n\"\"\"\n aggregate_full_prompt_base = \"\"\"The following NDAs - each merge the initial NDAs - .\nCombine the merged NDAs - into a new one, maximizing their advantages and overall information retention, while minimizing redundancy.\nOutput only the new NDA between the tags and , without any additional text. \nHere are the original NDAs - :\n\"\"\"\n aggregate_full_prompt_block1 = \"\"\"\n\n{document}\n\n\"\"\"\n aggregate_full_prompt_mid = \"\"\"\nHere are the summary NDAs - :\n\"\"\"\n aggregate_full_prompt_block2 = \"\"\"\n\n{summary}\n\n\"\"\"\n aggregate_sub_prompt_base = \"\"\"The following NDAs - are summaries of some other NDAs.\nCombine them into a new one, make sure to maximize their advantages and overall information retention, while minimizing redundancy.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:73-112" }, - "39": { - "file_id": 4, + "33": { + "file_id": 3, "content": "This code appears to be part of a larger program that deals with merging and summarizing Non-Disclosure Agreements (NDAs). It uses string formatting to generate prompts for the user, asking them to provide NDAs in a specific format. 
The code snippet includes various placeholders (, ) for incorporating the user's provided information.", "type": "comment" }, - "40": { - "file_id": 4, + "34": { + "file_id": 3, "content": "Output only the new NDA between the tags and , without any additional text.\nHere are NDAs - :\n\"\"\"\n aggregate_sub_prompt_generate = \"\"\"\nNDA :\n{nda}\n\n\"\"\"\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate an aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n if len(state_dicts[0][\"parts\"]) > 0 and len(state_dicts[0][\"parts\"]) < len(\n state_dicts[0][\"documents\"]\n ):\n prompt = self.aggregate_sub_prompt_base.format(\n num_ndas=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_sub_prompt_generate.format(\n nda=state_dict[\"current\"], num=i + 1", "type": "code", "location": "/examples/doc_merge/doc_merge.py:113-143" }, - "41": { - "file_id": 4, + "35": { + "file_id": 3, "content": "This code generates an aggregation prompt for a language model, using the provided state_dicts. It concatenates NDAs from each state_dict and formats them into a final prompt. 
The output is a string containing the merged NDAs between \"\" and \"\".", "type": "comment" }, - "42": { - "file_id": 4, + "36": { + "file_id": 3, "content": " )\n return prompt\n else:\n prompt = self.aggregate_full_prompt_base.format(\n num_ndas=len(state_dicts[0][\"documents\"]),\n num_ndas_summary=len(state_dicts),\n )\n for i, document in enumerate(state_dicts[0][\"documents\"]):\n prompt += self.aggregate_full_prompt_block1.format(\n document=document, num=i + 1\n )\n prompt += self.aggregate_full_prompt_mid.format(\n num_ndas_summary=len(state_dicts),\n )\n for i, state_dict in enumerate(state_dicts):\n prompt += self.aggregate_full_prompt_block2.format(\n summary=state_dict[\"current\"], num=i + 1\n )\n return prompt\n def generate_prompt(\n self,\n num_branches: int,\n documents: List[str],\n method: str,\n parts: Set[str],\n current: str,\n **kwargs,\n ) -> str:\n \"\"\"\n Generate a generate prompt for the language model.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:144-174" }, - "43": { - "file_id": 4, + "37": { + "file_id": 3, "content": "This code defines a class with methods for generating prompts. The `generate_prompt` method takes in parameters like number of branches, documents, and current state. 
It returns a prompt for the language model using string formatting based on input parameters.", "type": "comment" }, - "44": { - "file_id": 4, + "38": { + "file_id": 3, "content": " :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param documents: The list of documents to be merged.\n :type documents: List[str]\n :param method: Method for which the generate prompt is generated.\n :type method: str\n :param parts: Indices of the already processed document parts.\n :type parts: Set[str]\n :param current: The intermediate solution.\n :type current: str\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n :raise AssertionError: If method is not implemented yet.\n \"\"\"\n prompt = \"\"\n if method.startswith(\"io\") or method.startswith(\"cot\"):\n if method.startswith(\"io\"):\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n else:\n prompt += self.merge_doc_prompt_cot_start.format(num=len(documents))\n for i, document in enumerate(documents):", "type": "code", "location": "/examples/doc_merge/doc_merge.py:176-198" }, - "45": { - "file_id": 4, + "39": { + "file_id": 3, "content": "This function takes in the number of responses, a list of documents to merge, method for generating the prompt, indices of already processed document parts, an intermediate solution, and additional keyword arguments. It returns the generate prompt used for merging the documents. 
If the method is not implemented yet, it raises AssertionError.", "type": "comment" }, - "46": { - "file_id": 4, + "40": { + "file_id": 3, "content": " prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n elif method.startswith(\"tot\"):\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(documents))\n for i, document in enumerate(documents):\n prompt += self.merge_doc_prompt_block.format(\n document=document, num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(documents),\n )\n for i, document in enumerate(documents):\n prompt += self.improve_summary_prompt_block.format(\n document=document, num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n elif method.startswith(\"got\"):", "type": "code", "location": "/examples/doc_merge/doc_merge.py:199-221" }, - "47": { - "file_id": 4, + "41": { + "file_id": 3, "content": "The code provides a prompt for merging multiple documents or improving a given summary based on the specified method. It dynamically generates the prompt by concatenating predefined blocks of text with placeholders for document numbers and the original summary. 
If no current summary is provided, it creates a prompt to merge documents, otherwise, it improves the given summary using those documents.", "type": "comment" }, - "48": { - "file_id": 4, + "42": { + "file_id": 3, "content": " parts = (\n sorted(list(parts)) if len(parts) > 0 else list(range(len(documents)))\n )\n if current is None or current == \"\":\n prompt += self.merge_doc_prompt_start.format(num=len(parts))\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.merge_doc_prompt_block.format(\n document=documents[part], num=i + 1\n )\n return prompt\n else:\n prompt += self.improve_summary_prompt_start.format(\n num=len(parts),\n )\n for i, part in enumerate(sorted(list(parts))):\n prompt += self.improve_summary_prompt_block.format(\n document=documents[part], num=i + 1\n )\n prompt += self.improve_summary_prompt_end.format(summary=current)\n return prompt\n else:\n assert False, \"Not implemented yet.\"\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:", "type": "code", "location": "/examples/doc_merge/doc_merge.py:222-245" }, - "49": { - "file_id": 4, + "43": { + "file_id": 3, "content": "The code checks if the current summary is provided. If not, it generates a prompt for merging documents into one coherent summary. If the current summary is provided, it generates a prompt for improving an existing summary by incorporating information from multiple documents. 
The code also sorts the parts of the document and formats them in a specific way for the prompts.", "type": "comment" }, - "50": { - "file_id": 4, + "44": { + "file_id": 3, "content": " \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n :raise AssertionError: If more than one thought state is supplied.\n \"\"\"\n if len(state_dicts) > 1:\n assert False, \"Not implemented yet.\"\n else:\n # perform individual scoring\n parts = (\n [\n state_dicts[0][\"documents\"][part]\n for part in sorted(list(state_dicts[0][\"parts\"]))\n ]\n if len(state_dicts[0][\"parts\"]) > 0\n else state_dicts[0][\"documents\"]\n )\n prompt = self.score_prompt_base.format(\n num=len(parts),\n )\n for i, part in enumerate(parts):\n prompt += self.score_prompt_block.format(document=part, num=i + 1)", "type": "code", "location": "/examples/doc_merge/doc_merge.py:246-274" }, - "51": { - "file_id": 4, + "45": { + "file_id": 3, "content": "This function generates a score prompt for the language model using a single thought state provided as an argument. It checks if only one thought state is supplied and handles the case where more than one is given. 
The prompt is created by formatting the base and block prompts with the number of documents.", "type": "comment" }, - "52": { - "file_id": 4, + "46": { + "file_id": 3, "content": " prompt += self.score_prompt_end.format(\n summary=state_dicts[0][\"current\"],\n )\n return prompt\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\nclass DocMergeParser(parser.Parser):\n \"\"\"\n DocMergeParser provides the parsing of language model reponses specific to the\n document merge example.\n Inherits from the Parser class and implements its abstract methods.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Inits the response cache.\n \"\"\"\n self.cache = {}\n def strip_answer_helper(self, text: str, tag: str = \"\") -> str:", "type": "code", "location": "/examples/doc_merge/doc_merge.py:275-315" }, - "53": { - "file_id": 4, + "47": { + "file_id": 3, "content": "This code defines a class DocMergeParser that extends the Parser class and provides specific functionality for parsing language model responses in the document merge example. It includes methods to generate improve prompt, validation prompt, and handles answer stripping with optional tags. The response cache is initialized in the constructor.", "type": "comment" }, - "54": { - "file_id": 4, + "48": { + "file_id": 3, "content": " \"\"\"\n Helper function to remove tags from a text.\n :param text: The input text.\n :type text: str\n :param tag: The tag to be stripped. 
Defaults to \"\".\n :type tag: str\n :return: The stripped text.\n :rtype: str\n \"\"\"\n text = text.strip()\n if \"Output:\" in text:\n text = text[text.index(\"Output:\") + len(\"Output:\") :].strip()\n if tag != \"\":\n start = text.rfind(f\"<{tag}>\")\n end = text.rfind(f\"\")\n if start != -1 and end != -1:\n text = text[start + len(f\"<{tag}>\") : end].strip()\n elif start != -1:\n logging.warning(\n f\"Only found the start tag <{tag}> in answer: {text}. Returning everything after the tag.\"\n )\n text = text[start + len(f\"<{tag}>\") :].strip()\n elif end != -1:\n logging.warning(\n f\"Only found the end tag in answer: {text}. Returning everything before the tag.\"", "type": "code", "location": "/examples/doc_merge/doc_merge.py:316-342" }, - "55": { - "file_id": 4, + "49": { + "file_id": 3, "content": "This function removes specified tags from a text. It first strips whitespace and checks if \"Output:\" is in the text. Then, it searches for start and end tags to remove the enclosed content while handling cases of only one tag found. If no matching tags are found, it logs a warning and returns everything after or before the found tag.", "type": "comment" }, - "56": { - "file_id": 4, + "50": { + "file_id": 3, "content": " )\n text = text[:end].strip()\n else:\n logging.warning(\n f\"Could not find any tag {tag} in answer: {text}. 
Returning the full answer.\"\n )\n return text\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for an aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the language model.\n :rtype: Union[Dict, List[Dict]]\n \"\"\"\n new_states = []\n for text in texts:\n if len(states[0][\"parts\"]) < len(states[0][\"documents\"]):\n # subpart aggregation\n text = self.strip_answer_helper(text, \"Merged\")", "type": "code", "location": "/examples/doc_merge/doc_merge.py:343-369" }, - "57": { - "file_id": 4, + "51": { + "file_id": 3, "content": "The code is parsing the response from a language model for an aggregation prompt. It checks if there are enough thought states and performs subpart aggregation by stripping the answer to a single text using a helper function.", "type": "comment" }, - "58": { - "file_id": 4, + "52": { + "file_id": 3, "content": " new_state = states[0].copy()\n new_state[\"current\"] = text\n new_state[\"parts\"] = set()\n for state in states:\n new_state[\"parts\"] = new_state[\"parts\"] | state[\"parts\"]\n new_states.append(new_state)\n else:\n # full NDA aggregation\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = states[0].copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the respones from the 
language model.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:370-393" }, - "59": { - "file_id": 4, + "53": { + "file_id": 3, "content": "The code appears to be a part of a larger function that generates new thought states by aggregating inputs from multiple sources. It seems to handle both partial and full non-disclosure agreement (NDA) cases, stripping the answer text and creating new states accordingly. The `parse_generate_answer` function processes response from the language model for generate prompts and returns new thought states after parsing the responses.", "type": "comment" }, - "60": { - "file_id": 4, + "54": { + "file_id": 3, "content": " :rtype: List[Dict]\n \"\"\"\n new_states = []\n for text in texts:\n text = self.strip_answer_helper(text, \"Merged\")\n new_state = state.copy()\n new_state[\"current\"] = text\n new_states.append(new_state)\n return new_states\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n :raise AssertionError: If the number of thought states is not one.\n \"\"\"\n assert len(states) == 1, \"Only one state is allowed for scoring.\"\n if len(states) == 1:\n # individual scoring\n redundancy_scores = []\n retain_scores = []", "type": "code", "location": "/examples/doc_merge/doc_merge.py:394-420" }, - "61": { - "file_id": 4, + "55": { + "file_id": 3, "content": "The function `get_new_states()` takes a list of texts and returns a list of dictionaries, where each dictionary represents a thought state with the current text as its value.\n\nThe function `parse_score_answer()` takes a list of thought states and responses from the language model, asserts that 
only one thought state is allowed for scoring, and then initializes lists for redundancy and retain scores.", "type": "comment" }, - "62": { - "file_id": 4, + "56": { + "file_id": 3, "content": " for text in texts:\n answer = self.strip_answer_helper(text, \"Redundancy\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n redundancy_scores.append(float(res[0]))\n elif len(res) > 1:\n logging.warning(\n f\"Found multiple redundancy scores in answer: {text}. Returning the last one.\"\n )\n redundancy_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any redundancy score in answer: {text}. Ignoring this answer.\"\n )\n answer = self.strip_answer_helper(text, \"Retained\")\n res = re.findall(r\"\\d+\\.?\\d*\", answer)\n if len(res) == 1:\n retain_scores.append(float(res[0]))\n elif len(res) > 1:\n logging.warning(\n f\"Found multiple retained scores in answer: {text}. Returning the last one.\"", "type": "code", "location": "/examples/doc_merge/doc_merge.py:421-441" }, - "63": { - "file_id": 4, + "57": { + "file_id": 3, "content": "This code iterates through text inputs, extracts redundancy and retained scores using regex, handles multiple score cases by logging a warning and selecting the last one or ignoring if no scores found.", "type": "comment" }, - "64": { - "file_id": 4, + "58": { + "file_id": 3, "content": " )\n retain_scores.append(float(res[-1]))\n else:\n logging.warning(\n f\"Could not find any retained score in answer: {text}. Ignoring this answer.\"\n )\n if len(redundancy_scores) == 0 or len(retain_scores) == 0:\n logging.warning(\n f\"Could not find any valid score in any answer. 
Returning 0.0.\"\n )\n return [0.0]\n mean_redundancy = fmean(redundancy_scores)\n mean_retain = fmean(retain_scores)\n f1 = 2 * mean_redundancy * mean_retain / (mean_redundancy + mean_retain)\n return [f1]\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:442-464" }, - "65": { - "file_id": 4, + "59": { + "file_id": 3, "content": "This code snippet is a part of a function responsible for parsing the responses from a language model for an 'improve' prompt. It calculates redundancy and retain scores for each answer, then returns the F1 score based on these scores. If no valid scores are found in any answer, it returns 0.0.", "type": "comment" }, - "66": { - "file_id": 4, + "60": { + "file_id": 3, "content": " :type texts: List[str]\n :return: The new thought state after parsing the responses from the language model.\n :rtype: Dict\n \"\"\"\n pass\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\ndef io() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the IO method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph", "type": "code", "location": 
"/examples/doc_merge/doc_merge.py:465-497" }, - "67": { - "file_id": 4, + "61": { + "file_id": 3, "content": "This code contains functions for thought state management, parsing responses from a language model, and generating the Graph of Operations for IO method. It uses Dict and List[str] as inputs and returns bool or Dict outputs. The code block defines three functions: update_thought_state, parse_validation_answer, and io. The last function generates the Graph of Operations by appending Generate and Score operations to an instance of operations.GraphOfOperations().", "type": "comment" }, - "68": { - "file_id": 4, + "62": { + "file_id": 3, "content": "def cot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the CoT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 1))\n operations_graph.append_operation(operations.Score(3, False))\n return operations_graph\ndef tot() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the ToT method.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n branch_factor = 10\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best_1 = operations.KeepBestN(1, True)\n operations_graph.append_operation(keep_best_1)\n for _ in range(2):\n operations_graph.append_operation(operations.Generate(1, branch_factor))\n operations_graph.append_operation(operations.Score(3, False))", "type": "code", "location": "/examples/doc_merge/doc_merge.py:500-533" }, - "69": { - "file_id": 4, + "63": { + "file_id": 3, "content": "The code defines two functions, `cot()` and `tot()`, which generate the Graph of Operations for CoT and ToT methods respectively. 
The CoT method involves generating one child node, scoring it, while the ToT method generates 10 children nodes initially, keeps the best one, then generates two additional children per iteration.", "type": "comment" }, - "70": { - "file_id": 4, + "64": { + "file_id": 3, "content": " keep_best_2 = operations.KeepBestN(1, True)\n keep_best_2.add_predecessor(keep_best_1)\n operations_graph.append_operation(keep_best_2)\n keep_best_1 = keep_best_2\n return operations_graph\ndef got() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT method, where full documents\n are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n operations_graph.append_operation(operations.Generate(1, 5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best = operations.KeepBestN(3, True)\n operations_graph.append_operation(keep_best)\n operations_graph.append_operation(operations.Aggregate(5))\n operations_graph.append_operation(operations.Score(3, False))\n keep_best2 = operations.KeepBestN(1, True)\n keep_best2.add_predecessor(keep_best)\n operations_graph.append_operation(keep_best2)\n operations_graph.append_operation(operations.Generate(1, 10))", "type": "code", "location": "/examples/doc_merge/doc_merge.py:534-561" }, - "71": { - "file_id": 4, + "65": { + "file_id": 3, "content": "This code generates a Graph of Operations for merging full documents. It first appends operations to generate, score, aggregate, and keep the best scores. 
The last two operations add a predecessor to keep_best and append an additional generate operation with parameters 1 and 10.", "type": "comment" }, - "72": { - "file_id": 4, + "66": { + "file_id": 3, "content": " operations_graph.append_operation(operations.Score(3, False))\n keep_best3 = operations.KeepBestN(1, True)\n keep_best3.add_predecessor(keep_best2)\n operations_graph.append_operation(keep_best3)\n return operations_graph\ndef got2() -> operations.GraphOfOperations:\n \"\"\"\n Generates the Graph of Operations for the GoT2 method, where partial\n documents are merged.\n :return: Graph of Operations\n :rtype: GraphOfOperations\n \"\"\"\n operations_graph = operations.GraphOfOperations()\n sub_parts = []\n for i in range(0, 4, 2): # should be at most 16 parts\n sub_text = operations.Selector(\n lambda thoughts, list_id=i: [\n operations.Thought(\n state={**thoughts[0].state, \"parts\": {list_id, list_id + 1}}\n )\n ]\n )\n operations_graph.add_operation(sub_text)\n gen_nda = operations.Generate(1, 5)\n gen_nda.add_predecessor(sub_text)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)", "type": "code", "location": "/examples/doc_merge/doc_merge.py:562-593" }, - "73": { - "file_id": 4, + "67": { + "file_id": 3, "content": "This code generates a Graph of Operations for the GoT2 method, which merges partial documents. It creates an initial GraphOfOperations object and iteratively adds operations such as Selectors, Generators, and Scorers to the graph. Each iteration consists of selecting specific thoughts, generating new documents, and scoring them. 
The resulting graph is returned.", "type": "comment" }, - "74": { - "file_id": 4, + "68": { + "file_id": 3, "content": " score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n operations_graph.add_operation(keep_best_nda)\n sub_parts.append(keep_best_nda)\n while len(sub_parts) > 1:\n new_sub_parts = []\n for i in range(0, len(sub_parts), 2):\n if i + 1 == len(sub_parts):\n new_sub_parts.append(sub_parts[i])\n continue\n aggregate = operations.Aggregate(5)\n aggregate.add_predecessor(sub_parts[i])\n aggregate.add_predecessor(sub_parts[i + 1])\n operations_graph.add_operation(aggregate)\n score = operations.Score(3, False)\n score.add_predecessor(aggregate)\n operations_graph.add_operation(score)\n keep_best = operations.KeepBestN(1, True)\n keep_best.add_predecessor(score)\n operations_graph.add_operation(keep_best)\n gen_nda = operations.Generate(1, 5)", "type": "code", "location": "/examples/doc_merge/doc_merge.py:594-619" }, - "75": { - "file_id": 4, + "69": { + "file_id": 3, "content": "This code is creating an operations graph for a document merge process. It starts with adding Score and Generate nodes, then iteratively adds Aggregate, Score, and KeepBestN nodes until there's only one node left in the sub_parts list. The Score nodes are used to calculate similarity scores, while the KeepBestN nodes keep the best result from the previous operation. 
The operations graph is then built with these operations added in sequence.", "type": "comment" }, - "76": { - "file_id": 4, + "70": { + "file_id": 3, "content": " gen_nda.add_predecessor(keep_best)\n operations_graph.add_operation(gen_nda)\n score_nda = operations.Score(3, False)\n score_nda.add_predecessor(gen_nda)\n operations_graph.add_operation(score_nda)\n keep_best_nda = operations.KeepBestN(1, True)\n keep_best_nda.add_predecessor(score_nda)\n keep_best_nda.add_predecessor(keep_best)\n operations_graph.add_operation(keep_best_nda)\n new_sub_parts.append(keep_best_nda)\n sub_parts = new_sub_parts\n return operations_graph\ndef run(\n data_ids: List[int],\n methods: List[Callable[[], operations.GraphOfOperations]],\n budget: float,\n lm_name: str,\n) -> float:\n \"\"\"\n Controller function that executes each specified method for each specified\n sample while the budget is not exhausted.\n :param data_ids: Indices of the sample to be run.\n :type data_ids: List[int]\n :param methods: List of functions to generate Graphs of Operations.\n :type methods: Each function generates a Graph of Operation.", "type": "code", "location": "/examples/doc_merge/doc_merge.py:620-649" }, - "77": { - "file_id": 4, + "71": { + "file_id": 3, "content": "This code is creating a graph of operations for language model inference. It defines several nodes and adds them to the operations graph, including generation, scoring, and keeping the best node. 
The function run() executes methods for each specified sample within the budget limit.", "type": "comment" }, - "78": { - "file_id": 4, + "72": { + "file_id": 3, "content": " :param budget: Language model budget for the execution in dollars.\n :type budget: float\n :param lm_name: Name of the language model to be used.\n :type lm_name: str\n :return: Spent budget in dollars.\n :rtype: float\n \"\"\"\n orig_budget = budget\n data_path = os.path.join(os.path.dirname(__file__), \"documents.csv\")\n data = []\n with open(data_path, \"r\", encoding=\"utf8\") as f:\n reader = csv.reader(f)\n next(reader)\n for row in reader:\n row[0] = int(row[0])\n data.append(row)\n if data_ids is None or len(data_ids) == 0:\n data_ids = list(range(len(data)))\n selected_data = [data[i] for i in data_ids]\n results_dir = os.path.join(os.path.dirname(__file__), \"results\")\n if not os.path.exists(results_dir):\n os.makedirs(results_dir)\n timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n extra_info = f\"{lm_name}_{'-'.join([method.__name__ for method in methods])}\"\n folder_name = f\"{extra_info}_{timestamp}\"\n results_folder = os.path.join(results_dir, folder_name)", "type": "code", "location": "/examples/doc_merge/doc_merge.py:650-679" }, - "79": { - "file_id": 4, + "73": { + "file_id": 3, "content": "This function takes a budget, language model name, and optional data IDs as input. It reads the \"documents.csv\" file, filters the data based on provided data IDs, and then creates folders to save results for different methods using the specified language model. 
The function returns the spent budget in dollars.", "type": "comment" }, - "80": { - "file_id": 4, + "74": { + "file_id": 3, "content": " os.makedirs(results_folder)\n config = {\n \"data\": selected_data,\n \"methods\": [method.__name__ for method in methods],\n \"lm\": lm_name,\n \"budget\": budget,\n }\n with open(os.path.join(results_folder, \"config.json\"), \"w\") as f:\n json.dump(config, f)\n logging.basicConfig(\n filename=os.path.join(results_folder, \"log.log\"),\n filemode=\"w\",\n format=\"%(name)s - %(levelname)s - %(message)s\",\n level=logging.DEBUG,\n )\n for method in methods:\n os.makedirs(os.path.join(results_folder, method.__name__))\n for data in selected_data:\n logging.info(f\"Running data {data[0]}: {data[1]}\")\n if budget <= 0.0:\n logging.error(\n f\"Budget has been depleted, stopping. Data {data[0]} has not been run.\"\n )\n break\n for method in methods:\n logging.info(f\"Running method {method.__name__}\")\n logging.info(f\"Budget left: {budget}\")\n if budget <= 0.0:\n logging.error(", "type": "code", "location": "/examples/doc_merge/doc_merge.py:680-712" }, - "81": { - "file_id": 4, + "75": { + "file_id": 3, "content": "This code sets up a results folder, saves the configuration file in JSON format, and initializes logging. It then iterates over selected data and methods, keeping track of remaining budget. If the budget becomes zero, it stops execution and logs an error message.", "type": "comment" }, - "82": { - "file_id": 4, + "76": { + "file_id": 3, "content": " f\"Budget has been depleted, stopping. 
Method {method.__name__} has not been run.\"\n )\n break\n lm = language_models.ChatGPT(\n os.path.join(\n os.path.dirname(__file__),\n \"../../graph_of_thoughts/language_models/config.json\",\n ),\n model_name=lm_name,\n cache=True,\n )\n operations_graph = method()\n executor = controller.Controller(\n lm,\n operations_graph,\n DocMergePrompter(),\n DocMergeParser(),\n {\n \"documents\": [data[2], data[3], data[4], data[5]],\n \"parts\": set(),\n \"current\": \"\",\n \"method\": method.__name__,\n },\n )\n try:\n executor.run()\n except Exception as e:\n logging.error(f\"Exception: {e}\")\n path = os.path.join(", "type": "code", "location": "/examples/doc_merge/doc_merge.py:713-741" }, - "83": { - "file_id": 4, + "77": { + "file_id": 3, "content": "This code chunk initializes a language model, creates an operations graph, and sets up an executor for running the method. If the budget is depleted, it will stop execution. The code then attempts to run the executor and logs any exceptions that occur during execution.", "type": "comment" }, - "84": { - "file_id": 4, + "78": { + "file_id": 3, "content": " results_folder,\n method.__name__,\n f\"{data[0]}.json\",\n )\n for operation in operations_graph.operations:\n for thought in operation.thoughts:\n thought.state[\"parts\"] = list(thought.state[\"parts\"])\n executor.output_graph(path)\n budget -= lm.cost\n return orig_budget - budget\nif __name__ == \"__main__\":\n \"\"\"\n Input (x1, x2, x3, x4): Four NDAs\n Output (y): A new combined NDA\n Evaluation: According to information coverage without repetition (scored by the LLM)\n \"\"\"\n budget = 30\n samples = [item for item in range(0, 50)]\n approaches = [io, cot, tot, got, got2]\n spent = run(samples, approaches, budget, \"chatgpt\")\n logging.info(f\"Spent {spent} out of {budget} budget.\")", "type": "code", "location": "/examples/doc_merge/doc_merge.py:742-767" }, - "85": { - "file_id": 4, + "79": { + "file_id": 3, "content": "This code takes input NDAs, 
combines them, and evaluates the combined result using an LLM (Language Model). The output is scored based on information coverage without repetition. A budget of 30 is set, with sampling from range(0, 50), and approaches io, cot, tot, got, and got2 are used. The code logs the spent budget after running the function \"run\".", "type": "comment" }, - "86": { - "file_id": 5, + "80": { + "file_id": 4, "content": "/examples/doc_merge/plot.py", "type": "filepath" }, - "87": { - "file_id": 5, + "81": { + "file_id": 4, "content": "The code imports libraries, defines a get_complete_results() function, reads JSON data and stores it in a dictionary, sorts the keys, retrieves final scores for each method using results_complete dictionary, and includes functions to retrieve plotting data and plot boxplots for scores with total cost bar plots on a secondary y-axis. It also sets custom y-axis positions and labels for plotting the solved status of various methods, saving it as a PDF, and generates data from given results while initializing an instance of the DocMerge class with a cost_upper limit of 15.", "type": "summary" }, - "88": { - "file_id": 5, + "82": { + "file_id": 4, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in 
results_complete.keys():\n results_complete[key] = sorted(", "type": "code", "location": "/examples/doc_merge/plot.py:1-29" }, - "89": { - "file_id": 5, + "83": { + "file_id": 4, "content": "The code imports necessary libraries, defines a function get_complete_results(), and reads data from JSON files in specified directories. It collects this information into a dictionary, sorts the keys, and returns the complete results for further processing.", "type": "comment" }, - "90": { - "file_id": 5, + "84": { + "file_id": 4, "content": " results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )\n scores[method] = sorted(scores[method], key=lambda x: x[0])", "type": "code", "location": "/examples/doc_merge/plot.py:30-59" }, - "91": { - "file_id": 5, + "85": { + "file_id": 4, "content": "This code retrieves and sorts final scores for each method in the results_complete dictionary. It loops through each method, then through each result for that method, calculating the score, solved status, prompt/completion tokens, and cost from the reversed data list. 
Finally, it appends these values to the corresponding method's scores list, then sorts those scores by key.", "type": "comment" }, - "92": { - "file_id": 5, + "86": { + "file_id": 4, "content": " return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"got\", \"got2\"],\n model=\"GPT-3.5\",\n num_ndas=4,\n y_lower=0,\n y_upper=10,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis", "type": "code", "location": "/examples/doc_merge/plot.py:60-96" }, - "93": { - "file_id": 5, + "87": { + "file_id": 4, "content": "Function get_plotting_data returns a dictionary of plotting data for different methods, which includes scores, number of solved problems, and costs. Function plot_results plots the results using given parameters like methods order, model, number of nodes, y-axis limits, cost upper limit, etc. 
The function first ensures that the specified methods are in the result dictionary and then extracts ordered scores and total costs for each method from the results dictionary.", "type": "comment" }, - "94": { - "file_id": 5, + "88": { + "file_id": 4, "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels)\n # ax.set_xlabel(\"Approach\")\n ax.set_ylim(y_lower, 12 if display_solved else 9.75)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n # ax.set_title(f\"Document Merging\")\n ax2 = ax.twinx()\n ax2.bar(\n positions,\n total_costs,\n alpha=0.5,\n color=\"blue\",\n label=\"Total Cost ($); the lower the better\",\n )\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())", "type": "code", "location": "/examples/doc_merge/plot.py:97-132" }, - "95": { - "file_id": 5, + "89": { + "file_id": 4, "content": "Creates a boxplot for scores, sets ticks and labels for x-axis, adjusts y-limits, adds a blue bar plot with total costs on the right y-axis, and sets corresponding tick colors and limits.", "type": "comment" }, - "96": { - "file_id": 5, + "90": { + "file_id": 4, "content": " tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if 
display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count, annotation_height, f\"Solved: {solved}\", ha=\"center\", va=\"bottom\"\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"doc_merge_{model}_{num_ndas}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n num_ndas=4,\n display_solved=False,\n model=\"GPT-3.5\",\n y_upper=10,\n display_left_ylabel=True,", "type": "code", "location": "/examples/doc_merge/plot.py:133-168" }, + "91": { + "file_id": 4, + "content": "This code is setting custom tick positions and labels for the y-axis of a plot, displaying the solved status of various methods, saving the plot as a PDF, and generating plotting data from given results.", + "type": "comment" + }, + "92": { + "file_id": 4, + "content": " cost_upper=15,\n)", + "type": "code", + "location": "/examples/doc_merge/plot.py:169-170" + }, + "93": { + "file_id": 4, + "content": "This code snippet is initializing a function, specifically an instance of the class \"DocMerge\", with the parameter 'cost_upper' set to 15. The purpose of this function might be to perform document merging or some similar operation with a specified upper cost limit.", + "type": "comment" + }, + "94": { + "file_id": 5, + "content": "/examples/doc_merge/pure_documents.json", + "type": "filepath" + }, + "95": { + "file_id": 5, + "content": "Both comments discuss company-supplier agreements, covering aspects such as NDAs, IT system maintenance, late delivery penalties, termination provisions, confidentiality clauses, and governing laws.", + "type": "summary" + }, + "96": { + "file_id": 5, + "content": "[\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\n1. Agreement between [Your Company Name] and [Recipient Name] on [Date].\\n2. 
Information sharing for the purpose of [specific project or purpose].\\n3. \\\"Confidential Information\\\" includes all potentially commercially valuable information, specifically software development tactics, processes, and in-house research results.\\n4. Receiving party is obligated to protect the Confidential Information, use it solely for the disclosed purpose, and not disclose it without consent.\\n5. Breach penalties include injunctive relief, other remedies, and a $200,000 fee per breach.\\n6. The Agreement applies to the Parties and their successors and assigns. It contains all related agreements and lack of enforcement doesn't imply waiver.\\n7. The Agreement is under the laws of [State].\\n8. Signed by [Your Company Name] and [Recipient Name] at the above date.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective from [Effective Date], this NDA involves [Your Company Name]", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:1-3" + }, "97": { "file_id": 5, - "content": "This code is setting custom tick positions and labels for the y-axis of a plot, displaying the solved status of various methods, saving the plot as a PDF, and generating plotting data from given results.", + "content": "This code appears to contain two example Non-Disclosure Agreements (NDA) in JSON format, with fields such as parties involved, purpose of information sharing, definition of Confidential Information, obligations and penalties for breaching the agreement, applicable laws, and signatures.", "type": "comment" }, "98": { "file_id": 5, - "content": " cost_upper=15,\n)", + "content": " (\\\"Disclosing Party\\\"), and [Recipient Name] (\\\"Receiving Party\\\").\\n\\n1. Purpose: The Disclosing Party will disclose confidential information related to [Topic of Research] to the Receiving Party for [Purpose].\\n\\n2. 
Confidential Information: Defined as all non-public reports, data, designs, and other materials provided by the Disclosing Party to the Receiving Party.\\n\\n3. Receiving Party's Obligations:\\n a. Use, reproduce, or distribute the confidential information only for the agreed purpose.\\n b. Restrict access to the information to necessary parties, ensuring they abide by strict confidentiality.\\n c. Return or destroy all confidential information upon request or at the end of the agreement.\\n\\n4. Exclusions: Information will not be classified as confidential if it is already known to the Receiving Party, publicly known, or independently developed by the Receiving Party.\\n\\n5. Non-Competition: The Receiving Party will not engage in any competing business against the Disclo", "type": "code", - "location": "/examples/doc_merge/plot.py:169-170" + "location": "/examples/doc_merge/pure_documents.json:3-3" }, "99": { "file_id": 5, - "content": "This code snippet is initializing a function, specifically an instance of the class \"DocMerge\", with the parameter 'cost_upper' set to 15. The purpose of this function might be to perform document merging or some similar operation with a specified upper cost limit.", + "content": "The code is a confidentiality agreement between a Disclosing Party and a Recipient (Receiving Party) involving the exchange of confidential information related to a specific topic for a stated purpose. 
It outlines obligations for the Receiving Party, exclusions, and non-competition clauses.", "type": "comment" } } \ No newline at end of file diff --git a/docs/data/1.json b/docs/data/1.json index d31dc52..eb3f941 100644 --- a/docs/data/1.json +++ b/docs/data/1.json @@ -1,551 +1,552 @@ { "100": { - "file_id": 6, - "content": "/examples/doc_merge/pure_documents.json", - "type": "filepath" - }, - "101": { - "file_id": 6, - "content": "Both comments discuss company-supplier agreements, covering aspects such as NDAs, IT system maintenance, late delivery penalties, termination provisions, confidentiality clauses, and governing laws.", - "type": "summary" - }, - "102": { - "file_id": 6, - "content": "[\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\n1. Agreement between [Your Company Name] and [Recipient Name] on [Date].\\n2. Information sharing for the purpose of [specific project or purpose].\\n3. \\\"Confidential Information\\\" includes all potentially commercially valuable information, specifically software development tactics, processes, and in-house research results.\\n4. Receiving party is obligated to protect the Confidential Information, use it solely for the disclosed purpose, and not disclose it without consent.\\n5. Breach penalties include injunctive relief, other remedies, and a $200,000 fee per breach.\\n6. The Agreement applies to the Parties and their successors and assigns. It contains all related agreements and lack of enforcement doesn't imply waiver.\\n7. The Agreement is under the laws of [State].\\n8. 
Signed by [Your Company Name] and [Recipient Name] at the above date.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective from [Effective Date], this NDA involves [Your Company Name]", - "type": "code", - "location": "/examples/doc_merge/pure_documents.json:1-3" - }, - "103": { - "file_id": 6, - "content": "This code appears to contain two example Non-Disclosure Agreements (NDA) in JSON format, with fields such as parties involved, purpose of information sharing, definition of Confidential Information, obligations and penalties for breaching the agreement, applicable laws, and signatures.", - "type": "comment" - }, - "104": { - "file_id": 6, - "content": " (\\\"Disclosing Party\\\"), and [Recipient Name] (\\\"Receiving Party\\\").\\n\\n1. Purpose: The Disclosing Party will disclose confidential information related to [Topic of Research] to the Receiving Party for [Purpose].\\n\\n2. Confidential Information: Defined as all non-public reports, data, designs, and other materials provided by the Disclosing Party to the Receiving Party.\\n\\n3. Receiving Party's Obligations:\\n a. Use, reproduce, or distribute the confidential information only for the agreed purpose.\\n b. Restrict access to the information to necessary parties, ensuring they abide by strict confidentiality.\\n c. Return or destroy all confidential information upon request or at the end of the agreement.\\n\\n4. Exclusions: Information will not be classified as confidential if it is already known to the Receiving Party, publicly known, or independently developed by the Receiving Party.\\n\\n5. 
Non-Competition: The Receiving Party will not engage in any competing business against the Disclo", - "type": "code", - "location": "/examples/doc_merge/pure_documents.json:3-3" - }, - "105": { - "file_id": 6, - "content": "The code is a confidentiality agreement between a Disclosing Party and a Recipient (Receiving Party) involving the exchange of confidential information related to a specific topic for a stated purpose. It outlines obligations for the Receiving Party, exclusions, and non-competition clauses.", - "type": "comment" - }, - "106": { - "file_id": 6, + "file_id": 5, "content": "sing Party during the agreement and one year after its termination.\\n\\n6. Term and Termination: The agreement is valid for [e.g., \\\"two years\\\"], unless terminated earlier with [e.g., \\\"30 days\\\"] written notice. The Receiving Party's non-disclosure and non-competition obligations persist post-termination.\\n\\n7. General Provisions:\\n a. Governing Law: [Your State]'s laws apply.\\n b. Amendments: Only valid if written and signed by both parties.\\n c. 
Entire Agreement: This contract overrules previous related agreements.\\n\\nSigned as of the Effective Date by [Your Company Name] - Disclosing Party [Recipient Name] - Receiving Party.\",\n \"CONFIDENTIALITY & NON-DISCLOSURE AGREEMENT\\n\\n Entities Involved:\\n Effective [Date], between [AquaBlue Innovations], established in [State], and [PineTree Solutions], a registered entity.\\n\\n Objective:\\n To safeguard classified data during talks of a potential technological alliance.\\n\\n Specification of Protected Information:\\n ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:3-4" }, - "107": { - "file_id": 6, + "101": { + "file_id": 5, "content": "The code defines a confidentiality and non-disclosure agreement between AquaBlue Innovations and PineTree Solutions, with provisions for protecting classified information during potential technological alliance discussions.", "type": "comment" }, - "108": { - "file_id": 6, + "102": { + "file_id": 5, "content": "Particularly:\\n\\na. System designs and architectural schematics.\\nb. Proprietary computational algorithms.\\n\\n Receiver's Obligations:\\n a. Maintain strict non-disclosure using best practices.\\n b. Employ solely for the aforementioned aim.\\n c. No unveiling without explicit authorization.\\n\\n Violation Ramifications:\\n A charge of $280,000 for every infringement, plus possible legal proceedings.\\n\\n General Terms:\\n Binding for both parties and any successors. 
This encapsulates the entire accord.\\n\\n Legal Reference:\\n Governed as per [State]'s legal framework.\\n\\n Attestation:\\n Duly signed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"SECRECY & DISCLOSURE AGREEMENT\\n\\n Contracting Parties:\\n Dated [Date], drawn between [AquaBlue Innovations], a [State]-based corporation, and [PineTree Solutions], a licensed organization.\\n\\n Aim:\\n To protect exclusive insights amidst dialogues for a technological partnership.\\n\\n C", "type": "code", "location": "/examples/doc_merge/pure_documents.json:4-5" }, - "109": { - "file_id": 6, + "103": { + "file_id": 5, "content": "Code snippet describes a confidentiality agreement between AquaBlue Innovations and PineTree Solutions, detailing non-disclosure obligations, violation consequences, and legal reference.", "type": "comment" }, - "110": { - "file_id": 6, + "104": { + "file_id": 5, "content": "ategorization of Sensitive Data:\\n Includes:\\n\\na. Internal software blueprints.\\nb. Intellectual property awaiting patents.\\n\\n Commitments of Recipient:\\n a. Uphold confidentiality, ensuring data integrity.\\n b. Utilize strictly for collaborative ventures.\\n c. No exposure without prior consensus.\\n\\n Repercussions for Non-Compliance:\\n $295,000 fine for each transgression, and the option for legal recourse.\\n\\n Overall Provisions:\\n Legally enforceable for signatories and successors. Complete and sole agreement.\\n\\n Juridical Standpoint:\\n Under the auspices of [State] laws.\\n\\n Ratification:\\n Confirmed and endorsed on [Date].\\n\\n[AquaBlue Innovations] [PineTree Solutions]\",\n \"This Non-Disclosure and Non-Competition Agreement is made between [Your Company Name] and [Contractor Name/Company].\\n\\n1. Confidentiality: The Contractor acknowledges access to the Company's confidential information during their relationship.\\n\\n2. 
Non-Disclosure: The", "type": "code", "location": "/examples/doc_merge/pure_documents.json:5-6" }, - "111": { - "file_id": 6, + "105": { + "file_id": 5, "content": "The code provides the details of a non-disclosure and non-competition agreement between AquaBlue Innovations and PineTree Solutions. It outlines the categorization of sensitive data, commitments of the recipient, repercussions for non-compliance, overall provisions, jurisdictional standpoint, and ratification details.", "type": "comment" }, - "112": { - "file_id": 6, + "106": { + "file_id": 5, "content": " Contractor agrees not to disclose, use, reproduce, or distribute this confidential information unless necessary for their obligations.\\n\\n3. Non-Competition: The Contractor agrees not to compete with the company or assist others in doing so for one year after the termination of their relationship. They also agree not to solicit the company's clients or customers for the benefit of a competitor for one year.\\n\\n4. Return of Confidential Information: At the end of the relationship or upon the company's request, the Contractor will return all confidential information and copies thereof.\\n\\n5. Remedies: For any breach, the Company may seek specific performance and injunctive relief, in addition to other remedies.\\n\\n6. Governing Law: The Agreement is governed by the laws of [Your State].\\n\\n7. 
Entire Agreement: This document replaces all previous agreements and understandings on the subject.\\n\\nBoth parties acknowledge understanding and voluntarily accepting the Agreement.\\n\\nSignatures required from [Your Company Name] and [Contractor Name/Company].\",", "type": "code", "location": "/examples/doc_merge/pure_documents.json:6-6" }, - "113": { - "file_id": 6, + "107": { + "file_id": 5, "content": "This code contains the terms of a contract between a company and a contractor, outlining confidentiality obligations, non-competition agreement, return of information, legal remedies, governing law, and acknowledgment by both parties.", "type": "comment" }, - "114": { - "file_id": 6, + "108": { + "file_id": 5, "content": " \"This Loyalty Agreement is between [Company Name] and [Employee Full Name], where the company agrees to provide specialized training at no cost to the employee, who in turn commits to work for the company for a specified period. If the employee leaves the company within two years after completing training, they must pay $50,000 as compensation for training costs, payable within 30 days of termination. Exceptions to this repayment include termination without cause, resignation due to breach of agreement by the company, or other agreed upon circumstances. Any changes to this agreement must be in writing and signed by both parties, and the agreement will be governed by the laws of [State/Country]. This agreement is binding to all involved parties and their successors. 
Both the company and the employee sign to attest to these terms.\",\n \"EMPLOYEE LOYALTY AGREEMENT\\n\\nThis agreement is entered into by [Company Name] and [Employee Name] to protect the company's business interests, goodw", "type": "code", "location": "/examples/doc_merge/pure_documents.json:7-8" }, - "115": { - "file_id": 6, + "109": { + "file_id": 5, "content": "This is a loyalty agreement between a company and an employee, outlining the specialized training provided to the employee at no cost in exchange for their commitment to work for the company for a specified period. The employee may need to pay compensation if they leave within two years after completing training, subject to certain exceptions. The agreement is governed by the laws of [State/Country] and is binding to all involved parties and their successors.", "type": "comment" }, - "116": { - "file_id": 6, + "110": { + "file_id": 5, "content": "ill, and confidential information, and affirm employee's loyalty. \\n\\n1. Non-disclosure: Employee agrees to not disclose or use company's confidential information, during or post-employment. \\n\\n2. Non-competition: Employee will not work for or establish a competitor within [e.g., \\\"50\\\"] miles from the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n3. Non-solicitation: Employee will not solicit clients or employees of the company for [e.g., \\\"12\\\"] months post-employment.\\n\\n4. Return of Property: Employee will return all company property upon termination.\\n\\n5. Remedies: Company can seek injunction for a breach or potential breach of this agreement.\\n\\n6. Severability: If any provision of this agreement is held invalid, the remainder of the Agreement will continue.\\n\\n7. Governing Law: This agreement will be governed by the laws of [State, e.g., \\\"California\\\"].\\n\\n8. Agreement: This is the entire agreement and supersedes prior negotiations.\\n\\n9. 
Amendments: Any changes must ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:8-8" }, - "117": { - "file_id": 6, + "111": { + "file_id": 5, "content": "This code outlines the terms and conditions of an employment agreement, including non-disclosure, non-competition, non-solicitation clauses, property return policy, legal remedies, severability, governing law, and amendment procedures.", "type": "comment" }, - "118": { - "file_id": 6, + "112": { + "file_id": 5, "content": "be in writing and signed by both parties.\\n\\nSignatures of both parties indicate agreement to these terms.\\n\\n[Company Name] - Authorized Signatory [Employee Name]\",\n \"This Loyalty Agreement is between [Company Name] and [Contractor Company Name]. The Agreement ensures the Contractor's loyalty and confidentiality towards the Company during and post engagement. Contractor agrees not to use or disclose the Company's confidential information, or engage in competing business or solicitation for a period of [e.g., \\\"12\\\"] months post termination. Contractor must return all Company property upon termination. In case of breach, Company can seek legal remedies including injunction. The Agreement remains valid even if a provision is held invalid. The Agreement follows [State, e.g., \\\"California\\\"] laws and replaces all previous understandings. It can be amended only in writing with both parties' signature.\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______", "type": "code", "location": "/examples/doc_merge/pure_documents.json:8-10" }, - "119": { - "file_id": 6, + "113": { + "file_id": 5, "content": "Code snippet represents a loyalty agreement template between a company and a contractor. It includes confidentiality, non-compete and non-solicitation clauses, return of property upon termination, legal remedies in case of breach, and adherence to specified state laws. 
The agreement can only be amended with both parties' written signature.", "type": "comment" }, - "120": { - "file_id": 6, + "114": { + "file_id": 5, "content": ", 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. CONFIDENTIALITY\\n\\nContractor agrees not to disclose, use, or allow the use of the Company's confidential information during or after the relationship, except as required for their services to the Company.\\n\\n2. NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n3. NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n4. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property and data.\\n\\n5. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contra", "type": "code", "location": "/examples/doc_merge/pure_documents.json:10-10" }, - "121": { - "file_id": 6, + "115": { + "file_id": 5, "content": "This code is a contractual agreement between Company Name and Contractor Company Name, outlining confidentiality, non-competition, non-solicitation, property return, and penalty for breach clauses.", "type": "comment" }, - "122": { - "file_id": 6, + "116": { + "file_id": 5, "content": "ctor shall pay the Company a penalty of $50,000.\\n\\n6. GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"California\\\"] laws.\\n\\n7. 
ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\\nDate: _______________________ Date: _______________________\",\n \"B2B CONTRACTOR LOYALTY AGREEMENT\\n\\nThis Agreement is made on _____ day of ______, 20, between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Contractor Company Name], located at [Contractor Address] (\\\"Contractor\\\").\\n\\n1. DEFINITION OF CONFIDENTIAL INFORMATION\\n\\nFor the purposes of this Agreement, \\\"confidential information\\\" shall refer to research results, software created, devices produced by the Company, and any other information deemed proprietary or not generally known to the public.\\n\\n2. CONFIDENTIALITY\\n\\nContractor agrees not to ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:10-11" }, - "123": { - "file_id": 6, + "117": { + "file_id": 5, "content": "This code represents a B2B contractor loyalty agreement between Company Name and Contractor Company Name, effective on a specific date. The contract outlines terms including the definition of confidential information, non-disclosure obligations, payment terms, governing law, and agreement superseding prior discussions or agreements. Both parties must sign below to agree to these terms.", "type": "comment" }, - "124": { - "file_id": 6, + "118": { + "file_id": 5, "content": "disclose, use, or allow the use of the Company's confidential information, as defined herein, during or after the relationship, except as required for their services to the Company.\\n\\n3. NON-COMPETITION\\n\\nFor 12 months post-relationship, the Contractor won't provide similar services to any entity competing with the Company within a 50-mile radius of any Company location.\\n\\n4. 
NON-SOLICITATION\\n\\nFor 12 months post-relationship, the Contractor won't solicit or induce any entity or individual connected to the Company to cease or reduce their relationship with the Company.\\n\\n5. RETURN OF PROPERTY\\n\\nUpon relationship termination or on Company's request, the Contractor will immediately return all Company property, including all items containing or pertaining to confidential information.\\n\\n6. PENALTY FOR BREACH\\n\\nIn the event of a breach of this Agreement, the Contractor shall pay the Company a penalty of $50,000.\\n\\n7. GOVERNING LAW\\n\\nThis Agreement is governed by [State, e.g., \\\"Ca", "type": "code", "location": "/examples/doc_merge/pure_documents.json:11-11" }, - "125": { - "file_id": 6, + "119": { + "file_id": 5, "content": "Contract specifying confidentiality, non-compete, non-solicitation, property return, breach penalty, and governing law for a relationship between a company and contractor.", "type": "comment" }, - "126": { - "file_id": 6, + "120": { + "file_id": 5, "content": "lifornia\\\"] laws.\\n\\n8. ENTIRE AGREEMENT\\n\\nThis Agreement supersedes prior discussions and agreements between the parties.\\n\\nBy signing below, the parties agree to these terms.\\n\\n[Company Name] - Signatory [Contractor Company Name] - Signatory\",\n \"The Non-Disclosure Agreement (NDA) dated [Date] is between [Company], based in [Country/State], and [Supplier], also incorporated in [Country/State]. The Company intends to disclose confidential information to the Supplier for [purpose]. This confidential data can include business strategies, financial data, customer information, and product designs. The Supplier agrees to refrain from sharing this information, barring any legal requirements. Exceptions to this confidentiality are in cases where the information becomes public or was already known by the Supplier before the Company's disclosure. If the Supplier breaches this agreement, they face a financial penalty of [$]. 
The NDA is valid for [X years], unless the Company provides writte", "type": "code", "location": "/examples/doc_merge/pure_documents.json:11-12" }, - "127": { - "file_id": 6, + "121": { + "file_id": 5, "content": "The code contains the text of a Non-Disclosure Agreement (NDA) between Company and Supplier, including terms such as purpose of sharing confidential information, non-disclosure obligations for Supplier, exceptions, penalties, and agreement validity.", "type": "comment" }, - "128": { - "file_id": 6, + "122": { + "file_id": 5, "content": "n termination. Upon the Company's request, the Supplier must return or destroy all copies of Confidential Information. This agreement supersedes previous agreements and can only be altered by a written document approved by both parties. The NDA is governed by the laws of [specific country/state].\",\n \"NON-DISCLOSURE AND NON-COMPETE AGREEMENT\\n\\nEffective Date: [Date]\\n\\nPARTIES:\\n\\n Company: [Full Legal Name of Company], located at [Company Address].\\n Supplier: [Full Legal Name of Supplier], located at [Supplier Address].\\n\\n1. CONFIDENTIALITY:\\n\\nSupplier shall not disclose Company's confidential information, which includes business strategies, financial data, and customer details, to any third party. This confidentiality obligation lasts for [X years, e.g., \\\"5 years\\\"] from the date of disclosure.\\n\\n2. 
NON-COMPETITION:\\n\\nFor [X years, e.g., \\\"3 years\\\"] following the termination of their business relationship, Supplier agrees not to engage in or start any business that dir", "type": "code", "location": "/examples/doc_merge/pure_documents.json:12-13" }, - "129": { - "file_id": 6, + "123": { + "file_id": 5, "content": "This code contains a non-disclosure and non-compete agreement between a company and a supplier, with details including the effective date, parties involved, confidentiality obligations, non-competition restrictions, termination procedures, and governing laws.", "type": "comment" }, - "130": { - "file_id": 6, + "124": { + "file_id": 5, "content": "ectly competes with Company within a [X mile/km radius, e.g., \\\"50-mile radius\\\"] of Company's primary business location.\\n\\n3. PENALTY FOR BREACH:\\n\\nShould Supplier breach this Agreement, they shall pay Company a penalty of [specific amount, e.g., \\\"$50,000\\\"], in addition to any other legal remedies available to Company.\\n\\n4. RETURN OF INFORMATION:\\n\\nUpon request, Supplier shall return or destroy all of Company's confidential information and confirm its deletion in writing.\\n\\n5. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\nAGREEMENT ACKNOWLEDGEMENT:\\n\\n__________ [Company] __________ [Supplier]\",\n \"DATA ANALYSIS EMPLOYEE AGREEMENT\\n\\nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:13-14" }, - "131": { - "file_id": 6, + "125": { + "file_id": 5, "content": "This code represents a contract or agreement between a company and a supplier, outlining the terms of their business relationship. 
It includes provisions such as competition limits, penalty for breach, return of information, governing law, and acknowledgement by both parties.", "type": "comment" }, - "132": { - "file_id": 6, + "126": { + "file_id": 5, "content": "the \\\"Company,\\\" and [Employee Name], an individual residing at [Employee Address], herein referred to as the \\\"Employee.\\\"\\n\\n Position and Duties:\\n a. The Company hereby employs Employee in the capacity of Data Analyst.\\n b. The Employee's primary duties will be to [specific data analysis tasks, e.g., \\\"analyze sales data, forecast trends, and produce reports for managerial review\\\"].\\n\\n Term: The Employee's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Employee under this Agreement, the Company will pay Employee a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Employee agrees not to disclose or use, either during or after the term of employment, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of performin", "type": "code", "location": "/examples/doc_merge/pure_documents.json:14-14" }, - "133": { - "file_id": 6, + "127": { + "file_id": 5, "content": "This code defines an employment agreement for a Data Analyst, specifying the employee's duties, term, compensation, and confidentiality obligations.", "type": "comment" }, - "134": { - "file_id": 6, + "128": { + "file_id": 5, "content": "g their duties for the Company.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Employee in the course of this employment related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. 
Upon termination, Employee agrees to return all company property and data.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreement may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\\n\\n[Company Name or Authorized [Employee Name]\\nRepresentative Name, Title]\",\n \"DATA ANALYSIS SERVICE AGREEMENT\\n\\", "type": "code", "location": "/examples/doc_merge/pure_documents.json:14-15" }, - "135": { - "file_id": 6, + "129": { + "file_id": 5, "content": "Code is an agreement between a company and an employee, defining intellectual property ownership, termination terms, governing law, amendments process, and more. It outlines the legal relationship and responsibilities for both parties.", "type": "comment" }, - "136": { - "file_id": 6, + "130": { + "file_id": 5, "content": "nThis Agreement (\\\"Agreement\\\") is made and entered into as of [Date], by and between [Company Name], a [legal structure, e.g., \\\"corporation\\\"] incorporated under the laws of [State/Country], with its principal place of business at [Company Address], herein referred to as the \\\"Company,\\\" and [Contractor Business Name], a [legal structure, e.g., \\\"limited liability company\\\"] organized under the laws of [State/Country], with its principal place of business at [Contractor Business Address], herein referred to as the \\\"Contractor.\\\"\\n\\n Scope of Work:\\n a. The Contractor agrees to provide data analysis services to the Company.\\n b. 
The specific services will include [specific data analysis tasks, e.g., \\\"analyzing sales data, forecasting trends, and producing reports for managerial review\\\"].\\n\\n Term: The Contractor's engagement will commence on [Start Date] and will terminate on [End Date].\\n\\n Compensation: For the services rendered by the Contractor under this Agreeme", "type": "code", "location": "/examples/doc_merge/pure_documents.json:15-15" }, - "137": { - "file_id": 6, + "131": { + "file_id": 5, "content": "This code snippet is the beginning of a legal agreement between a company and a contractor, outlining the scope of work, term, and compensation for data analysis services.", "type": "comment" }, - "138": { - "file_id": 6, + "132": { + "file_id": 5, "content": "nt, the Company will pay the Contractor a total sum of [specific amount, e.g., \\\"$5,000\\\"] payable on [payment schedule, e.g., \\\"a monthly basis\\\"].\\n\\n Confidentiality: The Contractor agrees not to disclose or use, either during or after the term of this Agreement, any proprietary or confidential information or data of the Company without the Company's prior written consent, except as necessary in the course of providing the services.\\n\\n Intellectual Property: Any works, developments, or inventions created by the Contractor in the course of providing the services related to the Company's business will remain the sole property of the Company.\\n\\n Termination: Either party may terminate this Agreement with [e.g., \\\"30\\\"] days written notice. 
Upon termination, Contractor agrees to return all company data and any other proprietary materials.\\n\\n Governing Law: This Agreement shall be governed by and construed under the laws of [State/Country].\\n\\n Amendments: This Agreemen", "type": "code", "location": "/examples/doc_merge/pure_documents.json:15-15" }, - "139": { - "file_id": 6, + "133": { + "file_id": 5, "content": "This code is a contract between the Company and Contractor, outlining payment terms, confidentiality agreements, intellectual property ownership, termination conditions, governing law, and amendments to the agreement.", "type": "comment" }, - "140": { - "file_id": 6, + "134": { + "file_id": 5, "content": "t may only be amended in writing and signed by both parties.\\n\\n Entire Agreement: This Agreement contains the entire agreement between the parties and supersedes all prior negotiations, understandings, and agreements between the parties.\\n\\nThe parties hereto have executed this Agreement as of the date first above written.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nDate: [Insert Date]\\n\\nParties: [University Name], [University Address] (\\\"Disclosing Party\\\") and [Researcher's Full Name], [Researcher's Address] (\\\"Receiving Party\\\").\\n\\n1. Purpose: For the research of [Briefly Describe the Research or Project].\\n\\n2. Confidential Information: Includes data, studies, reports, patents, and other valuable business-related material.\\n\\n3. Obligations:\\na. Confidential Information must remain secret.\\nb. It's for the specified purpose only.\\nc. No third-party disclosure without consent.\\n\\n4. Exceptions: Public knowledge, third-party shared info, or independently developed.\\n\\n5. Duration: ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:15-16" }, - "141": { - "file_id": 6, + "135": { + "file_id": 5, "content": "This code appears to be a non-disclosure agreement (NDA) between a researcher and a university. 
The NDA outlines the terms of confidentiality, purpose, obligations, exceptions, and duration for sharing research-related materials. It also mentions the possibility of amending the agreement in writing and signed by both parties, and that this agreement contains the entire understanding between them, superseding any prior agreements.", "type": "comment" }, - "142": { - "file_id": 6, + "136": { + "file_id": 5, "content": "Confidentiality lasts [X years, e.g., 2 years] from disclosure.\\n\\n6. Return: All Confidential Information must be returned or destroyed upon request.\\n\\n7. No Transfer: This doesn't grant property rights or licenses.\\n\\n8. Law: Governed by [State/Country] laws.\\n\\n9. Amendments: Only in writing and signed.\\n\\nAgreement: By signing, parties agree to the terms.\",\n \"UNIVERSITY-BUSINESS COOPERATION AGREEMENT\\n\\nThis Cooperation Agreement (the \\\"Agreement\\\") is made and entered into on [Insert Date], by and between [Business Name], a [business type, e.g., \\\"corporation\\\"] located at [Business Address], hereinafter referred to as the \\\"Business', and [University Name], a higher education institution located at [University Address], hereinafter referred to as the \\\"University\\\".\\n\\n1. Objective:\\n\\nThe purpose of this Agreement is to define the terms under which the Business and the University will cooperate to [Objective e.g., \\\"jointly conduct research, promote innovation, and facilitate", "type": "code", "location": "/examples/doc_merge/pure_documents.json:16-17" }, - "143": { - "file_id": 6, + "137": { + "file_id": 5, "content": "This code represents a legal agreement between a business and a university, outlining the terms of their cooperation for research, innovation, and facilitation. 
The agreement includes provisions regarding confidentiality, return/destruction of information, transfer restrictions, governing law, amendment process, and signing parties' agreement to the terms.", "type": "comment" }, - "144": { - "file_id": 6, + "138": { + "file_id": 5, "content": " the exchange of knowledge in the field of _________\\\"].\\n\\n2. Scope of Cooperation:\\n\\na. Research Collaborations: Joint research initiatives, sharing of resources, and publications.\\n\\nb. Internships and Placements: Facilitation of student internships, projects, and job placements.\\n\\nc. Seminars and Workshops: Organizing joint seminars, conferences, and workshops.\\n\\nd. Facilities and Resource Sharing: Providing access to labs, equipment, libraries, etc.\\n\\n3. Intellectual Property:\\n\\nAll intellectual property developed jointly will be shared, and a separate agreement will detail the rights, ownership, and any revenue distribution.\\n\\n4. Funding and Resources:\\n\\nBoth parties agree to jointly contribute [Specify Amount or Percentage], and additional fund sourcing details will be determined on a project-by-project basis.\\n\\n5. Confidentiality:\\n\\nBoth parties agree to maintain the confidentiality of shared proprietary information.\\n\\n6. Duration and Termination:\\n\\nThis Agreement wi", "type": "code", "location": "/examples/doc_merge/pure_documents.json:17-17" }, - "145": { - "file_id": 6, + "139": { + "file_id": 5, "content": "This code is outlining the terms and conditions for a research collaboration between two organizations, including areas of cooperation, intellectual property sharing, funding contributions, confidentiality, and agreement duration.", "type": "comment" }, - "146": { - "file_id": 6, + "140": { + "file_id": 5, "content": "ll remain in effect for [e.g., \\\"three years\\\"] from the date of signing, unless terminated earlier by either party with [e.g., \\\"30 days\\\"] written notice.\\n\\n7. 
Dispute Resolution:\\n\\nAny disputes arising from this Agreement will first attempt resolution through mediation. If unresolved, disputes will be subject to the jurisdiction of [State/Country].\\n\\n8. Amendments:\\n\\nChanges to this Agreement must be in writing and signed by both parties.\\n\\n9. Liability:\\n\\nEach party assumes responsibility for its actions and is not liable for the actions of the other party.\\n\\n10. Governing Law:\\n\\nThis Agreement is governed by the laws of [State/Country].\\n\\nIN WITNESS WHEREOF, both parties have executed this Agreement as of the date first mentioned above.\",\n \"SUPPLY AGREEMENT FOR UNIVERSITY LABORATORY\\n\\nThis Supply Agreement (the \\\"Agreement\\\"), made as of [Insert Date], is entered into by and between [Supplier Name], a [business entity type, e.g., \\\"corporation\\\"] having its principal o", "type": "code", "location": "/examples/doc_merge/pure_documents.json:17-18" }, - "147": { - "file_id": 6, + "141": { + "file_id": 5, "content": "This is a sample Supply Agreement for a university laboratory between the Supplier and the University. The agreement outlines terms, dispute resolution, amendments, liability, and governing law.", "type": "comment" }, - "148": { - "file_id": 6, + "142": { + "file_id": 5, "content": "ffice at [Supplier Address], hereinafter referred to as the \\\"Supplier', and [University Name], a higher education institution located at [University Address], acting through its [specific department or laboratory, e.g., \\\"Department of Chemistry\\\"], hereinafter referred to as the \\\"University\\\".\\n\\n1. Purpose:\\n\\nThe Supplier agrees to provide specific products/materials/equipment, as detailed in Annex A, to the University for use in its laboratory.\\n\\n2. Terms of Supply:\\n\\na. Description of Goods: The goods to be supplied are detailed in Annex A attached herewith.\\n\\nb. 
Delivery: Goods will be delivered to [University Address or specific lab address] within [specific timeframe].\\n\\nc. Pricing: The price for the goods is set out in Annex A and includes all packaging, transportation, and delivery costs unless otherwise specified.\\n\\n3. Payment Terms:\\n\\nPayments will be made by the University within [e.g., \\\"30 days\\\"] of receiving the invoice from the Supplier.\\n\\n4. Warranty:\\n\\nThe", "type": "code", "location": "/examples/doc_merge/pure_documents.json:18-18" }, - "149": { - "file_id": 6, + "143": { + "file_id": 5, "content": "The code outlines an agreement between a Supplier and a University for the provision of specific products/materials/equipment to be used in the University's laboratory, with details outlined in Annex A. It covers terms of supply, payment terms, and warranty information.", "type": "comment" }, - "150": { - "file_id": 6, + "144": { + "file_id": 5, "content": " Supplier warrants that all goods supplied under this Agreement will be free from defects for a period of [specific duration, e.g., \\\"12 months\\\"] from the date of delivery.\\n\\n5. No Disclosure Clause:\\n\\na. The University agrees not to disclose, reproduce, or distribute any proprietary information, trade secrets, or other confidential details related to the products/materials/equipment provided by the Supplier without the Supplier's prior written consent.\\n\\nb. This clause remains effective for a period of [e.g., \\\"5 years\\\"] from the date of the last delivery of the goods under this Agreement.\\n\\n6. Termination:\\n\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term of this Agreement and fails to remedy such breach within the notice period.\\n\\n7. Governing Law:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [State/Country].\\n\\n8. 
Amendments:\\n\\nModifications to this Agreement mus", "type": "code", "location": "/examples/doc_merge/pure_documents.json:18-18" }, - "151": { - "file_id": 6, + "145": { + "file_id": 5, "content": "This code provides a template for an agreement between a supplier and the University, outlining warranties on goods, confidentiality terms, termination conditions, governing law, and amendment procedures.", "type": "comment" }, - "152": { - "file_id": 6, + "146": { + "file_id": 5, "content": "t be in writing and signed by both parties.\\n\\nIN WITNESS WHEREOF, the parties hereto have executed this Supply Agreement as of the date first above written.\",\n \"LABORATORY SUPPLY AGREEMENT\\n\\nDate: [Insert Date]\\n\\nParties:\\n\\n Supplier: [Supplier Name], [Supplier Address]\\n University: [University Name], [University Address]\\n\\n1. Purpose: Supplier will provide goods as listed in Annex A to the University's laboratory.\\n\\n2. Delivery: Within [specific timeframe, e.g., \\\"30 days\\\"] to [specific lab address].\\n\\n3. Payment: University will pay within [e.g., \\\"30 days\\\"] of invoice receipt.\\n\\n4. Warranty: Goods are defect-free for [e.g., \\\"12 months\\\"] from delivery.\\n\\n5. Non-disclosure: University will not disclose Supplier\\u2019s proprietary details for [e.g., \\\"5 years\\\"]. Breach will result in a penalty of [specific amount or formula, e.g., \\\"$5,000 per incident or actual damages, whichever is greater\\\"].\\n\\n6. Termination: [e.g., \\\"30 days\\\"] notice for breaches unresolve", "type": "code", "location": "/examples/doc_merge/pure_documents.json:18-19" }, - "153": { - "file_id": 6, + "147": { + "file_id": 5, "content": "This code represents a template for a Laboratory Supply Agreement, including sections on purpose, delivery, payment, warranty, non-disclosure, and termination. 
It provides a framework for suppliers and universities to establish contractual agreements regarding the supply of goods to laboratories.", "type": "comment" }, - "154": { - "file_id": 6, + "148": { + "file_id": 5, "content": "d within said period.\\n\\n7. Law: Governed by [State/Country] laws.\\n\\n8. Amendments: Both parties must sign written changes.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freelancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n5. TERMINATION:\\n\\nEither party may terminate", "type": "code", "location": "/examples/doc_merge/pure_documents.json:19-20" }, - "155": { - "file_id": 6, + "149": { + "file_id": 5, "content": "This code represents a sample freelance agreement document with various sections such as services provided, payment terms, deadline, confidentiality, and termination. It is governed by the laws of a specific state/country and allows for amendments only through written changes signed by both parties.", "type": "comment" }, - "156": { - "file_id": 6, + "150": { + "file_id": 5, "content": " this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n6. 
INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n7. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n8. AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"FREELANCER AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Client: [Client Full Name or Company Name], located at [Client Address].\\n Freelancer: [Freelancer Full Name], located at [Freelancer Address].\\n\\n1. SERVICES:\\n\\nFreelancer agrees to provide the following services: [Brief description of services, e.g., \\\"web design, content creation, graphic design\\\"].\\n\\n2. PAYMENT TERMS:\\n\\nFor the services rendered, Client agrees to pay Freel", "type": "code", "location": "/examples/doc_merge/pure_documents.json:20-21" }, - "157": { - "file_id": 6, + "151": { + "file_id": 5, "content": "This is a contractual agreement between a client and a freelancer, detailing the services provided by the freelancer, payment terms, termination notice period, independent contractor status, governing law, and amendment requirements.", "type": "comment" }, - "158": { - "file_id": 6, + "152": { + "file_id": 5, "content": "ancer a total of [Total Amount, e.g., \\\"$1,000\\\"]. Payments shall be made as follows: [Payment structure, e.g., \\\"50% upfront, 50% upon completion\\\"].\\n\\n3. DEADLINE:\\n\\nThe services will be completed by [End Date, e.g., \\\"December 31, 2023\\\"].\\n\\n4. PENALTIES:\\n\\na. Late Delivery: If Freelancer fails to deliver the completed service by the specified deadline, a penalty of [specific amount, e.g., \\\"$50\\\"] per day will be deducted from the final payment until the service is delivered.\\n\\nb. 
Confidentiality Breach: Breaching the confidentiality clause will result in a penalty of [specific amount, e.g., \\\"$2,000\\\"].\\n\\n5. CONFIDENTIALITY:\\n\\nFreelancer agrees to maintain confidentiality regarding all proprietary information of the Client.\\n\\n6. TERMINATION:\\n\\nEither party may terminate this agreement with [X days, e.g., \\\"14 days\\\"] written notice. Upon termination, payments will be adjusted for work completed.\\n\\n7. INDEPENDENT CONTRACTOR:\\n\\nFreelancer is an independent contractor and ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:21-21" }, - "159": { - "file_id": 6, + "153": { + "file_id": 5, "content": "This code is a template for a legal agreement between a client and freelancer, outlining payment terms, deadlines, penalties for late delivery or confidentiality breach, confidentiality agreement, termination clause, and the status of the freelancer as an independent contractor.", "type": "comment" }, - "160": { - "file_id": 6, + "154": { + "file_id": 5, "content": "not an employee of the Client. No benefits, rights, or obligations of employment are conferred by this agreement.\\n\\n8. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of New York\\\"].\\n\\n9. AMENDMENTS:\\n\\nAny changes to this agreement must be in writing and signed by both parties.\",\n \"This document outlines the terms of cooperation between Company A and Company B for a joint research project. The duties of each company are designated, with a detailed financial contribution outlined in Appendix A. Confidentiality is strictly enforced, and any intellectual property created will be jointly owned. All published findings will be reviewed by both parties for protection of proprietary information. Termination of this agreement requires 30 days' written notice, and each party assumes any risks or liabilities during this collaboration. 
Amendments must be in writing and signed by both parties. The duration of the agreement lasts from the s", "type": "code", "location": "/examples/doc_merge/pure_documents.json:21-22" }, - "161": { - "file_id": 6, + "155": { + "file_id": 5, "content": "This code outlines an agreement between Company A and Company B for a joint research project, detailing duties, financial contributions, confidentiality, intellectual property ownership, termination terms, risks and liabilities, amendment requirements, and the duration of the agreement.", "type": "comment" }, - "162": { - "file_id": 6, + "156": { + "file_id": 5, "content": "tart date to the end date, unless extended. By signing, both parties acknowledge and agree to these terms.\",\n \"BUSINESS AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement outlines the terms of the collaboration/project/service between Business A and Business B regarding [Brief Description of the Collaboration/Project/Service].\\n\\n2. TERMS OF SERVICE:\\n\\n Business A agrees to: [Specific tasks/responsibilities, e.g., \\\"Supply 500 units of Product X monthly.\\\"].\\n Business B agrees to: [Specific tasks/responsibilities, e.g., \\\"Pay $50 per unit of Product X within 30 days of delivery.\\\"].\\n\\n3. PAYMENT TERMS:\\n\\nPayments shall be made as follows: [Payment structure, e.g., \\\"Payment due within 30 days of invoice.\\\"].\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties commit to maintaining confidentia", "type": "code", "location": "/examples/doc_merge/pure_documents.json:22-23" }, - "163": { - "file_id": 6, + "157": { + "file_id": 5, "content": "Code snippet represents the start of a business agreement document, with fields for effective date and names and addresses of the businesses involved. 
It outlines the purpose of collaboration, terms of service, payment terms, and confidentiality commitments made by both parties upon signing.", "type": "comment" }, - "164": { - "file_id": 6, + "158": { + "file_id": 5, "content": "lity regarding all proprietary information exchanged during this agreement.\\n\\n5. TERMINATION:\\n\\nEither party may terminate this Agreement with [X days, e.g., \\\"30 days\\\"] written notice. If breached, the aggrieved party may seek remedies as per governing laws.\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Business Agreement.\",\n \"CONFIDENTIALITY:\\n\\n4.1. Confidential Information: For the purposes of this Agreement, \\\"Confidential Information\\\" refers to any data or information, regardless of its form, proprietary to or maintained as confidential by either party, which is not publicly known and which is disclosed during the term of this Agreement or in relation to the collaboration/project/service.\\n\\n4.", "type": "code", "location": "/examples/doc_merge/pure_documents.json:23-24" }, - "165": { - "file_id": 6, + "159": { + "file_id": 5, "content": "This code is a Business Agreement template containing sections for confidentiality, termination, governing law, amendments, and acknowledgement. The agreement states that all proprietary information exchanged must be kept confidential, and the agreement can be terminated with X days notice. It's governed by specific country/state laws and any modifications must be in writing signed by both parties.", "type": "comment" }, - "166": { - "file_id": 6, + "160": { + "file_id": 5, "content": "2. 
Protection and Non-Disclosure: Both parties agree to use the Confidential Information solely for the purposes of the Agreement and will exert reasonable efforts to prevent the unauthorized disclosure or use of the Confidential Information. Neither party shall disclose, reproduce, or distribute any portion of the Confidential Information without the disclosing party's prior written consent.\\n\\n4.3. Exclusions: Confidential Information shall not include any data or information which:\\n\\n Is or becomes publicly known through no wrongful act of the receiving party;\\n Is independently developed by the receiving party without the use of the Confidential Information;\\n Is rightfully received from a third party without any obligation of confidentiality;\\n Is disclosed under legal requirement or order.\\n\\n4.4. Return or Destruction: Upon the termination of this Agreement, or at the request of the disclosing party, the receiving party shall return all copies of the Confidential In", "type": "code", "location": "/examples/doc_merge/pure_documents.json:24-24" }, - "167": { - "file_id": 6, + "161": { + "file_id": 5, "content": "This code is from a legal agreement, specifically addressing the protection and non-disclosure of confidential information between two parties. It outlines the exclusions for what constitutes as confidential information and stipulates that upon agreement termination or request, all copies must be returned.", "type": "comment" }, - "168": { - "file_id": 6, + "162": { + "file_id": 5, "content": "formation to the disclosing party or certify in writing that it has destroyed all such copies.\\n\\n4.5. 
Duration: The obligations set forth in this Confidentiality section shall survive the termination or expiration of this Agreement for a period of [specific time, e.g., \\\"five years\\\"].\",\n \"LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Party A: [Full Legal Name of Party A], located at [Party A Address].\\n Party B: [Full Legal Name of Party B], located at [Party B Address].\\n\\n1. LOYALTY COMMITMENT:\\n\\nBoth parties acknowledge the mutual value of their business relationship. They commit to work in good faith, ensuring a collaborative environment that prioritizes trust, loyalty, and shared objectives.\\n\\n2. NON-POACHING OF EMPLOYEES:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 12 months following its termination\\\"], neither Party A nor Party B shall, without the prior written consent of the other party:\\n\\na. Directly or i", "type": "code", "location": "/examples/doc_merge/pure_documents.json:24-25" }, - "169": { - "file_id": 6, + "163": { + "file_id": 5, "content": "This code contains a confidentiality agreement and a loyalty agreement, including details such as effective dates, parties involved, duration of obligations, non-poaching clauses, and survival of obligations after the termination of the agreement.", "type": "comment" }, - "170": { - "file_id": 6, + "164": { + "file_id": 5, "content": "ndirectly solicit, induce, or encourage any employees of the other party to terminate their employment or to engage in employment or other services elsewhere.\\nb. Hire, employ, or contract the services of any employee of the other party who has been employed by the said party within the last 12 months.\\n\\n3. BREACH:\\n\\nAny violation of the clauses in this Agreement will be deemed a material breach and may result in legal action or other remedies as available by law.\\n\\n4. 
GOVERNING LAW:\\n\\nThis Agreement is governed by the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n5. AMENDMENTS:\\n\\nAny modifications to this Agreement must be in writing and signed by both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, both parties affirm their understanding and acceptance of this Loyalty Agreement.\",\n \"NON-COMPETE AND LOYALTY AGREEMENT\\n\\nEffective Date: [Date]\\n\\nBETWEEN:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Busines", "type": "code", "location": "/examples/doc_merge/pure_documents.json:25-26" }, - "171": { - "file_id": 6, + "165": { + "file_id": 5, "content": "Non-compete and loyalty agreement between Business A and another party, outlining restrictions on soliciting employees, hiring former employees, breach consequences, governing law, amendment rules, and acknowledgment of understanding.", "type": "comment" }, - "172": { - "file_id": 6, + "166": { + "file_id": 5, "content": "s B: [Full Legal Name of Business B], located at [Business B Address].\\n\\n1. PURPOSE:\\n\\nThis Agreement is designed to protect the proprietary and business interests of both parties by ensuring loyalty and preventing competition during and after the period of collaboration or engagement.\\n\\n2. NON-COMPETE:\\n\\nFor the duration of this Agreement and [specific time after termination, e.g., \\\"for 24 months following its termination\\\"], neither party shall:\\n\\na. Engage in or support any venture that directly competes with the core business of the other party within [specific geographical region, e.g., \\\"the State of California\\\"].\\nb. Invest in, partner with, or advise any business entity that competes directly with the other party.\\n\\n3. LOYALTY AND NON-POACHING:\\n\\nBoth parties pledge their commitment to a loyal business relationship. Specifically:\\n\\na. 
Neither party will, without the prior written consent of the other, solicit, induce, or encourage any employees or contractors of the o", "type": "code", "location": "/examples/doc_merge/pure_documents.json:26-26" }, - "173": { - "file_id": 6, + "167": { + "file_id": 5, "content": "This code is a legal agreement ensuring loyalty and preventing competition between two parties during and after collaboration or engagement, with sections for non-compete, non-poaching, and loyalty.", "type": "comment" }, - "174": { - "file_id": 6, + "168": { + "file_id": 5, "content": "ther party to terminate their engagement or to join another business.\\nb. Neither party shall disparage or encourage others to disparage the other party, its products, services, or its employees.\\n\\n4. CONFIDENTIALITY:\\n\\nBoth parties agree to maintain confidentiality regarding any proprietary or business-sensitive information exchanged during the course of this Agreement, ensuring that such information isn't disclosed without the explicit consent of the party owning that information.\\n\\n5. BREACH AND REMEDIES:\\n\\nA violation of any provision in this Agreement will be deemed a significant breach. The aggrieved party shall be entitled to seek injunctive relief, damages, or any other remedies available under the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n6. GOVERNING LAW:\\n\\nThis Agreement shall be governed by and interpreted in accordance with the laws of [specific country/state, e.g., \\\"the State of California\\\"].\\n\\n7. AMENDMENTS:\\n\\nModifications or amend", "type": "code", "location": "/examples/doc_merge/pure_documents.json:26-26" }, - "175": { - "file_id": 6, + "169": { + "file_id": 5, "content": "This code snippet represents a legal agreement between two parties, outlining the terms and conditions of their engagement. 
It includes clauses on termination, confidentiality, breach remedies, governing law, and amendments.", "type": "comment" }, - "176": { - "file_id": 6, + "170": { + "file_id": 5, "content": "ments to this Agreement must be in writing and duly signed by authorized representatives of both parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBy signing below, representatives from both businesses affirm their understanding and acceptance of this Non-Compete and Loyalty Agreement.\",\n \"AMENDMENT TO CONTRACT: LENGTH OF ENGAGEMENT\\n\\nThis Amendment is made on [Date], and amends the Non-Compete and Loyalty Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Non-Compete and Loyalty Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"2\\\"] - Length of Engagement\\n\\nThe period of engagement between Business A and Business B as stipulated in the original Agreement is hereby extended/shortened/set to commence from [New Start Date] and conclude on [New End Date].\\n\\nGENERAL ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:26-27" }, - "177": { - "file_id": 6, + "171": { + "file_id": 5, "content": "This code represents an amendment to a Non-Compete and Loyalty Agreement between two businesses, specifically changing the length of engagement. It includes the updated agreement details, such as dates, business names, and locations. 
The amendment is made on a specific date and modifies the original agreement's section related to the duration of engagement.", "type": "comment" }, - "178": { - "file_id": 6, + "172": { + "file_id": 5, "content": "PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT: FEES FOR LATE DELIVERY\\n\\nThis Amendment is made on [Date], and amends the Agreement dated [Original Agreement Date] between:\\n\\n Business A: [Full Legal Name of Business A], located at [Business A Address].\\n Business B: [Full Legal Name of Business B], located at [Business B Address].\\n\\nAMENDMENT:\\n\\nThe parties hereby agree to amend the Agreement as follows:\\n\\nSection [Specific Section Number, e.g., \\\"3\\\"] - Fees for Late Delivery\\n\\na. If Business A/B fails to deliver the products/services by the agreed-upon deadline, a late fee of [Specific Amount or Percentage,", "type": "code", "location": "/examples/doc_merge/pure_documents.json:27-28" }, - "179": { - "file_id": 6, + "173": { + "file_id": 5, "content": "This code is for creating an amendment to a contract, specifically for adjusting the fees for late delivery in case of delayed product/service delivery by either Business A or B. The original agreement and this amendment together represent the entire understanding between the parties involved. Both parties acknowledge and approve the amendment with their signatures.", "type": "comment" }, - "180": { - "file_id": 6, + "174": { + "file_id": 5, "content": " e.g., \\\"$100\\\" or \\\"5% of the total contract value\\\"] shall be applied for each [time period, e.g., \\\"day\\\"] of delay.\\n\\nb. 
The total late fees shall not exceed [Specific Maximum Amount or Percentage, e.g., \\\"$1,000\\\" or \\\"20% of the total contract value\\\"].\\n\\nc. The fees will be deducted from the final payment or invoiced separately, as deemed appropriate by the non-defaulting party.\\n\\nGENERAL PROVISIONS:\\n\\n Except as specifically amended herein, all other terms and conditions of the original Agreement remain unchanged and in full effect.\\n This Amendment, together with the original Agreement, represents the entire understanding between the parties.\\n\\nACKNOWLEDGEMENT:\\n\\nBoth parties, by their signatures below, indicate their acceptance and approval of this Amendment.\",\n \"AMENDMENT TO CONTRACT\\n\\nThis Amendment (the \\\"Amendment\\\") is entered into on [Date], between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred", "type": "code", "location": "/examples/doc_merge/pure_documents.json:28-29" }, - "181": { - "file_id": 6, + "175": { + "file_id": 5, "content": "This code defines an amendment to a contract, specifying late fees for delayed payments, maximum limits, deduction methods, and preserves original agreement terms. 
It also includes acknowledgement from both parties upon signing.", "type": "comment" }, - "182": { - "file_id": 6, + "176": { + "file_id": 5, "content": " to as the \\\"Second Party'', collectively referred to as the \\\"Parties\\\".\\n\\nWHEREAS, the Parties entered into a contract dated [Original Contract Date], hereinafter referred to as the \\\"Original Contract', for [Brief Description of the Original Contract, e.g., \\\"provision of IT services to First Party\\\"];\\n\\nWHEREAS, the Parties now wish to amend the Original Contract to add additional responsibilities pertaining to the maintenance of existing IT systems;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants contained herein and for other good and valuable consideration, the receipt and sufficiency of which is hereby acknowledged, the Parties agree as follows:\\n\\n Additional Responsibility:\\n\\n The Second Party shall assume the responsibility of maintaining and ensuring the smooth functioning of the existing IT systems of the First Party. This responsibility includes, but is not limited to:\\n\\n a. Regular monitoring of the IT systems for any anomalies or issues.\\n\\n ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:29-29" }, - "183": { - "file_id": 6, + "177": { + "file_id": 5, "content": "The code is a legal document amendment, referring to the Original Contract, adding additional responsibilities for maintaining and ensuring the smooth functioning of existing IT systems.", "type": "comment" }, - "184": { - "file_id": 6, + "178": { + "file_id": 5, "content": "b. Prompt troubleshooting and rectification of any issues identified.\\n\\n c. Routine updates and patches to ensure the systems are secure and up-to-date.\\n\\n d. Any other related tasks as deemed necessary by the First Party.\\n\\n Compensation:\\n\\n As a result of this additional responsibility, the Parties agree to a revised compensation of [New Compensation Details, e.g., \\\"$XXX per month\\\"]. 
All other payment terms as outlined in the Original Contract shall remain unchanged.\\n\\n Duration and Termination:\\n\\n The duration and termination clauses from the Original Contract shall remain applicable to this Amendment unless otherwise agreed upon in writing by the Parties.\\n\\n Miscellaneous:\\n\\n All other terms and conditions of the Original Contract, which are not specifically amended by this Amendment, shall remain in full force and effect. In the event of a conflict between this Amendment and the Original Contract, the terms of this Amendment shall prevail.\\n\\n ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:29-29" }, - "185": { - "file_id": 6, + "179": { + "file_id": 5, "content": "This code snippet outlines amendment details for a contract. It includes revised responsibilities such as prompt troubleshooting, routine updates and patches, additional tasks, new compensation terms, duration and termination clauses remaining the same unless agreed upon in writing, and existing terms not affected unless in conflict with the amendment.", "type": "comment" }, - "186": { - "file_id": 6, + "180": { + "file_id": 5, "content": "Entire Agreement:\\n\\n This Amendment, along with the Original Contract, constitutes the entire agreement between the Parties and supersedes any prior understandings, written or oral, relating to the subject matter of this Amendment.\\n\\n Governing Law:\\n\\n This Amendment shall be governed by the laws of [Jurisdiction, e.g., \\\"State of New York\\\"].\",\n \"This appendix, part of the Contract between Party One and Party Two, sets milestones and deadlines for Party Two. \\n\\nMilestone 1 involves tasks such as gathering requirements, designing user interface etc. with the objective of developing initial prototype of a software application. Delivery Deadline: September 15, 2023. 
\\n\\nMilestone 2 involves tasks like incorporating feedback and conducting beta testing, aiming at finalizing and testing the software application. Delivery Deadline: October 15, 2023. \\n\\nEach milestone's completion will be reviewed by Party One; if requirements aren't met, Party Two may correct and resubmit. P", "type": "code", "location": "/examples/doc_merge/pure_documents.json:29-30" }, - "187": { - "file_id": 6, + "181": { + "file_id": 5, "content": "This code section contains legal terms and conditions, specifically an amendment to a contract with governing law and milestones for software development.", "type": "comment" }, - "188": { - "file_id": 6, + "182": { + "file_id": 5, "content": "ayment terms and penalties are outlined in the main Contract. This appendix is executed adhering to main Contract's terms and conditions.\",\n \"APPENDIX B \\u2013 CHANGE IN TIME OF DELIVERY\\n\\nThis Appendix is an addendum to the contract (the \\\"Contract\\\") dated [Original Contract Date], entered into between [Party One Name], hereinafter referred to as the \\\"First Party', and [Party Two Name], hereinafter referred to as the \\\"Second Party\\\". The purpose of this Appendix is to amend and modify the delivery time as specified in the original Contract.\\n\\n Original Delivery Time: As per the terms of the original Contract, the delivery time was set for [Original Delivery Date, e.g., \\\"September 15, 2023\\\"].\\n\\n Revised Delivery Time: The Parties, through mutual agreement, have now decided to amend the delivery time. The new delivery date shall be [Revised Delivery Date, e.g., \\\"October 10, 2023\\\"].\\n\\n Reason for Change: [Provide a brief explanation for the change in delivery time, ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:30-31" }, - "189": { - "file_id": 6, + "183": { + "file_id": 5, "content": "This code defines an appendix to a contract that modifies the delivery time due to mutual agreement between parties. 
The original and revised delivery dates are provided, along with a brief explanation for the change.", "type": "comment" }, - "190": { - "file_id": 6, + "184": { + "file_id": 5, "content": "e.g., \\\"Due to unforeseen challenges in the production process, additional time is required to ensure that the deliverables meet the agreed-upon quality standards.\\\"]\\n\\n Consequences of Delay: Unless otherwise stated in the main body of the Contract:\\n\\n a. If the Second Party fails to meet the revised delivery time, penalties or consequences as outlined in the original Contract for late delivery will apply from the revised delivery date.\\n\\n b. All other terms related to late delivery, including but not limited to penalties, refunds, or rights to terminate, remain effective and unchanged by this Appendix.\\n\\n Prevailing Terms: All other terms and conditions of the original Contract not specifically amended by this Appendix shall remain in full force and effect. In the event of any inconsistency or conflict between the original Contract and this Appendix, the terms of this Appendix shall prevail with respect to the change in the delivery time.\\n\\n Acknowledgment: By sig", "type": "code", "location": "/examples/doc_merge/pure_documents.json:31-31" }, - "191": { - "file_id": 6, + "185": { + "file_id": 5, "content": "The code defines consequences and prevailing terms for a contract's delayed delivery, with the Appendix taking precedence in case of inconsistencies.", "type": "comment" }, - "192": { - "file_id": 6, + "186": { + "file_id": 5, "content": "ning this Appendix, the Parties acknowledge and agree to the revised delivery time and any associated consequences of delays.\\n\\nThis Appendix is executed as an acknowledgment and agreement to the revised delivery time and shall be considered an integral part of the original Contract.\",\n \"APPENDIX C \\u2013 ADDITIONAL CONFIDENTIAL INFORMATION\\n\\nThis Appendix is an extension of the contract (the \\\"Contract\\\") 
dated [Original Contract Date], between [Party One Name] (\\\"First Party\\\") and [Party Two Name] (\\\"Second Party\\\"). It outlines additional categories of confidential information beyond those detailed in the Contract.\\n\\n Additional Confidential Information Includes:\\n\\n a. Non-public financial data.\\n\\n b. Unpublished marketing strategies and materials.\\n\\n c. Upcoming product or service details.\\n\\n d. Proprietary software codes and processes.\\n\\n e. Personnel records.\\n\\n f. Any data labeled as \\\"Confidential\\\" or \\\"Proprietary\\\" after the Contract\\u2019s e", "type": "code", "location": "/examples/doc_merge/pure_documents.json:31-32" }, - "193": { - "file_id": 6, + "187": { + "file_id": 5, "content": "This code appears to be extracting text chunks from a file named \"pure_documents.json\" at lines 30-31, possibly representing various appendices or amendments within an existing contract. These sections outline revised delivery times and additional confidential information categories beyond the original scope of the agreement.", "type": "comment" }, - "194": { - "file_id": 6, + "188": { + "file_id": 5, "content": "xecution.\\n\\n Protection & Exclusions:\\n\\nBoth Parties shall extend the same protection to this Additional Confidential Information as previously agreed upon in the Contract. 
Information that becomes public, is received rightfully from a third party, is independently developed, or gets written release authorization is excluded from confidentiality obligations.\\n\\n Duration:\\n\\nThe confidentiality obligations for this Appendix shall persist as defined in the Contract or, if unspecified, for [e.g., \\\"five years\\\"] from the disclosure date.\\n\\n Prevailing Terms:\\n\\nIf there\\u2019s any conflict between this Appendix and the Contract concerning confidentiality, this Appendix takes precedence concerning Additional Confidential Information.\\n\\nExecuted as an integral part of the Contract.\",\n \"AMENDMENT TO NON-DISCLOSURE AGREEMENT\\n\\nThis Amendment (the \\u201cAmendment\\u201d) is made and entered into as of [Amendment Date], by and between [Party A Name], having an address at [Party ", "type": "code", "location": "/examples/doc_merge/pure_documents.json:32-33" }, - "195": { - "file_id": 6, + "189": { + "file_id": 5, "content": "This code defines a legal document containing confidentiality provisions, exclusions, duration, and precedence in case of conflict with the Contract.", "type": "comment" }, - "196": { - "file_id": 6, + "190": { + "file_id": 5, "content": "A Address] (\\u201cParty A\\u201d), and [Party B Name], having an address at [Party B Address] (\\u201cParty B\\u201d), collectively referred to as the \\u201cParties.\\u201d\\n\\nRECITALS\\n\\nWHEREAS, the Parties entered into a Non-Disclosure Agreement dated [Original NDA Date] (the \\u201cOriginal Agreement\\u201d);\\n\\nWHEREAS, the Parties desire to amend the Original Agreement to extend the duration of certain restrictions therein;\\n\\nNOW, THEREFORE, in consideration of the mutual covenants and promises made by the Parties hereto, the Parties agree as follows:\\n\\n Extension of Time Restrictions: The time restriction set forth in Section [X] of the Original Agreement, currently stating a period of [Original Time, e.g., \\\"two (2) years\\\"], is hereby 
amended and extended to [New Time, e.g., \\\"five (5) years\\\"] from the date of disclosure of the Confidential Information.\\n\\n Full Force and Effect: Except as expressly modified by this Amendment, all terms, conditions, and provisions of the Or", "type": "code", "location": "/examples/doc_merge/pure_documents.json:33-33" }, - "197": { - "file_id": 6, + "191": { + "file_id": 5, "content": "This code represents a legal document outlining the terms of an amendment to an existing Non-Disclosure Agreement (NDA) between Party A and Party B. The amendment extends the duration of certain time restrictions within the original agreement, with new time specified in Section [X].", "type": "comment" }, - "198": { - "file_id": 6, + "192": { + "file_id": 5, "content": "iginal Agreement shall remain in full force and effect. In the event of any conflict between the terms of this Amendment and the Original Agreement, the terms of this Amendment shall govern.\\n\\n Counterparts: This Amendment may be executed in counterparts, each of which shall be deemed an original and all of which together shall constitute one and the same instrument.\\n\\n Governing Law: This Amendment shall be governed by and construed in accordance with the laws of [Governing State or Country, e.g., \\\"the State of California\\\"], without regard to its conflict of laws principles.\\n\\nIN WITNESS WHEREOF, the Parties hereto have executed this Amendment as of the date first above written.\",\n \"BUSINESS COOPERATION AGREEMENT\\n\\nThis Agreement is between [Business A Name], at [Business A Address] (\\\"Business A\\\"), and [Business B Name], at [Business B Address] (\\\"Business B\\\"), effective [Day, Month, Year].\\n\\n1. 
Purpose:\\nBoth businesses will cooperate in [brief description, e.g., \\\"", "type": "code", "location": "/examples/doc_merge/pure_documents.json:33-34" }, - "199": { - "file_id": 6, + "193": { + "file_id": 5, "content": "This code represents an amendment to an original business agreement. It includes provisions for conflict resolution, counterparts execution, and governing law. The agreement is between two businesses - Business A and Business B, with a specified effective date.", "type": "comment" + }, + "194": { + "file_id": 5, + "content": "joint marketing\\\"].\\n\\n2. Responsibilities:\\n\\n Business A will: [Key obligation, e.g., \\\"Promote Business B in newsletters.\\\"]\\n Business B will: [Key obligation, e.g., \\\"Display Business A products.\\\"]\\n\\n3. Term:\\nEffective from the above date for [e.g., \\\"12 months\\\"]. Either party can terminate with [e.g., \\\"30 days\\\"] notice.\\n\\n4. Confidentiality:\\nConfidential information remains private, during and post-agreement.\\n\\n5. Governing Law:\\nGoverning laws of [State/Country, e.g., \\\"California\\\"].\\n\\n6. Amendments:\\nChanges must be written and signed by both parties.\",\n \"APPENDIX TO BUSINESS COOPERATION AGREEMENT\\n\\nEXTENSION OF CONFIDENTIALITY CONDITIONS\\n\\nThis Appendix is made as of [Day, Month, Year], and is appended to the Business Cooperation Agreement dated [Original Agreement Date] (\\\"Original Agreement\\\") between [Business A Name], located at [Business A Address] (\\\"Business A\\\") and [Business B Name], located at [Business B Address] (\\\"Business B\\\").\\n\\n1. Extensio", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:34-35" + }, + "195": { + "file_id": 5, + "content": "This code represents a business cooperation agreement between Business A and Business B, outlining their joint marketing responsibilities, term, confidentiality, governing law, and amendment processes. 
An appendix is also included to extend the confidentiality conditions of the original agreement.", + "type": "comment" + }, + "196": { + "file_id": 5, + "content": "n of Confidentiality Period:\\nThe confidentiality period stipulated in Section 4 (or the appropriate section number) of the Original Agreement is hereby extended. Previously set to expire [Original Expiry Date], it will now extend to [New Expiry Date].\\n\\n2. Continued Obligations:\\nAll other confidentiality obligations and conditions outlined in the Original Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the Original Agreement, constitutes the entire agreement between the parties regarding the subject matter herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nIN WITNESS WHEREOF, both parties hereto have executed this Appendix as of the date first above written.\",\n \"APPENDIX: LOYALTY CLAUSE\\n\\nEffective [Day, Month, Year], attached to the Agreement dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Loya", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:35-36" + }, + "197": { + "file_id": 5, + "content": "Section of code describes an amendment to a confidentiality agreement, extending the confidentiality period and keeping other obligations unchanged. It also outlines the governing law for the amendment.", + "type": "comment" + }, + "198": { + "file_id": 5, + "content": "lty Commitment:\\nFor one year from the Effective Date, both parties pledge loyalty by refraining from activities harmful or competitive to the other within the context of the Agreement.\\n\\n2. Consequences:\\nBreaches may result in Agreement termination and legal action as per the original terms.\\n\\n3. 
Governing Law:\\nGoverned by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is appended to the B2B Contractor Agreement (\\\"Agreement\\\") dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name], hereinafter referred to as \\\"Company', and [Contractor Name], hereinafter referred to as \\\"Contractor\\\".\\n\\n1. Confidentiality:\\n\\n1.1 Both Company and Contractor acknowledge that they may have access to or receive information during the term of the Agreement which is confidential to the disclosing party (\\\"Confidential Information\\\").\\n\\n1.2 Confidential Information shall not include information that:\\n\\n is or b", + "type": "code", + "location": "/examples/doc_merge/pure_documents.json:36-37" + }, + "199": { + "file_id": 5, + "content": "This code snippet represents a legal document with sections for commitment, consequences of breach, and governing law. The commitment section outlines loyalty pledge and restraints from competitive activities. The consequences section describes potential agreement termination and legal actions for breaches. Lastly, the governing law section states the applicable state or country's laws to govern the agreement.", + "type": "comment" } } \ No newline at end of file diff --git a/docs/data/2.json b/docs/data/2.json index bc2cfc0..2512b80 100644 --- a/docs/data/2.json +++ b/docs/data/2.json @@ -1,548 +1,547 @@ { "200": { - "file_id": 6, - "content": "joint marketing\\\"].\\n\\n2. Responsibilities:\\n\\n Business A will: [Key obligation, e.g., \\\"Promote Business B in newsletters.\\\"]\\n Business B will: [Key obligation, e.g., \\\"Display Business A products.\\\"]\\n\\n3. Term:\\nEffective from the above date for [e.g., \\\"12 months\\\"]. Either party can terminate with [e.g., \\\"30 days\\\"] notice.\\n\\n4. Confidentiality:\\nConfidential information remains private, during and post-agreement.\\n\\n5. 
Governing Law:\\nGoverning laws of [State/Country, e.g., \\\"California\\\"].\\n\\n6. Amendments:\\nChanges must be written and signed by both parties.\",\n \"APPENDIX TO BUSINESS COOPERATION AGREEMENT\\n\\nEXTENSION OF CONFIDENTIALITY CONDITIONS\\n\\nThis Appendix is made as of [Day, Month, Year], and is appended to the Business Cooperation Agreement dated [Original Agreement Date] (\\\"Original Agreement\\\") between [Business A Name], located at [Business A Address] (\\\"Business A\\\") and [Business B Name], located at [Business B Address] (\\\"Business B\\\").\\n\\n1. Extensio", - "type": "code", - "location": "/examples/doc_merge/pure_documents.json:34-35" - }, - "201": { - "file_id": 6, - "content": "This code represents a business cooperation agreement between Business A and Business B, outlining their joint marketing responsibilities, term, confidentiality, governing law, and amendment processes. An appendix is also included to extend the confidentiality conditions of the original agreement.", - "type": "comment" - }, - "202": { - "file_id": 6, - "content": "n of Confidentiality Period:\\nThe confidentiality period stipulated in Section 4 (or the appropriate section number) of the Original Agreement is hereby extended. Previously set to expire [Original Expiry Date], it will now extend to [New Expiry Date].\\n\\n2. Continued Obligations:\\nAll other confidentiality obligations and conditions outlined in the Original Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the Original Agreement, constitutes the entire agreement between the parties regarding the subject matter herein.\\n\\n4. 
Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nIN WITNESS WHEREOF, both parties hereto have executed this Appendix as of the date first above written.\",\n \"APPENDIX: LOYALTY CLAUSE\\n\\nEffective [Day, Month, Year], attached to the Agreement dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Loya", - "type": "code", - "location": "/examples/doc_merge/pure_documents.json:35-36" - }, - "203": { - "file_id": 6, - "content": "Section of code describes an amendment to a confidentiality agreement, extending the confidentiality period and keeping other obligations unchanged. It also outlines the governing law for the amendment.", - "type": "comment" - }, - "204": { - "file_id": 6, - "content": "lty Commitment:\\nFor one year from the Effective Date, both parties pledge loyalty by refraining from activities harmful or competitive to the other within the context of the Agreement.\\n\\n2. Consequences:\\nBreaches may result in Agreement termination and legal action as per the original terms.\\n\\n3. Governing Law:\\nGoverned by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is appended to the B2B Contractor Agreement (\\\"Agreement\\\") dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name], hereinafter referred to as \\\"Company', and [Contractor Name], hereinafter referred to as \\\"Contractor\\\".\\n\\n1. 
Confidentiality:\\n\\n1.1 Both Company and Contractor acknowledge that they may have access to or receive information during the term of the Agreement which is confidential to the disclosing party (\\\"Confidential Information\\\").\\n\\n1.2 Confidential Information shall not include information that:\\n\\n is or b", - "type": "code", - "location": "/examples/doc_merge/pure_documents.json:36-37" - }, - "205": { - "file_id": 6, - "content": "This code snippet represents a legal document with sections for commitment, consequences of breach, and governing law. The commitment section outlines loyalty pledge and restraints from competitive activities. The consequences section describes potential agreement termination and legal actions for breaches. Lastly, the governing law section states the applicable state or country's laws to govern the agreement.", - "type": "comment" - }, - "206": { - "file_id": 6, + "file_id": 5, "content": "ecomes public knowledge without breach of this clause;\\n was known by the receiving party before receipt from the disclosing party;\\n is received from a third party without breach of any obligation of confidentiality.\\n\\n1.3 The receiving party shall:\\n\\n use the Confidential Information only for performing under the Agreement;\\n take all reasonable precautions to prevent any unauthorized disclosure of the Confidential Information;\\n not disclose, reproduce, or distribute Confidential Information without the written consent of the disclosing party.\\n\\n2. Duration:\\n\\nThe obligations set forth in this Appendix shall continue for a period of [e.g., \\\"two years\\\"] from the date of termination or expiration of the Agreement.\\n\\n3. 
Return or Destruction:\\n\\nUpon the expiration or termination of the Agreement, or upon the disclosing party's request, the receiving party shall return or, if directed by the disclosing party, destroy all copies of the Confidential Information.\\n\\n", "type": "code", "location": "/examples/doc_merge/pure_documents.json:37-37" }, - "207": { - "file_id": 6, + "201": { + "file_id": 5, "content": "This code snippet contains a confidentiality agreement clause, which outlines the rules for handling and protecting sensitive information. The receiving party is required to use the Confidential Information only for performing under the Agreement, take precautions to prevent unauthorized disclosure, and obtain written consent before reproducing or distributing it. The obligations of this clause continue for a specified duration (e.g., two years) after the expiration or termination of the Agreement, and the receiving party must return or destroy all copies upon request or termination.", "type": "comment" }, - "208": { - "file_id": 6, + "202": { + "file_id": 5, "content": "4. Governing Law:\\n\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the Agreement.\",\n \"APPENDIX: CONFIDENTIALITY CLAUSE\\n\\nThis Appendix is part of the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"] between [Company Name] (\\\"Company\\\") and [Contractor Name] (\\\"Contractor\\\").\\n\\n1. Confidential Information:\\nBoth parties may access or receive the other's confidential information (\\\"Confidential Information\\\") during the Agreement term. Confidential Information excludes publicly known details, data known prior, or information obtained from third parties without confidentiality obligations.\\n\\n2. Obligations:\\nThe recipient shall:\\n\\n Use the Confidential Information solely for the Agreement's purpose.\\n Prevent unauthorized disclosures.\\n Not disclose without prior written consent.\\n\\n3. 
Duration:\\nObligations persist for [e.g., \\\"two years\\\"] post Agreement termination or expiration.\\n\\n4. Return/Des", "type": "code", "location": "/examples/doc_merge/pure_documents.json:37-38" }, - "209": { - "file_id": 6, + "203": { + "file_id": 5, "content": "This code snippet represents a Confidentiality Agreement between two parties, defining the scope of confidential information, obligations to protect it, and its duration post-agreement termination or expiration.", "type": "comment" }, - "210": { - "file_id": 6, + "204": { + "file_id": 5, "content": "truction:\\nUpon Agreement conclusion, or on request, all Confidential Information copies should be returned or destroyed.\\n\\n5. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Tech Company Name], located at [Tech Company Address], (\\\"Company\\\") and [Contractor's Full Name], located at [Contractor Address], (\\\"Contractor\\\").\\n\\nPurpose:\\nContractor will access Company's confidential information during their engagement.\\n\\n1. Definition:\\n\\\"Confidential Information\\\" means proprietary data related to the Company\\u2019s business, excluding publicly known details, prior known information, or data from third parties without confidentiality bounds.\\n\\n2. Obligation:\\nContractor shall:\\n\\n Use Confidential Information solely for engagement purposes.\\n Prevent unauthorized disclosure.\\n\\n3. 
Duration:\\nObligations persist for [e.g., \\\"two years\\\"] from disclosure", "type": "code", "location": "/examples/doc_merge/pure_documents.json:38-39" }, - "211": { - "file_id": 6, + "205": { + "file_id": 5, "content": "Non-Disclosure Agreement (NDA) between a tech company and a contractor, effective on [Effective Date], defining Confidential Information, its use, protection, and obligation duration.", "type": "comment" }, - "212": { - "file_id": 6, + "206": { + "file_id": 5, "content": " date.\\n\\n4. Return:\\nContractor shall return all Confidential Information items upon engagement completion or Company's request, retaining no copies.\\n\\n5. Remedies:\\nBreach may result in legal actions, damages, and costs.\\n\\n6. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: EXTENSION OF CONTRACT DURATION\\n\\nThis Appendix is a part of the Agreement initially dated [Original Agreement Date, e.g., \\\"August 15, 2021\\\"], between [Party A Name], located at [Party A Address] (\\\"Party A\\\") and [Party B Name], located at [Party B Address] (\\\"Party B\\\").\\n\\n1. Duration Extension:\\nThe duration of the Agreement referenced above is hereby extended for an additional two (2) years from the original expiration date. With this extension, the new expiration date of the Agreement will be [New Expiration Date, e.g., \\\"August 15, 2025\\\"].\\n\\n2. All Other Terms Remain Unchanged:\\nExcept for the extension of the contract duration as described herein, all other term", "type": "code", "location": "/examples/doc_merge/pure_documents.json:39-40" }, - "213": { - "file_id": 6, + "207": { + "file_id": 5, "content": "Section describes terms for confidential information handling, remedies for breach, and governing law. 
Appendix extends contract duration by two years while keeping other terms unchanged.", "type": "comment" }, - "214": { - "file_id": 6, + "208": { + "file_id": 5, "content": "s and conditions of the Agreement remain unchanged and in full effect.\\n\\n3. Entire Agreement:\\nThis Appendix, in conjunction with the original Agreement, constitutes the entire agreement between Party A and Party B. Any previous understandings, written or oral, relating to the subject of this Appendix are superseded by the terms herein.\\n\\n4. Governing Law:\\nThis Appendix shall be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"CONFIDENTIALITY AGREEMENT\\n\\nEffective [Effective Date, e.g., \\\"August 15, 2023\\\"], between [Company Name], located at [Company Address] (\\\"Company\\\"), and [Supplier Name], located at [Supplier Address] (\\\"Supplier\\\").\\n\\n1. Definition:\\n\\\"Confidential Information\\\" means proprietary data of the Company, excluding:\\n\\n Pre-disclosed or publicly known data.\\n Info from third parties without confidentiality bounds.\\n\\n2. Obligations:\\nSupplier will:\\n\\n Use Confidential Information solely for b", "type": "code", "location": "/examples/doc_merge/pure_documents.json:40-41" }, - "215": { - "file_id": 6, + "209": { + "file_id": 5, "content": "This code snippet represents a confidentiality agreement between Company and Supplier, with details such as governing law, effective date, obligations of the Supplier regarding Confidential Information, and superseding previous understandings.", "type": "comment" }, - "216": { - "file_id": 6, + "210": { + "file_id": 5, "content": "usiness purposes with the Company.\\n Protect its secrecy and prevent unauthorized disclosure.\\n Return or destroy all Confidential Information upon request or business completion.\\n\\n3. Duration:\\nObligations last for [e.g., \\\"two years\\\"] from disclosure date.\\n\\n4. 
Remedies:\\nBreaches may result in legal actions, damages, and costs by the Company.\\n\\n5. Governing Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: BREACH CONSEQUENCES\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Notification:\\nSuspected breaches must be reported in writing by the non-breaching party.\\n\\n2. Rectification:\\nThe breaching party has [e.g., \\\"14 days\\\"] from notification to rectify, unless irreparable.\\n\\n3. Fees:\\nBreaches incur a penalty of [e.g., \\\"$10,000\\\"], aside from claimed damages.\\n\\n4. Legal Actions:\\nUnresolved or damaging breaches may lead to lega", "type": "code", "location": "/examples/doc_merge/pure_documents.json:41-42" }, - "217": { - "file_id": 6, + "211": { + "file_id": 5, "content": "This code is part of a legal document agreement. It specifies the obligations, duration, remedies for breaches, and governing law in case of any violations. It also includes details about notifying suspected breaches, rectification timelines, fees for breaches, and potential legal actions if necessary.", "type": "comment" }, - "218": { - "file_id": 6, + "212": { + "file_id": 5, "content": "l actions, including injunctive relief, damages, and legal fees.\\n\\n5. Termination:\\nRepeated or severe breaches can cause Agreement termination by the non-breaching party.\\n\\n6. Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"APPENDIX: TERMS OF CONTRACT TERMINATION\\n\\nRelated to the Agreement on [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Termination for Breach:\\nIf either party breaches any conditions of the Agreement, the non-breaching party may terminate the Agreement immediately upon written notice to the breaching party.\\n\\n2. 
Termination by Notice:\\nEither party may terminate the Agreement for any reason by providing a written notice to the other party. The termination will become effective 30 days after the receipt of such notice.\\n\\n3. Obligations Upon Termination:\\nUpon termination, all rights and obligations under the Agreement will cease, except for those which by thei", "type": "code", "location": "/examples/doc_merge/pure_documents.json:42-43" }, - "219": { - "file_id": 6, + "213": { + "file_id": 5, "content": "This code defines contract termination clauses, including termination for breach, termination by notice, and obligations upon termination. It also specifies that the Agreement is governed by specific state or country laws.", "type": "comment" }, - "220": { - "file_id": 6, + "214": { + "file_id": 5, "content": "r nature should survive termination (e.g., confidentiality, liability for prior breaches, etc.).\\n\\n4. Governing Law:\\nThis Appendix, and any disputes arising from it, will be governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"APPENDIX: OBLIGATIONS UPON TERMINATION\\n\\nPertaining to the Agreement dated [Original Agreement Date, e.g., \\\"August 15, 2023\\\"], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Return of Property:\\nUpon termination, each party shall promptly return to the other all property, materials, and assets belonging to the other party, unless otherwise specified in the Agreement.\\n\\n2. Confidential Information:\\nBoth parties shall continue to abide by any confidentiality obligations set forth in the Agreement. Any confidential information must be returned or destroyed, as instructed by the owning party.\\n\\n3. 
Outstanding Payments:\\nAll due payments must be settled within [e.g., \\\"14 day", "type": "code", "location": "/examples/doc_merge/pure_documents.json:43-44" }, - "221": { - "file_id": 6, + "215": { + "file_id": 5, "content": "This code represents an Appendix titled \"Obligations Upon Termination\" in a legal agreement between Party A and Party B. It outlines the responsibilities of both parties, such as returning property, maintaining confidentiality, and settling outstanding payments upon termination of the agreement.", "type": "comment" }, - "222": { - "file_id": 6, + "216": { + "file_id": 5, "content": "s\\\"] of termination, as per the terms of the original Agreement.\\n\\n4. Non-Disparagement:\\nBoth parties agree not to make any derogatory or disparaging statements about the other party post-termination.\\n\\n5. Survival of Provisions:\\nAny provisions in the Agreement that, by their nature, should persist beyond termination (e.g., indemnity, liability, confidentiality) will continue to be in effect.\\n\\n6. Notifications:\\nEach party must inform their respective stakeholders, if necessary, about the termination in a manner that maintains the goodwill and reputation of both parties.\\n\\n7. Transition Assistance:\\nTo ensure a smooth transition, both parties agree to cooperate, as reasonably requested by the other, for a period of [e.g., \\\"30 days\\\"] after termination.\\n\\n8. 
Governing Law:\\nThis Appendix is governed by the laws of [State/Country, e.g., \\\"California\\\"], consistent with the original Agreement.\",\n \"NON-DISCLOSURE AGREEMENT (NDA)\\n\\nEffective [Date, e.g., \\\"August 15, 2023\\\"], be", "type": "code", "location": "/examples/doc_merge/pure_documents.json:44-45" }, - "223": { - "file_id": 6, + "217": { + "file_id": 5, "content": "Non-Disclosure Agreement (NDA) with termination, non-disparagement, survival of provisions, notifications, transition assistance, and governing law clauses.", "type": "comment" }, - "224": { - "file_id": 6, + "218": { + "file_id": 5, "content": "tween [Client Name], (\\\"Client\\\") and [Business Name], (\\\"Business\\\").\\n\\nPurpose:\\nProtection of confidential information exchanged due to potential collaboration.\\n\\n1. Confidentiality:\\nBusiness agrees to keep secret all Confidential Information shared by Client.\\n\\n2. Definition:\\n\\\"Confidential Information\\\" is non-public data shared by either party, excluding info that's publicly available, already known, or received without confidentiality constraints.\\n\\n3. Duration:\\nObligations last [e.g., \\\"two years\\\"] from the date of disclosure.\\n\\n4. Return/Destruction:\\nUpon Client's request, Business will return or destroy all Confidential Information.\\n\\n5. Remedies:\\nUnauthorized disclosures may lead to legal action by Client, including damages.\\n\\n6. Law:\\nGoverned by [State/Country, e.g., \\\"California\\\"] laws.\",\n \"IT SERVICES AGREEMENT\\n\\nEffective Date: [Date, e.g., \\\"August 15, 2023\\\"]\\n\\nParties:\\n\\n [Client Name], located at [Client Address] (\\\"Client\\\")\\n [Service Prov", "type": "code", "location": "/examples/doc_merge/pure_documents.json:45-46" }, - "225": { - "file_id": 6, + "219": { + "file_id": 5, "content": "The code is for a confidentiality agreement between a client and a business. 
It outlines the purpose, terms of confidentiality, definition of confidential information, duration of obligations, return/destruction process, legal remedies, and governing laws.", "type": "comment" }, - "226": { - "file_id": 6, + "220": { + "file_id": 5, "content": "ider Name], located at [Service Provider Address] (\\\"Provider\\\")\\n\\nScope of Work:\\nProvider agrees to offer IT services, including [e.g., \\\"network setup, software installation, and routine maintenance\\\"], as detailed in Attachment A.\\n\\nPayment:\\nClient shall pay Provider [e.g., \\\"$1,000\\\"] per month. Invoices will be sent monthly and are due within [e.g., \\\"30 days\\\"].\\n\\nDuration:\\nThis Agreement starts on [Start Date] and ends on [End Date], unless terminated earlier.\\n\\nTermination:\\nEither party may terminate with [e.g., \\\"30 days\\\"] written notice. Upon termination, any unpaid fees for services rendered become immediately due.\\n\\nConfidentiality:\\nBoth parties agree to keep all business and technical information confidential.\\n\\nLimitation of Liability:\\nProvider's liability is limited to the amount paid by the Client for the specific service causing damage.\\n\\nGoverning Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\nEntire Agreement:\\nThis constitutes the full agreement between both parties.\",", "type": "code", "location": "/examples/doc_merge/pure_documents.json:46-46" }, - "227": { - "file_id": 6, + "221": { + "file_id": 5, "content": "This code is defining the basic structure and content of a service agreement between a client and a service provider, including scope of work, payment terms, duration, termination clauses, confidentiality, limitation of liability, governing law, and stating that this constitutes the full agreement between both parties.", "type": "comment" }, - "228": { - "file_id": 6, + "222": { + "file_id": 5, "content": " \"CONFIDENTIALITY AMENDMENT TO NDA\\n\\nThis Amendment, 
effective [Date, e.g., \\\"August 15, 2023\\\"], modifies the NDA dated [Original Agreement Date] between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n\\n1. Responsibilities:\\n\\na) Protection: Parties must safeguard Confidential Information at least as they do their own.\\n\\nb) Access: Access is limited to those needing it who are also bound by confidentiality.\\n\\nc) Breach Notification: Parties must immediately inform the other of any breaches.\\n\\nd) Return/Destruction: Upon request or agreement end, parties must return or certify the destruction of Confidential Information.\\n\\ne) No Reverse Engineering: Receiving party shall not reverse engineer any provided items.\\n\\n2. Remedies:\\nUnauthorized disclosures permit injunctive relief and other legal remedies.\\n\\n3. Original Agreement:\\nExcept for this Amendment, the NDA remains unchanged.\\n\\nGoverning Law:\\nAs per the NDA.\",\n \"LOYALTY AGREEMENT\\n\\nThis Agreement (\\\"", "type": "code", "location": "/examples/doc_merge/pure_documents.json:47-48" }, - "229": { - "file_id": 6, + "223": { + "file_id": 5, "content": "This code contains different types of legal documents including a Confidentiality Amendment to NDA and a Loyalty Agreement, which outline the terms between two parties.", "type": "comment" }, - "230": { - "file_id": 6, + "224": { + "file_id": 5, "content": "Agreement\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Party A Name], with its principal office at [Party A Address] (\\\"Party A\\\"),\\n [Party B Name], with its principal office at [Party B Address] (\\\"Party B\\\").\\n\\nPurpose:\\nThe parties wish to collaborate and establish a loyal relationship in their joint business endeavors.\\n\\n1. 
Loyalty Commitment:\\n\\na) Both parties commit to act in good faith and refrain from engaging in any activity or partnership that might conflict with the interests of the other party during the term of this Agreement.\\n\\nb) Neither party shall assist, collaborate, or engage with third parties that may cause harm or disrepute to the other party.\\n\\nc) Each party shall prioritize the other's interests in situations where opportunities arise from their collaboration.\\n\\n2. Non-Solicitation:\\nDuring the term of this Agreement, and for [e.g., \\\"one year\\\"] thereafter, neither party shall solicit or attempt to entice away any clients, cust", "type": "code", "location": "/examples/doc_merge/pure_documents.json:48-48" }, - "231": { - "file_id": 6, + "225": { + "file_id": 5, "content": "This code defines a contractual agreement between two parties (Party A and Party B) for collaboration, loyalty commitment, and non-solicitation. The agreement is made as of a specific date and aims to establish a loyal relationship in their joint business endeavors.", "type": "comment" }, - "232": { - "file_id": 6, + "226": { + "file_id": 5, "content": "omers, or employees of the other party.\\n\\n3. Duration:\\nThis Agreement will begin on the Effective Date and remain in effect for [e.g., \\\"two years\\\"] unless terminated earlier by mutual consent.\\n\\n4. Termination:\\nEither party may terminate this Agreement with [e.g., \\\"30 days\\\"] written notice if the other party breaches any term herein.\\n\\n5. Confidentiality:\\nBoth parties agree to maintain the confidentiality of all proprietary or non-public information obtained during the collaboration.\\n\\n6. Governing Law:\\nThis Agreement is governed by the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n7. 
Entire Agreement:\\nThis document constitutes the full understanding between both parties, superseding all prior discussions, agreements, or understandings.\",\n \"BUSINESS CONSULTING CONTRACT\\n\\nThis Consulting Contract (\\\"Contract\\\") is made as of [Date, e.g., \\\"August 15, 2023\\\"], between:\\n\\n [Client Name], with its principal office at [Client Address] (\\\"Client\\\"),\\n [Consultant N", "type": "code", "location": "/examples/doc_merge/pure_documents.json:48-49" }, - "233": { - "file_id": 6, + "227": { + "file_id": 5, "content": "This is a business consulting contract between Client and Consultant. It includes clauses for scope of work, payment terms, duration, termination, confidentiality, governing law, and entire agreement.", "type": "comment" }, - "234": { - "file_id": 6, + "228": { + "file_id": 5, "content": "ame], with its principal office at [Consultant Address] (\\\"Consultant\\\").\\n\\nPurpose:\\nThe Consultant will provide professional consulting services to the Client as described below.\\n\\n1. Scope of Services:\\nConsultant agrees to offer services including, but not limited to:\\na) Business strategy development\\nb) Market analysis\\nc) [Other services as needed]\\nAny additional services will require an amendment to this Contract.\\n\\n2. Compensation:\\nFor services rendered, the Client shall pay the Consultant [e.g., \\\"$100\\\"] per hour. Invoices will be issued [e.g., \\\"monthly\\\"] and are due within [e.g., \\\"30 days\\\"] of receipt.\\n\\n3. Duration:\\nThis Contract begins on [Start Date] and ends on [End Date], unless extended by mutual agreement or terminated earlier.\\n\\n4. Termination:\\nEither party can terminate this Contract with [e.g., \\\"30 days\\\"] written notice. In case of termination, the Client will pay for services rendered up to the notice date.\\n\\n5. 
Confidentiality:\\nThe Consultant sh", "type": "code", "location": "/examples/doc_merge/pure_documents.json:49-49" }, - "235": { - "file_id": 6, + "229": { + "file_id": 5, "content": "Consulting contract between Client and Consultant, outlines services provided, compensation terms, duration, termination conditions, and confidentiality agreement.", "type": "comment" }, - "236": { - "file_id": 6, + "230": { + "file_id": 5, "content": "all maintain the confidentiality of all proprietary information received during the engagement, unless obligated by law to disclose.\\n\\n6. Non-compete:\\nFor [e.g., \\\"six months\\\"] after Contract termination, the Consultant agrees not to provide similar services to any direct competitor of the Client within [e.g., \\\"50 miles\\\"] of the Client's primary location.\\n\\n7. Independent Contractor:\\nThe Consultant is an independent contractor and not an employee of the Client.\\n\\n8. Governing Law:\\nThis Contract shall be governed by and interpreted under the laws of [State/Country, e.g., \\\"California\\\"].\\n\\n9. Entire Agreement:\\nThis Contract represents the entire understanding between both parties, superseding all prior negotiations, discussions, or agreements.\",\n \"APPENDIX A: CONFIDENTIALITY BREACH FEES\\n\\nThis Appendix is attached to and made part of the Contract (\\\"Original Contract\\\") dated [Original Contract Date], between [Party A Name] (\\\"Party A\\\") and [Party B Name] (\\\"Party B\\\").\\n", "type": "code", "location": "/examples/doc_merge/pure_documents.json:49-50" }, - "237": { - "file_id": 6, + "231": { + "file_id": 5, "content": "This code snippet represents an agreement between two parties, Party A and Party B, with sections covering confidentiality, non-compete clauses, independent contractor status, governing law, and entire agreement. 
It also references Appendix A regarding confidentiality breach fees.", "type": "comment" }, - "238": { - "file_id": 6, + "232": { + "file_id": 5, "content": "\\n1. Purpose:\\nThis Appendix defines the fees and penalties associated with any breach of confidentiality as stipulated in the Original Contract.\\n\\n2. Confidentiality Breach Fee:\\nIn the event of a breach of the confidentiality provisions in the Original Contract by either party:\\n\\na) The breaching party will be liable for an immediate penalty of [specific amount, e.g., \\\"$10,000\\\"].\\n\\nb) If the breach results in any direct financial loss to the non-breaching party, the breaching party shall additionally reimburse the non-breaching party for the full amount of such loss.\\n\\nc) The breaching party will also bear all costs, including legal fees, that the non-breaching party incurs while addressing or remedying the breach.\\n\\n3. Payment Terms:\\nPayment of any penalty or reimbursement as defined above shall be made within [e.g., \\\"30 days\\\"] of written notification of the breach.\\n\\n4. Disputes:\\nAny disputes related to this Appendix shall be resolved as stipulated in the dispute resolu", "type": "code", "location": "/examples/doc_merge/pure_documents.json:50-50" }, - "239": { - "file_id": 6, + "233": { + "file_id": 5, "content": "This code defines the fees and penalties for breaching confidentiality in the Original Contract, including immediate penalty amounts, reimbursement for direct financial losses, and coverage of legal fees and costs. Payment terms are outlined as well, with disputes to be resolved according to the dispute resolution stipulations in the contract.", "type": "comment" }, - "240": { - "file_id": 6, + "234": { + "file_id": 5, "content": "tion clause of the Original Contract.\\n\\n5. Continuation of Original Contract:\\nExcept as modified by this Appendix, the Original Contract remains in full force and effect.\\n\\n6. 
Governing Law:\\nThis Appendix, consistent with the Original Contract, is governed by the laws of [State/Country, e.g., \\\"California\\\"].\",\n \"APPENDIX A: STRICT CONFIDENTIALITY BREACH PENALTIES\\n\\nThis Appendix is annexed to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name].\\n\\n1. Breach Fees:\\nIf a party breaches confidentiality:\\n\\na) Immediate penalty: [e.g., \\\"$50,000\\\"].\\n\\nb) For reputational harm or business loss: Additional [e.g., \\\"$100,000\\\"].\\n\\nc) Full reimbursement for direct financial losses caused by the breach.\\n\\nd) All associated legal and remedy costs borne by the breaching party.\\n\\n2. Remedial Actions:\\nThe breaching party must swiftly rectify the breach, potentially including public apologies or recalling disclosed information.\\n\\n3. Payment:\\nDue withi", "type": "code", "location": "/examples/doc_merge/pure_documents.json:50-51" }, - "241": { - "file_id": 6, + "235": { + "file_id": 5, "content": "This code represents a legal document, specifically an appendix to a contract, which outlines strict confidentiality breach penalties. The appendix is attached to the Contract dated [Original Contract Date] between [Party A Name] and [Party B Name]. It states the immediate penalty upon breaching confidentiality, potential additional fees for reputational harm or business loss, full reimbursement for direct financial losses caused by the breach, all associated legal and remedy costs to be borne by the breaching party, and that the breaching party must swiftly rectify the breach. Payment is due within a certain period.", "type": "comment" }, - "242": { - "file_id": 6, + "236": { + "file_id": 5, "content": "n [e.g., \\\"15 days\\\"] of breach notification.\\n\\n4. Termination:\\nNon-breaching party can immediately terminate the main contract upon a breach.\\n\\n5. 
Governing Law:\\nThis Appendix adheres to [State/Country, e.g., \\\"California\\\"] laws.\"\n]", "type": "code", "location": "/examples/doc_merge/pure_documents.json:51-52" }, - "243": { - "file_id": 6, + "237": { + "file_id": 5, "content": "Code snippet outlines contractual terms for a breach notification, termination clause, and governing law.", "type": "comment" }, - "244": { - "file_id": 7, + "238": { + "file_id": 6, "content": "/examples/keyword_counting/README.md", "type": "filepath" }, - "245": { - "file_id": 7, + "239": { + "file_id": 6, "content": "The code offers a frequency computation method for countries in text using seven approaches, utilizing 'countries.csv' as input, and allows for custom samples, budgets, and directory creation with log files.", "type": "summary" }, - "246": { - "file_id": 7, + "240": { + "file_id": 6, "content": "# Keyword Counting\nThe use case in this directory computes the frequencies of occurring countries \nin a long passage of text. We provide implementations of seven different approaches:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT4: split passage into 4 sub-passages\n - GoT8: split passage into 8 sub-passages\n - GoTx: split by sentences\n## Data\nWe provide an input file with 100 samples: `countries.csv`. It is also possible to use\nthe data generator `dataset_gen_countries.py` to generate additional or\ndifferent samples (using GPT-4). The parameters can be updated on line 54 (number of samples to be generated). \nNote that not every generated sample will be included in the dataset, as each sample is \nadditionally tested for validity (observe script output for details).\n## Execution\nThe file to execute the use case is called\n`keyword_counting.py`. 
In the main body, one can", "type": "code", "location": "/examples/keyword_counting/README.md:1-26" }, - "247": { - "file_id": 7, + "241": { + "file_id": 6, "content": "This code provides a use case for computing the frequencies of occurring countries in a long passage of text using seven different approaches including IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with variations, and Graph of Thoughts (GoT) with variations. It uses an input file named 'countries.csv' and provides a data generator for additional or different samples. The code to execute the use case is called 'keyword_counting.py'.", "type": "comment" }, - "248": { - "file_id": 7, + "242": { + "file_id": 6, "content": "select the specific samples to be run (variable samples) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 150 of `plot.py` and run `python3\nplot.py` to plot your data.", "type": "code", "location": "/examples/keyword_counting/README.md:27-45" }, - "249": { - "file_id": 7, + "243": { + "file_id": 6, "content": "The code selects specific samples, approaches, and sets a budget for running Python scripts. 
It creates directories for each run with `config.json` and `log.log` files containing LLM prompts/responses and GRS data for samples. Change the results directory in line 150 of `plot.py` to plot data by running `python3 plot.py`.", "type": "comment" }, - "250": { - "file_id": 8, + "244": { + "file_id": 7, "content": "/examples/keyword_counting/dataset_gen_countries.py", "type": "filepath" }, - "251": { - "file_id": 8, + "245": { + "file_id": 7, "content": "The code generates a language model dataset by organizing country occurrences into popular and rest categories, cleaning paragraphs, checking for invalid elements, and storing the result in CSV format.", "type": "summary" }, - "252": { - "file_id": 8, + "246": { + "file_id": 7, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport csv\nfrom typing import List, Tuple\nfrom graph_of_thoughts import controller\ndef find_country_indices(text: str, country: str) -> List[Tuple[int, str]]:\n \"\"\"\n Finds the indices of the occurences of a given country in the input text.\n :param text: Input text.\n :type text: str\n :param country: Country to search for.\n :type country: str\n :return: List of tuples, where each tuple consists of index and country.\n :rtype: List[Tuple[int, str]]\n \"\"\"\n indices = []\n index = text.find(country)\n while index != -1:\n indices.append(index)\n index = text.find(country, index + 1)\n return [(index, country) for index in indices]\nprimary_countries = [\n \"Afghanistan\",\n \"Argentina\",\n \"Australia\",\n \"Brazil\",\n \"Canada\",\n \"China\",\n \"Colombia\",\n \"Cuba\",\n \"Egypt\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:1-43" }, - "253": { - "file_id": 8, + "247": { + "file_id": 7, "content": "This function finds the indices of occurrences of a given country in an input text 
and returns them as a list of tuples containing index and country. The primary_countries variable is a list of countries used in the dataset.", "type": "comment" }, - "254": { - "file_id": 8, + "248": { + "file_id": 7, "content": " \"France\",\n \"Germany\",\n \"Greece\",\n \"India\",\n \"Indonesia\",\n \"Iran\",\n \"Iraq\",\n \"Ireland\",\n \"Israel\",\n \"Italy\",\n \"Japan\",\n \"Kenya\",\n \"Mexico\",\n \"Netherlands\",\n \"New Zealand\",\n \"Nigeria\",\n \"North Korea\",\n \"Pakistan\",\n \"Peru\",\n \"Philippines\",\n \"Poland\",\n \"Portugal\",\n \"Russia\",\n \"Saudi Arabia\",\n \"South Africa\",\n \"South Korea\",\n \"Spain\",\n \"Sweden\",\n \"Switzerland\",\n \"Thailand\",\n \"Turkey\",\n \"Ukraine\",\n \"United Arab Emirates\",\n \"United Kingdom\",\n \"United States\",\n \"Venezuela\",\n \"Vietnam\",\n \"Yemen\",\n \"Zimbabwe\",\n \"Belgium\",\n \"Norway\",\n]\nprimary_adjectives = [\n \"Afghan\",\n \"Argentine \",\n \"Argentinean\",\n \"Australian\",\n \"Brazilian\",\n \"Canadian\",\n \"Chinese\",\n \"Colombian\",\n \"Cuban\",\n \"Egyptian\",\n \"French\",\n \"German\",\n \"Greek\",\n \"Indian\",\n \"Indonesian\",\n \"Iranian\",\n \"Iraqi\",\n \"Irish\",\n \"Israeli\",\n \"Italian\",\n \"Japanese\",\n \"Kenyan\",\n \"Mexican\",\n \"Dutch\",\n \"New Zealander \",\n \"Kiwi\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:44-112" }, - "255": { - "file_id": 8, + "249": { + "file_id": 7, "content": "This code contains lists of countries and their corresponding primary adjectives. The countries list includes 46 nations, while the adjectives list has 28 items. 
These data can be used for keyword counting or other text processing tasks related to country-specific information.", "type": "comment" }, - "256": { - "file_id": 8, + "250": { + "file_id": 7, "content": " \"Nigerian\",\n \"North Korean\",\n \"Pakistani\",\n \"Peruvian\",\n \"Filipino\",\n \"Philippine\",\n \"Polish\",\n \"Portuguese\",\n \"Russian\",\n \"Saudi \",\n \"Saudi Arabian\",\n \"South African\",\n \"South Korean\",\n \"Spanish\",\n \"Swedish\",\n \"Swiss\",\n \"Thai\",\n \"Turkish\",\n \"Ukrainian\",\n \"United Arab Emirates\",\n \"Emirati\",\n \"British\",\n \"American\",\n \"Venezuelan\",\n \"Vietnamese\",\n \"Yemeni\",\n \"Zimbabwean\",\n \"Belgian\",\n \"Norwegian\",\n]\nrest_countries = [\n \"Albania\",\n \"Algeria\",\n \"Andorra\",\n \"Angola\",\n \"Antigua and Barbuda\",\n \"Armenia\",\n \"Austria\",\n \"Azerbaijan\",\n \"The Bahamas\",\n \"Bahrain\",\n \"Bangladesh\",\n \"Barbados\",\n \"Belarus\",\n \"Belize\",\n \"Benin\",\n \"Bhutan\",\n \"Bolivia\",\n \"Bosnia and Herzegovina\",\n \"Botswana\",\n \"Brunei\",\n \"Bulgaria\",\n \"Burkina Faso\",\n \"Burundi\",\n \"Cabo Verde\",\n \"Cambodia\",\n \"Cameroon\",\n \"Central African Republic\",\n \"Chad\",\n \"Chile\",\n \"Comoros\",\n \"Congo\",\n \"Costa Rica\",\n \"Côte d’Ivoire\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:113-176" }, - "257": { - "file_id": 8, + "251": { + "file_id": 7, "content": "The code provides a list of countries divided into two sections: \"popular_countries\" containing widely recognized nations, and \"rest_countries\" containing the remaining countries. 
It appears to be used for organizing or filtering country data in an application or dataset.", "type": "comment" }, - "258": { - "file_id": 8, + "252": { + "file_id": 7, "content": " \"Croatia\",\n \"Cyprus\",\n \"Czech Republic\",\n \"Czechia\",\n \"Denmark\",\n \"Djibouti\",\n \"Dominica\",\n \"Dominican Republic\",\n \"East Timor\",\n \"Timor-Leste\",\n \"Ecuador\",\n \"El Salvador\",\n \"Equatorial Guinea\",\n \"Eritrea\",\n \"Estonia\",\n \"Eswatini\",\n \"Ethiopia\",\n \"Fiji\",\n \"Finland\",\n \"Gabon\",\n \"The Gambia\",\n \"Georgia\",\n \"Ghana\",\n \"Grenada\",\n \"Guatemala\",\n \"Guinea\",\n \"Guinea-Bissau\",\n \"Guyana\",\n \"Haiti\",\n \"Honduras\",\n \"Hungary\",\n \"Iceland\",\n \"Jamaica\",\n \"Jordan\",\n \"Kazakhstan\",\n \"Kiribati\",\n \"Kosovo\",\n \"Kuwait\",\n \"Kyrgyzstan\",\n \"Laos\",\n \"Latvia\",\n \"Lebanon\",\n \"Lesotho\",\n \"Liberia\",\n \"Libya\",\n \"Liechtenstein\",\n \"Lithuania\",\n \"Luxembourg\",\n \"Madagascar\",\n \"Malawi\",\n \"Malaysia\",\n \"Maldives\",\n \"Mali\",\n \"Malta\",\n \"Marshall Islands\",\n \"Mauritania\",\n \"Mauritius\",\n \"Micronesia\",\n \"Moldova\",\n \"Monaco\",\n \"Mongolia\",\n \"Montenegro\",\n \"Morocco\",\n \"Mozambique\",\n \"Myanmar\",\n \"Burma\",\n \"Namibia\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:177-243" }, - "259": { - "file_id": 8, + "253": { + "file_id": 7, "content": "The code includes a list of country names in alphabetical order. 
Each country name is separated by a comma, and some countries have multiple names listed for different uses or recognition.", "type": "comment" }, - "260": { - "file_id": 8, + "254": { + "file_id": 7, "content": " \"Nauru\",\n \"Nepal\",\n \"Nicaragua\",\n \"Niger\",\n \"North Macedonia\",\n \"Oman\",\n \"Palau\",\n \"Panama\",\n \"Papua New Guinea\",\n \"Paraguay\",\n \"Qatar\",\n \"Romania\",\n \"Rwanda\",\n \"Saint Kitts and Nevis\",\n \"Saint Lucia\",\n \"Saint Vincent and the Grenadines\",\n \"Samoa\",\n \"San Marino\",\n \"Sao Tome and Principe\",\n \"Senegal\",\n \"Serbia\",\n \"Seychelles\",\n \"Sierra Leone\",\n \"Singapore\",\n \"Slovakia\",\n \"Slovenia\",\n \"Solomon Islands\",\n \"Somalia\",\n \"Sri Lanka\",\n \"Sudan\",\n \"Suriname\",\n \"Syria\",\n \"Taiwan\",\n \"Tajikistan\",\n \"Tanzania\",\n \"Togo\",\n \"Tonga\",\n \"Trinidad and Tobago\",\n \"Tunisia\",\n \"Turkmenistan\",\n \"Tuvalu\",\n \"Uganda\",\n \"Uruguay\",\n \"Uzbekistan\",\n \"Vanuatu\",\n \"Vatican City\",\n \"Zambia\",\n]\nrest_adjectives = [\n \"Albanian\",\n \"Algerian\",\n \"Andorran\",\n \"Angolan\",\n \"Antiguan and Barbudan\",\n \"Armenian\",\n \"Austrian\",\n \"Azerbaijani\",\n \"Bahamian\",\n \"Bahraini\",\n \"Bangladeshi\",\n \"Barbadian\",\n \"Belarusian\",\n \"Belizean\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:244-306" }, - "261": { - "file_id": 8, + "255": { + "file_id": 7, "content": "This code defines a list of countries and their corresponding adjectives, which can be used to generate diverse language datasets.", "type": "comment" }, - "262": { - "file_id": 8, + "256": { + "file_id": 7, "content": " \"Beninese\",\n \"Bhutanese\",\n \"Bolivian\",\n \"Bosnian and Herzegovinian\",\n \"Botswanan\",\n \"Bruneian\",\n \"Bulgarian\",\n \"Burkinabè\",\n \"Burundian\",\n \"Cape Verdean\",\n \"Cambodian\",\n \"Cameroonian\",\n \"Central African\",\n \"Chadian\",\n \"Chilean\",\n \"Comorian\",\n \"Congolese\",\n \"Costa Rican\",\n 
\"Ivorian\",\n \"Croatian\",\n \"Cypriot\",\n \"Czech\",\n \"Czech\",\n \"Danish\",\n \"Djiboutian\",\n \"Dominican\",\n \"Dominican\",\n \"East Timorese\",\n \"Timorese\",\n \"Ecuadorian\",\n \"Salvadoran\",\n \"Equatorial Guinean\",\n \"Eritrean\",\n \"Estonian\",\n \"Swazi\",\n \"Ethiopian\",\n \"Fijian\",\n \"Finnish\",\n \"Gabonese\",\n \"Gambian\",\n \"Georgian\",\n \"Ghanaian\",\n \"Grenadian\",\n \"Guatemalan\",\n \"Guinean\",\n \"Bissau-Guinean\",\n \"Guyanese\",\n \"Haitian\",\n \"Honduran\",\n \"Hungarian\",\n \"Icelandic\",\n \"Jamaican\",\n \"Jordanian\",\n \"Kazakh\",\n \"I-Kiribati\",\n \"Kosovar\",\n \"Kuwaiti\",\n \"Kyrgyz\",\n \"Laotian\",\n \"Latvian\",\n \"Lebanese\",\n \"Basotho\",\n \"Liberian\",\n \"Libyan\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:307-370" }, - "263": { - "file_id": 8, + "257": { + "file_id": 7, "content": "This code lists various country names and their corresponding adjective forms, used for identifying nationality or origin.", "type": "comment" }, - "264": { - "file_id": 8, + "258": { + "file_id": 7, "content": " \"Liechtensteiner\",\n \"Lithuanian\",\n \"Luxembourger\",\n \"Malagasy\",\n \"Malawian\",\n \"Malaysian\",\n \"Maldivian\",\n \"Malian\",\n \"Maltese\",\n \"Marshallese\",\n \"Mauritanian\",\n \"Mauritian\",\n \"Micronesian\",\n \"Moldovan\",\n \"Monégasque\",\n \"Mongolian\",\n \"Montenegrin\",\n \"Moroccan\",\n \"Mozambican\",\n \"Myanmarese\",\n \"Burmese\",\n \"Namibian\",\n \"Nauruan\",\n \"Nepali\",\n \"Nicaraguan\",\n \"Nigerien\",\n \"Macedonian\",\n \"Omani\",\n \"Palauan\",\n \"Panamanian\",\n \"Papua New Guinean\",\n \"Paraguayan\",\n \"Qatari\",\n \"Romanian\",\n \"Rwandan\",\n \"Kittitian\",\n \"Nevisian\",\n \"Saint Lucian\",\n \"Vincentian\",\n \"Samoan\",\n \"Sammarinese\",\n \"Santomean\",\n \"Senegalese\",\n \"Serbian\",\n \"Seychellois\",\n \"Sierra Leonean\",\n \"Singaporean\",\n \"Slovak\",\n \"Slovenian\",\n \"Solomon Islander\",\n \"Somali\",\n 
\"Sri Lankan\",\n \"Sudanese\",\n \"Surinamese\",\n \"Syrian\",\n \"Taiwanese\",\n \"Tajik\",\n \"Tanzanian\",\n \"Togolese\",\n \"Tongan\",\n \"Trinidadian \",\n \"Tobagonian\",\n \"Tunisian\",", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:371-433" }, - "265": { - "file_id": 8, + "259": { + "file_id": 7, "content": "This code defines a list of country names and their associated adjectival forms, used for keyword counting in a dataset.", "type": "comment" }, - "266": { - "file_id": 8, + "260": { + "file_id": 7, "content": " \"Turkmen\",\n \"Tuvaluan\",\n \"Ugandan\",\n \"Uruguayan\",\n \"Uzbek\",\n \"Ni-Vanuatu\",\n \"Vatican\",\n \"Zambian\",\n]\nlm = controller.ChatGPT(\n \"../../graph_of_thoughts/controller/config.json\", model_name=\"chatgpt4\"\n)\nprompt = \"\"\" Generate a continuous passage (single paragraph) of 16 sentences following the provided restrictions precisely. \n\nThe following restrictions must apply to the generated text:\n1. Single continuous passage of exactly 16 sentences without any paragraphs (line breaks).\n2. Countries appearing in the passage must be only from the provided list. No other countries can be mentioned.\n3. When a country is mentioned in the passage, it must be mentioned multiple times consecutively in the same or following sentences.\n4. Passage should be creative and coherent.\n5. Using adjectives of a country is NOT allowed (e.g., \"Colombian coffee\" should be \"coffee from Colombia\" instead)\n\n\nList of countries: [Afghanistan, Argentina", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:434-460" }, - "267": { - "file_id": 8, + "261": { + "file_id": 7, "content": "This code generates a prompt for an AI language model to create a continuous passage with 16 sentences using a provided list of countries and specific restrictions. 
The generated text should mention the countries multiple times consecutively, be creative and coherent, and avoid using adjectives for the countries.", "type": "comment" }, - "268": { - "file_id": 8, + "262": { + "file_id": 7, "content": ", Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\nWhile exploring the ancient ruins in Greece, Sam discovered manuscripts that hinted at the hidden treasures of Egypt. It seemed these treasures were once stolen from Egypt by rogue merchants and secretly moved to Greece, only to be buried under layers of time. Intrigued, he shared the findings with his friend Maya from India, who was an expert in decoding ancient languages. She pointed out that there was a similar legend in India about treasures from China that had somehow ended up in the southern parts of India, possibly", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:460-462" }, - "269": { - "file_id": 8, + "263": { + "file_id": 7, "content": "The code defines a list containing the names of countries. These country names are used in various parts of the program to handle data related to specific countries.", "type": "comment" }, - "270": { - "file_id": 8, + "264": { + "file_id": 7, "content": " through trade or conquest. She also recounted tales from China that spoke of incredible artifacts from Indonesia, suggesting a rich tapestry of cultural exchanges throughout history. Their conversation took an interesting turn when Sam mentioned a book he'd read about the mysterious connections between Argentina and Brazil. 
The book detailed how both Argentina and Brazil, despite their differences, shared tales of lost civilizations and forgotten cities deep within their jungles. Maya excitedly mentioned that she'd been to the Philippines and had heard local legends about ancient ties with Indonesia and how traders from the Philippines would journey to Indonesia in search of spices and other goods. Thinking of spices, Sam fondly recalled his trip to Spain, where he had learned about the country's historical links with Portugal. Spain and Portugal, both maritime giants of their time, had extensively explored unknown lands and established trade routes. Maya, remembering her travels, sai", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:462-462" }, - "271": { - "file_id": 8, + "265": { + "file_id": 7, "content": "Code snippet describes a conversation between Sam and Maya discussing historical connections between different countries through trade and cultural exchanges.", "type": "comment" }, - "272": { - "file_id": 8, + "266": { + "file_id": 7, "content": "d that she had been to Belgium once and was fascinated by its connections with the Netherlands. Both Belgium and the Netherlands, she explained, had rich histories of art, trade, and diplomacy that intertwined them for centuries. 
They both sat back, marveling at the interconnectedness of the world and how countries from Greece to the Netherlands shared tales of adventure, discovery, and mystery.\n\nList of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]\nPassage:\n\"\"\"\nnum_samples = 100\nsample_id = 0\nresult = [[\"ID\", \"Text\", \"Countries\", \"Sentences\", \"Characters\"]]", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:462-471" }, - "273": { - "file_id": 8, + "267": { + "file_id": 7, "content": "This code generates a dataset of samples, where each sample contains an ID, text, list of countries mentioned, number of sentences, and number of characters. It will generate 100 samples with incrementing IDs. 
The provided list of countries serves as the pool from which countries will be randomly selected for each sample's text.", "type": "comment" }, - "274": { - "file_id": 8, + "268": { + "file_id": 7, "content": "\"\"\"\nGenerate passages of text that contain country names to be used as input for the\nkeyword counting.\nInput(x) : Number of samples\nOutput(y) : Passages written to a file in the CSV format.\n File contains the sample ID, the passage, the countries the passage\n contains, the sentences of the passages, number of characters of the\n passage.\n\"\"\"\n# For x batches of y responses\nfor _ in range(num_samples):\n response = lm.query(prompt, 1)\n texts = lm.get_response_texts(response)\n for text in texts:\n # Clean paragraphs - single long passage\n text = text.strip().replace(\"\\n\", \"\")\n # Get all occurrences of all primary permissible countries\n occurrences = []\n for country in [country for country in primary_countries if country in text]:\n occurrences.extend(find_country_indices(text, country))\n # Order exactly how they appear in the text\n ordered_occurrences = [country[1] for country in sorted(occurrences)]\n # Check invalid countries and adjectives", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:473-499" }, - "275": { - "file_id": 8, + "269": { + "file_id": 7, "content": "This code generates passages containing country names for keyword counting. 
It iterates through a given number of samples, queries the language model (lm) for responses, cleans paragraphs by removing newlines and extra spaces, finds all occurrences of primary countries in each text, orders them based on their appearance in the text, and checks for invalid countries or adjectives.", "type": "comment" }, - "276": { - "file_id": 8, + "270": { + "file_id": 7, "content": " invalid_primary_adjective = [\n adjective for adjective in primary_adjectives if adjective in text\n ]\n invalid_rest_country = [\n country for country in rest_countries if country in text\n ]\n invalid_rest_adjective = [\n adjective for adjective in rest_adjectives if adjective in text\n ]\n invalid_count = (\n len(invalid_primary_adjective)\n + len(invalid_rest_country)\n + len(invalid_rest_adjective)\n )\n if invalid_count > 0:\n print(\n f\"Invalid countries or adjectives present: {invalid_primary_adjective}, {invalid_rest_country}, {invalid_rest_adjective}\"\n )\n continue\n result.append(\n [\n sample_id,\n text,\n \"[{0}]\".format(\", \".join(map(str, ordered_occurrences))),\n len(text.split(\".\")) - 1,\n len(text),\n ]\n )\n sample_id += 1\n# Writing to csv file", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:500-532" }, - "277": { - "file_id": 8, + "271": { + "file_id": 7, "content": "This code segment checks for invalid primary adjectives, rest countries, and rest adjectives in the text. It counts their occurrences, and if any of them are present, it prints a message with details about the invalid elements found. If there are no invalid elements, it adds the sample (with its ID, text, ordered occurrences, number of sentences, and total length) to the result list. 
The code continues to the next iteration, and after processing all samples, it will write the final result to a CSV file.", "type": "comment" }, - "278": { - "file_id": 8, + "272": { + "file_id": 7, "content": "with open(\"countries_script.csv\", \"w\") as csvfile:\n csvwriter = csv.writer(csvfile)\n csvwriter.writerows(result)", "type": "code", "location": "/examples/keyword_counting/dataset_gen_countries.py:533-535" }, - "279": { - "file_id": 8, + "273": { + "file_id": 7, "content": "This code writes the result to a CSV file named \"countries_script.csv\". It opens the file in write mode (\"w\"), creates a CSV writer object, and uses the writerows() method to write each row of the result variable to the CSV file.", "type": "comment" }, - "280": { - "file_id": 9, + "274": { + "file_id": 8, "content": "/examples/keyword_counting/plot.py", "type": "filepath" }, - "281": { - "file_id": 9, + "275": { + "file_id": 8, "content": "This code retrieves JSON data, organizes it in a dictionary and plots results using boxplots and bar charts with customizable titles. 
It also sets y-axis limits, handles missing results and displays solved values.", "type": "summary" }, - "282": { - "file_id": 9, + "276": { + "file_id": 8, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Ales Kubicek\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", "type": "code", "location": "/examples/keyword_counting/plot.py:1-29" }, - "283": { - "file_id": 9, + "277": { + "file_id": 8, "content": "This code retrieves complete results from a given base directory, iterating through each folder and file. 
It collects JSON data from specified .json files, stores them in the \"results_complete\" dictionary with corresponding key and appends the data to its value.", "type": "comment" }, - "284": { - "file_id": 9, + "278": { + "file_id": 8, "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", "type": "code", "location": "/examples/keyword_counting/plot.py:30-58" }, - "285": { - "file_id": 9, + "279": { + "file_id": 8, "content": "The code sorts the results dictionary by key, then retrieves final scores for each method in the results_complete dictionary. 
It appends a list of scores (including score, solved status, prompt tokens, completion tokens, and cost) to the corresponding method in the scores dictionary.", "type": "comment" }, - "286": { - "file_id": 9, + "280": { + "file_id": 8, "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got4\", \"got8\", \"gotx\"],\n model=\"GPT-3.5\",\n y_lower=0,\n y_upper=40,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 100 and score != 300]", "type": "code", "location": "/examples/keyword_counting/plot.py:59-93" }, - "287": { - "file_id": 9, + "281": { + "file_id": 8, "content": "This code retrieves and prepares data for plotting keyword counting results. It first gets complete results from a specified base directory, then extracts final scores. The data is then organized into a dictionary format for plotting. The function `plot_results` takes this data, along with optional parameters to adjust the visualization. 
The code filters out irrelevant scores and orders them based on the input order.", "type": "comment" }, - "288": { - "file_id": 9, + "282": { + "file_id": 8, "content": " for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(3.75, 4))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"]\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=10)\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n plt.yticks(fontsize=fig_fontsize)\n if display_left_ylabel:\n ax.set_ylabel(f\"Number of errors; the lower the better\", fontsize=fig_fontsize)\n ax.set_title(f\"Keyword Counting\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)", "type": "code", "location": "/examples/keyword_counting/plot.py:94-122" }, - "289": { - "file_id": 9, + "283": { + "file_id": 8, "content": "This code generates a boxplot of keyword counting results and adds a bar chart of total costs to the same axes. It uses the matplotlib library for plotting, sets tick and label positions, and allows for customization of y-axis labels and title. 
The total costs are calculated by summing the \"costs\" values from the \"results\" dictionary for each method in a specified order.", "type": "comment" }, - "290": { - "file_id": 9, + "284": { + "file_id": 8, "content": " ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"keyword_counting_{model}.pdf\", bbox_inches=\"tight\")\nplot_results(", "type": "code", "location": "/examples/keyword_counting/plot.py:123-158" }, - "291": { - "file_id": 9, + "285": { + "file_id": 8, "content": "This code is setting the y-axis limits and ticks for a graph, adding annotations for solved solutions, labeling the y-axis, and saving the figure with a specific file name. It also handles missing results by continuing to the next method in case one is not available. 
The purpose of this code is likely related to plotting a graph that compares different methods or models based on their performance (cost) and whether they solved the problem or not.", "type": "comment" }, - "292": { - "file_id": 9, + "286": { + "file_id": 8, "content": " get_plotting_data(\"results/\"),\n display_solved=True,\n annotation_offset=-0.3,\n model=\"GPT-3.5\",\n y_upper=35,\n display_left_ylabel=True,\n display_right_ylabel=True,\n cost_upper=9,\n)", "type": "code", "location": "/examples/keyword_counting/plot.py:159-167" }, - "293": { - "file_id": 9, + "287": { + "file_id": 8, "content": "This code is calling a function named 'get_plotting_data' to retrieve data from the \"results/\" directory and generate a plot. The model used for this task is \"GPT-3.5\". The y-axis has an upper limit of 35, and the cost axis has an upper limit of 9. The function will display solved values on the plot and show left and right y-labels.", "type": "comment" }, - "294": { - "file_id": 10, + "288": { + "file_id": 9, "content": "/examples/set_intersection/README.md", "type": "filepath" }, - "295": { - "file_id": 10, + "289": { + "file_id": 9, "content": "The code provides a data generator for set intersections and allows users to customize parameters, storing results in JSON files. To visualize the data, modify the results directory and length parameter in `plot.py` before executing the script.", "type": "summary" }, - "296": { - "file_id": 10, + "290": { + "file_id": 9, "content": "# Set Intersection\nThe use case in this directory computes the intersection of two input\nsets. We provide implementations of five different approaches for 32, 64\nand 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT)\n## Data\nWe provide input files with 100 precomputed samples for each set length:\n`set_intersection_.csv`. 
It is also possible to use\nthe data generator `dataset_gen_intersection.py` to generate additional or\ndifferent samples. The parameters can be updated in lines 24 to 28 of\nthe main body:\n- set_size = 32 # size of the generated sets\n- int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n- seed = 42 # seed of the random number generator\n- num_sample = 100 # number of samples\n- filename = 'set_intersection_032.csv' # output filename\n## Execution\nThe files to execute the use case are called\n`set_intersection_.py`. In the main body, one can", "type": "code", "location": "/examples/set_intersection/README.md:1-29" }, - "297": { - "file_id": 10, + "291": { + "file_id": 9, "content": "This code provides different approaches for computing set intersection and offers a data generator to create samples. It includes IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). The user can specify the number of elements, upper bound, seed, number of samples, and output filename. The code is structured into separate files for each set length.", "type": "comment" }, - "298": { - "file_id": 10, + "292": { + "file_id": 9, "content": "select the specific samples to be run (variable sample) and the\napproaches (variable approaches). It is also possible to set a budget in\ndollars (variable budget).\nThe input filename for the samples is currently hardcoded to\n`set_intersection_.csv`, but can be updated in the\nfunction `run`.\nThe Python scripts will create the directory `result`, if it is not\nalready present. In the `result` directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. 
`log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.", "type": "code", "location": "/examples/set_intersection/README.md:30-46" }, + "293": { + "file_id": 9, + "content": "This code selects samples and approaches, allows budget setting, hardcodes input filename, creates directories for execution-specific files, and stores the Graph Reasoning State (GRS) for each sample in separate JSON files.", + "type": "comment" + }, + "294": { + "file_id": 9, + "content": "## Plot Data\nChange the results directory in line 170 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data.", + "type": "code", + "location": "/examples/set_intersection/README.md:48-52" + }, + "295": { + "file_id": 9, + "content": "This code snippet instructs the user to modify the results directory in line 170 of `plot.py` and adjust the length parameter accordingly before executing `python3 plot.py` to visualize their data.", + "type": "comment" + }, + "296": { + "file_id": 10, + "content": "/examples/set_intersection/dataset_gen_intersection.py", + "type": "filepath" + }, + "297": { + "file_id": 10, + "content": "The code defines a function \"scramble\" that shuffles array elements and generates random sets, calculating their intersection for specified samples. 
It uses numpy's default random generator with seed 42 to generate sets of size 32, writing the input, generated, and intersection sets in CSV format.", + "type": "summary" + }, + "298": { + "file_id": 10, + "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Robert Gerstenberger\nimport csv\nimport numpy as np\ndef scramble(array: np.ndarray, rng: np.random.Generator) -> None:\n \"\"\"\n Helper function to change the order of the elements in an array randomly.\n :param array: Array to be scrambled.\n :type: numpy.ndarray\n :param rng: Random number generator.\n :type rng: numpy.random.Generator\n \"\"\"\n size = array.shape[0]\n index_array = rng.integers(0, size, size)\n for i in range(size):\n temp = array[i]\n array[i] = array[index_array[i]]\n array[index_array[i]] = temp\nif __name__ == \"__main__\":\n \"\"\"\n Input(u) : Set size.\n Input(v) : Range of the integer numbers in the sets: 0..v (exclusive)\n Input(w) : Seed for the random number generator.\n Input(x) : Number of samples to be generated.\n Input(y) : Filename for the output CSV file.", + "type": "code", + "location": "/examples/set_intersection/dataset_gen_intersection.py:1-39" + }, "299": { "file_id": 10, - "content": "This code selects samples and approaches, allows budget setting, hardcodes input filename, creates directories for execution-specific files, and stores the Graph Reasoning State (GRS) for each sample in separate JSON files.", + "content": "The code snippet defines a function called \"scramble\" which shuffles the elements of an array randomly. It also contains main code block that specifies input parameters such as set size, range of integer numbers in sets, seed for random number generator, number of samples to be generated, and filename for output CSV file. 
The purpose is likely to generate a dataset by scrambling the order of elements within sets.", "type": "comment" } } \ No newline at end of file diff --git a/docs/data/3.json b/docs/data/3.json index 7881a04..c0eab65 100644 --- a/docs/data/3.json +++ b/docs/data/3.json @@ -1,542 +1,542 @@ { "300": { "file_id": 10, - "content": "## Plot Data\nChange the results directory in line 170 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data.", - "type": "code", - "location": "/examples/set_intersection/README.md:48-52" - }, - "301": { - "file_id": 10, - "content": "This code snippet instructs the user to modify the results directory in line 170 of `plot.py` and adjust the length parameter accordingly before executing `python3 plot.py` to visualize their data.", - "type": "comment" - }, - "302": { - "file_id": 11, - "content": "/examples/set_intersection/dataset_gen_intersection.py", - "type": "filepath" - }, - "303": { - "file_id": 11, - "content": "The code defines a function \"scramble\" that shuffles array elements and generates random sets, calculating their intersection for specified samples. 
It uses numpy's default random generator with seed 42 to generate sets of size 32, writing the input, generated, and intersection sets in CSV format.", - "type": "summary" - }, - "304": { - "file_id": 11, - "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Robert Gerstenberger\nimport csv\nimport numpy as np\ndef scramble(array: np.ndarray, rng: np.random.Generator) -> None:\n \"\"\"\n Helper function to change the order of the elements in an array randomly.\n :param array: Array to be scrambled.\n :type: numpy.ndarray\n :param rng: Random number generator.\n :type rng: numpy.random.Generator\n \"\"\"\n size = array.shape[0]\n index_array = rng.integers(0, size, size)\n for i in range(size):\n temp = array[i]\n array[i] = array[index_array[i]]\n array[index_array[i]] = temp\nif __name__ == \"__main__\":\n \"\"\"\n Input(u) : Set size.\n Input(v) : Range of the integer numbers in the sets: 0..v (exclusive)\n Input(w) : Seed for the random number generator.\n Input(x) : Number of samples to be generated.\n Input(y) : Filename for the output CSV file.", - "type": "code", - "location": "/examples/set_intersection/dataset_gen_intersection.py:1-39" - }, - "305": { - "file_id": 11, - "content": "The code snippet defines a function called \"scramble\" which shuffles the elements of an array randomly. It also contains main code block that specifies input parameters such as set size, range of integer numbers in sets, seed for random number generator, number of samples to be generated, and filename for output CSV file. 
The purpose is likely to generate a dataset by scrambling the order of elements within sets.", - "type": "comment" - }, - "306": { - "file_id": 11, "content": " Output(z) : Input sets and intersected set written a file in the CSV format.\n File contains the sample ID, input set 1, input set 2,\n intersection set.\n \"\"\"\n set_size = 32 # size of the generated sets\n int_value_ubound = 64 # (exclusive) upper limit of generated numbers\n seed = 42 # seed of the random number generator\n num_sample = 100 # number of samples\n filename = \"set_intersection_032.csv\" # output filename\n assert 2 * set_size <= int_value_ubound\n rng = np.random.default_rng(seed)\n intersection_sizes = rng.integers(set_size // 4, 3 * set_size // 4, num_sample)\n np.set_printoptions(\n linewidth=np.inf\n ) # no wrapping in the array fields in the output file\n with open(filename, \"w\") as f:\n fieldnames = [\"ID\", \"SET1\", \"SET2\", \"INTERSECTION\"]\n writer = csv.DictWriter(f, delimiter=\",\", fieldnames=fieldnames)\n writer.writeheader()\n for i in range(num_sample):\n intersection_size = intersection_sizes[i]", "type": "code", "location": "/examples/set_intersection/dataset_gen_intersection.py:40-67" }, - "307": { - "file_id": 11, + "301": { + "file_id": 10, "content": "Code generates random sets and calculates their intersection for a given number of samples. It uses numpy's default random generator, with seed 42, to generate sets of size 32. The intersected set sizes are also randomly determined (within certain bounds) for each sample. 
The code writes the input sets, generated sets, and intersection sets in CSV format.", "type": "comment" }, - "308": { - "file_id": 11, + "302": { + "file_id": 10, "content": " full_set = np.arange(0, int_value_ubound, dtype=np.int16)\n scramble(full_set, rng)\n intersection = full_set[:intersection_size].copy()\n sorted_intersection = np.sort(intersection)\n set1 = full_set[:set_size].copy()\n set2 = np.concatenate(\n [intersection, full_set[set_size : 2 * set_size - intersection_size]]\n )\n scramble(set1, rng)\n scramble(set2, rng)\n writer.writerow(\n {\n \"ID\": i,\n \"SET1\": set1.tolist(),\n \"SET2\": set2.tolist(),\n \"INTERSECTION\": sorted_intersection.tolist(),\n }\n )", "type": "code", "location": "/examples/set_intersection/dataset_gen_intersection.py:69-92" }, - "309": { - "file_id": 11, + "303": { + "file_id": 10, "content": "Code generates a full set of integers, scrambles it, takes an intersection of the set with a specified size, splits the full set into two sets, scramble each set, and writes a row to a CSV file containing ID, SET1, SET2, and sorted INTERSECTION.", "type": "comment" }, - "310": { - "file_id": 12, + "304": { + "file_id": 11, "content": "/examples/set_intersection/plot.py", "type": "filepath" }, - "311": { - "file_id": 12, + "305": { + "file_id": 11, "content": "The code collects and processes results from various AI methods, storing them in dictionaries for analysis or visualization. It generates boxplots to display the final scores of different methods with customizable y-axis settings and font size. 
The code also sets labels, plots a bar graph, adds annotations, adjustments, and text, saves as PDF, replaces characters in model names, and calls another function.", "type": "summary" }, - "312": { - "file_id": 12, + "306": { + "file_id": 11, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", "type": "code", "location": "/examples/set_intersection/plot.py:1-29" }, - "313": { - "file_id": 12, + "307": { + "file_id": 11, "content": "This code retrieves complete results from a given base directory. It iterates through each folder in the directory, loads JSON files within each folder, and stores the key-value pairs as dictionaries within lists under each folder's name in a results dictionary. 
The code also checks if directories are not empty folders.", "type": "comment" }, - "314": { - "file_id": 12, + "308": { + "file_id": 11, "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", "type": "code", "location": "/examples/set_intersection/plot.py:30-58" }, - "315": { - "file_id": 12, + "309": { + "file_id": 11, "content": "This code organizes and processes results from various AI methods, extracting scores, solved status, prompt/completion tokens, and cost for each method. 
It stores this information in a dictionary for further analysis or visualization.", "type": "comment" }, - "316": { - "file_id": 12, + "310": { + "file_id": 11, "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [score for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order", "type": "code", "location": "/examples/set_intersection/plot.py:59-94" }, - "317": { - "file_id": 12, + "311": { + "file_id": 11, "content": "The code retrieves final scores from complete results and organizes them into a dictionary for plotting. It then creates a new dictionary with scores, solved problems count, and costs for each method. 
This data is used to plot the results in a graph, considering options like method order, model, length, cost limits, and display settings.", "type": "comment" }, - "318": { - "file_id": 12, + "312": { + "file_id": 11, "content": " ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n methods_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:", "type": "code", "location": "/examples/set_intersection/plot.py:95-130" }, - "319": { - "file_id": 12, + "313": { + "file_id": 11, "content": "This code creates a boxplot to visualize the results of different methods. It sets the y-axis limits and ticks based on the length of the data, and customizes the font size for better readability. 
The code also handles the display of additional information (solved count) by adjusting the range of y-axis ticks accordingly.", "type": "comment" }, - "320": { - "file_id": 12, + "314": { + "file_id": 11, "content": " ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]", "type": "code", "location": "/examples/set_intersection/plot.py:131-162" }, - "321": { - "file_id": 12, + "315": { + "file_id": 11, "content": "This code sets y-axis label, title, and twin axis for plotting. It then plots a bar graph using the twin axis, setting the y-axis limits and ticks based on specified conditions. 
Finally, it checks if certain conditions are met and adds annotations or adjusts the graph accordingly.", "type": "comment" }, - "322": { - "file_id": 12, + "316": { + "file_id": 11, "content": " ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"set_intersection_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", "type": "code", "location": "/examples/set_intersection/plot.py:163-184" }, - "323": { - "file_id": 12, + "317": { + "file_id": 11, "content": "This code is adding text annotations to a plot, incrementing a count variable, and saving the final plot as a PDF. It replaces certain characters in the model name and calls another function for more plotting results with specific parameters.", "type": "comment" }, - "324": { - "file_id": 13, + "318": { + "file_id": 12, "content": "/examples/set_intersection/utils.py", "type": "filepath" }, - "325": { - "file_id": 13, + "319": { + "file_id": 12, "content": "The code contains helper functions `string_to_list()` and `string_to_set()`, which convert a string-encoded list or set into Python integers. 
The `test_set_intersection` function compares the intersection of two sets with the sorted list from the input string, counting errors as a score, returning either total errors or 1000 for exceptions.", "type": "summary" }, - "326": { - "file_id": 13, + "320": { + "file_id": 12, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# The source code is adapted from the sorting source code written by\n# Nils Blach.\n#\n# main author: Robert Gerstenberger\nfrom typing import Dict, List, Set\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of integer elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of integer elements.\n :rtype: List[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef string_to_set(string: str) -> Set[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n set object of integer elements.\n :param string: Input string containing a list.", "type": "code", "location": "/examples/set_intersection/utils.py:1-36" }, - "327": { - "file_id": 13, + "321": { + "file_id": 12, "content": "This code defines two helper functions: `string_to_list()` and `string_to_set()`. These functions are used to convert a list encoded in a string into a Python list or set object of integer elements. The `string_to_list()` function converts the input string into an integer list, while the `string_to_set()` function converts it into a set of integers. 
The assertion is raised if the input string does not contain a list.", "type": "comment" }, - "328": { - "file_id": 13, + "322": { + "file_id": 12, "content": " :type string: str\n :return: Set of integer elements.\n :rtype: Set[int]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return {int(num) for num in string[1:-1].split(\",\")}\ndef test_set_intersection(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool\n \"\"\"\n # convert string to list\n try:\n correct_list = string_to_list(state[\"result\"])\n sorted_list = sorted(string_to_list(state[\"current\"]))\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors.", "type": "code", "location": "/examples/set_intersection/utils.py:37-72" }, - "329": { - "file_id": 13, + "323": { + "file_id": 12, "content": "Function `string_to_list` converts a string input into an integer set. Function `test_set_intersection` checks if the final solution matches the ground truth by converting the result and current states to lists, sorting them, and comparing. 
Finally, `num_errors` function calculates the number of errors in the given state as a score.", "type": "comment" }, - "330": { - "file_id": 13, + "324": { + "file_id": 12, "content": " :rtype: float\n \"\"\"\n try:\n set1 = string_to_set(state[\"set1\"])\n set2 = string_to_set(state[\"set2\"])\n if \"subset\" in state and state[\"subset\"] != \"\" and state[\"subset\"] is not None:\n set2 = string_to_set(state[\"subset\"])\n common = sorted(list(set1 & set2))\n llm_solution = sorted(string_to_list(state[\"current\"]))\n num_errors = 0\n common_idx = 0\n llm_idx = 0\n while common_idx < len(common) and llm_idx < len(llm_solution):\n if common[common_idx] == llm_solution[llm_idx]:\n common_idx += 1\n llm_idx += 1\n elif common[common_idx] < llm_solution[llm_idx]:\n common_idx += 1\n num_errors += 1\n elif common[common_idx] > llm_solution[llm_idx]:\n llm_idx += 1\n num_errors += 1\n num_errors += len(common) - common_idx + len(llm_solution) - llm_idx\n return num_errors\n except:\n return 1000", "type": "code", "location": "/examples/set_intersection/utils.py:73-99" }, - "331": { - "file_id": 13, + "325": { + "file_id": 12, "content": "This function takes in two sets and a string, calculates the intersection of the sets and compares it with the sorted list from the string. If there is a mismatch between the common elements and the sorted list, it counts the number of errors. Returns the total number of errors found or 1000 if an exception occurs.", "type": "comment" }, - "332": { - "file_id": 14, + "326": { + "file_id": 13, "content": "/examples/sorting/README.md", "type": "filepath" }, - "333": { - "file_id": 14, + "327": { + "file_id": 13, "content": "The code directory contains various sorting algorithm examples for numbers 0-9 with implementations for IO, CoT, ToT, and GoT. It includes data files, Python scripts to execute use cases, and organizes results by name, approaches, day, and time. 
The plot.py file visualizes the results after modification.", "type": "summary" }, - "334": { - "file_id": 14, + "328": { + "file_id": 13, "content": "# Sorting\nThe use case in this directory sorts the provided list of \nnumbers containing numbers from 0 to 9 (duplicates allowed). \nWe provide implementations of five different approaches for \n32, 64 and 128 elements:\n- IO\n- Chain-of-Thought (CoT)\n- Tree of Thought (ToT):\n - ToT: wider tree, meaning more branches per level\n - ToT2: tree with more levels, but fewer branches per level\n- Graph of Thoughts (GoT):\n - GoT: split into subarrays / sort / merge\n## Data\nWe provide input files with 100 precomputed samples for each list\nlength: `sorting_.csv`.\n## Execution\nThe files to execute the use case are called\n`sorting_.py`. In the main body, one can select the\nspecific samples to be run (variable sample) and the approaches\n(variable approaches). It is also possible to set a budget in dollars\n(variable budget).\nThe input filename for the samples is currently hardcoded to\n`sorting_.csv`, but can be updated in the function\n`run`.\nThe Python scripts will create the directory `result`, if it is not", "type": "code", "location": "/examples/sorting/README.md:1-31" }, - "335": { - "file_id": 14, + "329": { + "file_id": 13, "content": "This code directory contains examples of sorting algorithms for lists of numbers from 0 to 9. Implementations are provided for IO, Chain-of-Thought (CoT), Tree of Thought (ToT) with two variations, and Graph of Thoughts (GoT). Data includes input files with precomputed samples, and Python scripts execute the use case with options to select samples and approaches.", "type": "comment" }, - "336": { - "file_id": 14, + "330": { + "file_id": 13, "content": "already present. 
In the 'result' directory, another directory is created\nfor each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.\nInside each execution specific directory two files (`config.json`,\n`log.log`) and a separate directory for each selected approach are\ncreated. `config.json` contains the configuration of the run: input data,\nselected approaches, name of the LLM, and the budget. `log.log` contains\nthe prompts and responses of the LLM as well as additional debug data.\nThe approach directories contain a separate json file for every sample\nand the file contains the Graph Reasoning State (GRS) for that sample.\n## Plot Data\nChange the results directory in line 171 of `plot.py` and update the\nlength parameter in the subsequent line and run `python3 plot.py` to\nplot your data.", "type": "code", "location": "/examples/sorting/README.md:32-46" }, - "337": { - "file_id": 14, + "331": { + "file_id": 13, "content": "Code organizes results into separate directories for each run based on the name of LLM, list of approaches, day and start time. Inside these execution-specific directories, config.json contains the configuration, log.log has prompts & responses, and approach directories store GRS files for every sample. 
Plot data can be visualized by modifying the results directory in plot.py and running python3 plot.py.", "type": "comment" }, - "338": { - "file_id": 15, + "332": { + "file_id": 14, "content": "/examples/sorting/plot.py", "type": "filepath" }, - "339": { - "file_id": 15, + "333": { + "file_id": 14, "content": "The code reads and sorts JSON data, calculates scores for sorting algorithm performances, plots boxplots, customizes options, adjusts y-axis limits, adds annotations, saves as PDF, and calls function with GPT-3.5 parameters.", "type": "summary" }, - "340": { - "file_id": 15, + "334": { + "file_id": 14, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", "type": "code", "location": "/examples/sorting/plot.py:1-29" }, - "341": { - "file_id": 15, + "335": { + "file_id": 14, "content": "This code reads a directory of JSON files, extracts their key and data, and stores them in a dictionary. It handles directories recursively and does not include non-JSON files or folders without .json files. 
This function may be used to collect and organize data from multiple sources.", "type": "comment" }, - "342": { - "file_id": 15, + "336": { + "file_id": 14, "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", "type": "code", "location": "/examples/sorting/plot.py:30-58" }, - "343": { - "file_id": 15, + "337": { + "file_id": 14, "content": "Code sorts results by \"key\" and returns them in a new dictionary. 
The sorted results are then processed to calculate scores for each method, including score, solution status, prompt tokens, completion tokens, and cost.", "type": "comment" }, - "344": { - "file_id": 15, + "338": { + "file_id": 14, "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory):\n results_complete = get_complete_results(base_directory)\n scores = get_final_scores(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"got\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n cost_upper=0.0,\n display_solved=True,\n annotation_offset=0,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]", "type": "code", "location": "/examples/sorting/plot.py:59-95" }, - "345": { - "file_id": 15, + "339": { + "file_id": 14, "content": "The code defines a function `get_plotting_data` that extracts and organizes data for plotting. It takes the base directory as input, retrieves complete results from it, then gets final scores. The final scores are organized into a dictionary called `results_plotting`, which contains scores, solved counts, and costs for each method. Another function, `plot_results`, is defined to handle the actual plotting of the data with customizable options. 
It extracts scores in the specified order, organizes them, and provides customizability such as display settings and annotations.", "type": "comment" }, - "346": { - "file_id": 15, + "340": { + "file_id": 14, "content": " if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n method_labels = [\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"]\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticklabels(method_labels, fontsize=fig_fontsize)\n y_upper = length\n range_increase = 1\n if display_solved:\n if length < 48:\n range_increase = 2\n elif length < 96:\n range_increase = 4\n else:\n range_increase = 8\n ax.set_ylim(y_lower, y_upper + range_increase)\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)", "type": "code", "location": "/examples/sorting/plot.py:96-131" }, - "347": { - "file_id": 15, + "341": { + "file_id": 14, "content": "This code creates a boxplot to visualize the scores of different methods, sets the ticks and labels, adjusts the y-axis limits based on length, and defines the y-lower limit as y_lower.", "type": "comment" }, - "348": { - "file_id": 15, + "342": { + "file_id": 14, "content": " )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:\n ax.set_ylabel(f\"#incorrectly sorted elements; the lower the better\")\n ax.set_title(f\"{length} elements\")\n ax2 = ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n if cost_upper > 0:\n ax2.set_ylim(0, 
cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:\n ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue", "type": "code", "location": "/examples/sorting/plot.py:132-163" }, - "349": { - "file_id": 15, + "343": { + "file_id": 14, "content": "Setting the y-tick positions and labels for ax2, setting the y-label for ax2 if display_right_ylabel is True, setting the title of the plot to length elements, setting the lower limit of the y-axis for ax2 if cost_upper > 0, adjusting the y-ticks' values for ax2 based on the number of ticks and the upper cost limit, and finally adding annotations for solved methods.", "type": "comment" }, - "350": { - "file_id": 15, + "344": { + "file_id": 14, "content": " solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n fig.savefig(f\"sorting_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n get_plotting_data(\"results/\"),\n length=32,\n display_solved=True,\n model=\"GPT-3.5\",\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", "type": "code", "location": "/examples/sorting/plot.py:164-186" }, - "351": { - "file_id": 15, + "345": { + "file_id": 14, "content": "The code plots sorting algorithm performance data and displays the solved count for each method. It saves the plot as a PDF with the model name and length appended to its filename. 
The function is then called again with specific parameters, including GPT-3.5 as the model.", "type": "comment" }, - "352": { - "file_id": 16, + "346": { + "file_id": 15, "content": "/examples/sorting/utils.py", "type": "filepath" }, - "353": { - "file_id": 16, + "347": { + "file_id": 15, "content": "The code defines a function that converts string-encoded lists to Python integer lists and tests if the solution matches ground truth. A helper function checks sorted lists by comparing adjacent elements, returning error count as score; defaults to 300 in case of exception.", "type": "summary" }, - "354": { - "file_id": 16, + "348": { + "file_id": 15, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom typing import Dict, List\ndef string_to_list(string: str) -> List[int]:\n \"\"\"\n Helper function to convert a list encoded inside a string into a Python\n list object of string elements.\n :param string: Input string containing a list.\n :type string: str\n :return: List of string elements.\n :rtype: List[str]\n :raise AssertionError: If input string does not contain a list.\n \"\"\"\n assert string[0] == \"[\" and string[-1] == \"]\", \"String is not a list.\"\n return [int(num) for num in string[1:-1].split(\",\")]\ndef test_sorting(state: Dict) -> bool:\n \"\"\"\n Function to test whether the final solution matches ground truth.\n :param state: Thought state that represents the final solution.\n :type state: Dict\n :return: Returns whether the solution matches the ground truth.\n :rtype: bool", "type": "code", "location": "/examples/sorting/utils.py:1-35" }, - "355": { - "file_id": 16, + "349": { + "file_id": 15, "content": "This code defines a function to convert a list encoded inside a string into a Python list object of integer elements. 
It also contains a helper function that tests whether the final solution matches the ground truth, taking a thought state as input and returning a boolean result.", "type": "comment" }, - "356": { - "file_id": 16, + "350": { + "file_id": 15, "content": " \"\"\"\n try:\n correct_list = sorted(string_to_list(state[\"original\"]))\n sorted_list = string_to_list(state[\"current\"])\n return sorted_list == correct_list\n except:\n return False\ndef num_errors(state: Dict) -> float:\n \"\"\"\n Function to locally count the number of errors that serves as a score.\n :param state: Thought state to be scored.\n :type state: Dict\n :return: Number of errors.\n :rtype: float\n \"\"\"\n try:\n unsorted_list = state[\"original\"]\n if (\n \"unsorted_sublist\" in state\n and state[\"unsorted_sublist\"] != \"\"\n and state[\"unsorted_sublist\"] is not None\n and len(state[\"unsorted_sublist\"]) < len(unsorted_list) - 5\n ):\n unsorted_list = state[\"unsorted_sublist\"]\n correct_list = sorted(string_to_list(unsorted_list))\n current_list = string_to_list(state[\"current\"])\n num_errors = 0\n for i in range(10):\n num_errors += abs(\n sum([1 for num in current_list if num == i])", "type": "code", "location": "/examples/sorting/utils.py:36-70" }, - "357": { - "file_id": 16, + "351": { + "file_id": 15, "content": "Function to check if a given list is correctly sorted. If not, returns the number of errors as score.", "type": "comment" }, - "358": { - "file_id": 16, + "352": { + "file_id": 15, "content": " - sum([1 for num in correct_list if num == i])\n )\n num_errors += sum(\n [1 for num1, num2 in zip(current_list, current_list[1:]) if num1 > num2]\n )\n return num_errors\n except:\n return 300", "type": "code", "location": "/examples/sorting/utils.py:71-78" }, - "359": { - "file_id": 16, + "353": { + "file_id": 15, "content": "This code calculates the number of errors in a sorted list by comparing adjacent elements. 
It uses list comprehensions and built-in Python functions like zip() and sum(). If an exception occurs, it returns 300 as a default value for num_errors.", "type": "comment" }, - "360": { - "file_id": 17, + "354": { + "file_id": 16, "content": "/graph_of_thoughts/controller/README.md", "type": "filepath" }, - "361": { - "file_id": 17, + "355": { + "file_id": 16, "content": "The Controller class manages the execution of a graph of operations using an LLM and requires custom prompter, parser, GoO, and AbstractLanguageModel. The code initializes an instance with these parameters, runs the executor, and outputs the generated graph to file.", "type": "summary" }, - "362": { - "file_id": 17, + "356": { + "file_id": 16, "content": "# Controller\nThe Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts.\nGoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). \nIn order for a GoO to be executed, an instance of Large Language Model (LLM) must be supplied to the controller (along with other required objects).\nPlease refer to the [Language Models](../language_models/README.md) section for more information about LLMs. \nThe following section describes how to instantiate the Controller to run a defined GoO. 
\n## Controller Instantiation\n- Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately.\n- Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations.\n```\nlm = ...create\ngraph_of_operations = ...create", "type": "code", "location": "/graph_of_thoughts/controller/README.md:1-16" }, - "363": { - "file_id": 17, + "357": { + "file_id": 16, "content": "The Controller class manages the execution of the Graph of Operations (GoO) using a Large Language Model (LLM). It requires custom Prompter and Parser, along with instantiated GraphOfOperations and AbstractLanguageModel. The initial state is represented as a dictionary for prompts in operations.", "type": "comment" }, - "364": { - "file_id": 17, + "358": { + "file_id": 16, "content": "executor = controller.Controller(\n lm,\n graph_of_operations,\n ,\n ,\n ,\n)\nexecutor.run()\nexecutor.output_graph(\"path/to/output.json\")\n```\n- After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost.", "type": "code", "location": "/graph_of_thoughts/controller/README.md:18-28" }, - "365": { - "file_id": 17, + "359": { + "file_id": 16, "content": "The code initializes an instance of the Controller class with necessary parameters, including a language model (lm), graph of operations, custom prompter and parser, and an initial state. 
It then runs the executor and writes the generated graph containing individual operations, thoughts, scores, validity, and token usage to an output file at the specified path.", "type": "comment" }, - "366": { - "file_id": 18, + "360": { + "file_id": 17, "content": "/graph_of_thoughts/controller/__init__.py", "type": "filepath" }, - "367": { - "file_id": 18, + "361": { + "file_id": 17, "content": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", "type": "summary" }, - "368": { - "file_id": 18, + "362": { + "file_id": 17, "content": "from .controller import Controller", "type": "code", "location": "/graph_of_thoughts/controller/__init__.py:1-1" }, - "369": { - "file_id": 18, + "363": { + "file_id": 17, "content": "This line is importing the \"Controller\" class from the \"controller\" module, which is located in the same package directory. 
This likely means that this code is part of a larger application where different modules handle different aspects of the program's functionality, and the \"Controller\" class manages some specific part or feature of the app.", "type": "comment" }, - "370": { - "file_id": 19, + "364": { + "file_id": 18, "content": "/graph_of_thoughts/controller/controller.py", "type": "filepath" }, - "371": { - "file_id": 19, + "365": { + "file_id": 18, "content": "The code manages the execution flow of a graph's operations using language models and classes for processing, serialization, and debugging, resulting in an organized list written to a JSON file.", "type": "summary" }, - "372": { - "file_id": 19, + "366": { + "file_id": 18, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport json\nimport logging\nfrom typing import List\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.operations import GraphOfOperations, Thought\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass Controller:\n \"\"\"\n Controller class to manage the execution flow of the Graph of Operations,\n generating the Graph Reasoning State.\n This involves language models, graph operations, prompting, and parsing.\n \"\"\"\n def __init__(\n self,\n lm: AbstractLanguageModel,\n graph: GraphOfOperations,\n prompter: Prompter,\n parser: Parser,\n problem_parameters: dict,\n ) -> None:\n \"\"\"\n Initialize the Controller instance with the language model,\n operations graph, prompter, parser, and problem parameters.", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:1-35" }, - "373": { - "file_id": 19, + "367": { + "file_id": 18, "content": "This code defines a Controller class to manage the execution flow of the Graph of Operations, utilizing 
language models, graph operations, prompting, and parsing. The Controller is initialized with an AbstractLanguageModel, GraphOfOperations, Prompter, Parser, and problem parameters.", "type": "comment" }, - "374": { - "file_id": 19, + "368": { + "file_id": 18, "content": " :param lm: An instance of the AbstractLanguageModel.\n :type lm: AbstractLanguageModel\n :param graph: The Graph of Operations to be executed.\n :type graph: OperationsGraph\n :param prompter: An instance of the Prompter class, used to generate prompts.\n :type prompter: Prompter\n :param parser: An instance of the Parser class, used to parse responses.\n :type parser: Parser\n :param problem_parameters: Initial parameters/state of the problem.\n :type problem_parameters: dict\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__module__)\n self.lm = lm\n self.graph = graph\n self.prompter = prompter\n self.parser = parser\n self.problem_parameters = problem_parameters\n self.run_executed = False\n def run(self) -> None:\n \"\"\"\n Run the controller and execute the operations from the Graph of\n Operations based on their readiness.\n Ensures the program is in a valid state before execution.", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:37-60" }, - "375": { - "file_id": 19, + "369": { + "file_id": 18, "content": "This function initializes a controller object with provided language model, graph of operations, prompter, parser, and problem parameters. It also sets the run_executed flag to False. 
The run method executes the operations from the Graph of Operations based on their readiness, ensuring the program is in a valid state before execution.", "type": "comment" }, - "376": { - "file_id": 19, + "370": { + "file_id": 18, "content": " :raises AssertionError: If the Graph of Operation has no roots.\n :raises AssertionError: If the successor of an operation is not in the Graph of Operations.\n \"\"\"\n self.logger.debug(\"Checking that the program is in a valid state\")\n assert self.graph.roots is not None, \"The operations graph has no root\"\n self.logger.debug(\"The program is in a valid state\")\n execution_queue = [\n operation\n for operation in self.graph.operations\n if operation.can_be_executed()\n ]\n while len(execution_queue) > 0:\n current_operation = execution_queue.pop(0)\n self.logger.info(\"Executing operation %s\", current_operation.operation_type)\n current_operation.execute(\n self.lm, self.prompter, self.parser, **self.problem_parameters\n )\n self.logger.info(\"Operation %s executed\", current_operation.operation_type)\n for operation in current_operation.successors:\n assert (", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:61-82" }, - "377": { - "file_id": 19, + "371": { + "file_id": 18, "content": "This code snippet is checking the validity of the program state and executing operations in a queue. It raises AssertionError if the Graph of Operations has no roots or if a successor operation is not found in the graph. 
The code logs debug messages for state checks, information messages for executed operations, and asserts to ensure proper execution order.", "type": "comment" }, - "378": { - "file_id": 19, + "372": { + "file_id": 18, "content": " operation in self.graph.operations\n ), \"The successor of an operation is not in the operations graph\"\n if operation.can_be_executed():\n execution_queue.append(operation)\n self.logger.info(\"All operations executed\")\n self.run_executed = True\n def get_final_thoughts(self) -> List[List[Thought]]:\n \"\"\"\n Retrieve the final thoughts after all operations have been executed.\n :return: List of thoughts for each operation in the graph's leaves.\n :rtype: List[List[Thought]]\n :raises AssertionError: If the `run` method hasn't been executed yet.\n \"\"\"\n assert self.run_executed, \"The run method has not been executed\"\n return [operation.get_thoughts() for operation in self.graph.leaves]\n def output_graph(self, path: str) -> None:\n \"\"\"\n Serialize the state and results of the operations graph to a JSON file.\n :param path: The path to the output file.\n :type path: str", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:83-106" }, - "379": { - "file_id": 19, + "373": { + "file_id": 18, "content": "Code snippet defines a class with methods to execute operations in a graph, retrieve final thoughts after execution, and serialize the graph state and results. The `run` method executes operations in the graph, checks if operation is in graph's operations, appends executable operations to an execution queue, logs information when all operations are executed, and sets `run_executed` flag to True. `get_final_thoughts` method retrieves final thoughts after execution of all operations by iterating through graph's leaves and getting thoughts from each operation. It raises AssertionError if the run method has not been executed yet. 
`output_graph` method serializes state and results of operations graph to a JSON file at specified path.", "type": "comment" }, - "380": { - "file_id": 19, + "374": { + "file_id": 18, "content": " \"\"\"\n output = []\n for operation in self.graph.operations:\n operation_serialized = {\n \"operation\": operation.operation_type.name,\n \"thoughts\": [thought.state for thought in operation.get_thoughts()],\n }\n if any([thought.scored for thought in operation.get_thoughts()]):\n operation_serialized[\"scored\"] = [\n thought.scored for thought in operation.get_thoughts()\n ]\n operation_serialized[\"scores\"] = [\n thought.score for thought in operation.get_thoughts()\n ]\n if any([thought.validated for thought in operation.get_thoughts()]):\n operation_serialized[\"validated\"] = [\n thought.validated for thought in operation.get_thoughts()\n ]\n operation_serialized[\"validity\"] = [\n thought.valid for thought in operation.get_thoughts()\n ]\n if any(", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:107-128" }, - "381": { - "file_id": 19, + "375": { + "file_id": 18, "content": "This code iterates through the operations in a graph, serializes each operation with its thoughts, and adds extra information if any thoughts have been scored, validated, or are invalid. 
This is used for generating an output list of serialized operations and associated data.", "type": "comment" }, - "382": { - "file_id": 19, + "376": { + "file_id": 18, "content": " [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n ):\n operation_serialized[\"compared_to_ground_truth\"] = [\n thought.compared_to_ground_truth\n for thought in operation.get_thoughts()\n ]\n operation_serialized[\"problem_solved\"] = [\n thought.solved for thought in operation.get_thoughts()\n ]\n output.append(operation_serialized)\n output.append(\n {\n \"prompt_tokens\": self.lm.prompt_tokens,\n \"completion_tokens\": self.lm.completion_tokens,\n \"cost\": self.lm.cost,\n }\n )\n with open(path, \"w\") as file:\n file.write(json.dumps(output, indent=2))", "type": "code", "location": "/graph_of_thoughts/controller/controller.py:129-152" }, - "383": { - "file_id": 19, + "377": { + "file_id": 18, "content": "This code iterates over the thoughts in each operation, compares them to ground truth, and determines if they were solved. The data is serialized and appended to a list, which is then written to a JSON file along with prompt, completion tokens, and cost information.", "type": "comment" }, - "384": { - "file_id": 20, + "378": { + "file_id": 19, "content": "/graph_of_thoughts/language_models/README.md", "type": "filepath" }, - "385": { - "file_id": 20, + "379": { + "file_id": 19, "content": "The Language Models module supports GPT-4/GPT-3.5 and Llama-2, with functionality for instantiating LLMs, adding new ones, and using OpenAI API features like pricing and response_token_cost. 
It is implemented in a base class for building language models that allows for querying and retrieving response texts.", "type": "summary" }, - "386": { - "file_id": 20, + "380": { + "file_id": 19, "content": "# Language Models\nThe Language Models module is responsible for managing the large language models (LLMs) used by the Controller.\nCurrently, the framework supports the following LLMs:\n- GPT-4 / GPT-3.5 (Remote - OpenAI API)\n- Llama-2 (Local - HuggingFace Transformers) \nThe following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework.\n## LLM Instantiation\n- Create a copy of `config_template.json` named `config.json`.\n- Fill configuration details based on the used model (below).\n### GPT-4 / GPT-3.5\n- Adjust predefined `chatgpt`, `chatgpt4` or create new configuration with an unique key.\n| Key | Value |", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:1-18" }, - "387": { - "file_id": 20, + "381": { + "file_id": 19, "content": "This code introduces the Language Models module and explains its purpose. It currently supports GPT-4/GPT-3.5 (Remote - OpenAI API) and Llama-2 (Local - HuggingFace Transformers). The following sections describe how to instantiate individual LLMs and add new ones to the framework. 
The LLM instantiation process involves creating a copy of `config_template.json`, filling in configuration details based on the used model, and adjusting predefined configurations or creating a new one with an unique key for GPT-4/GPT-3.5.", "type": "comment" }, - "388": { - "file_id": 20, + "382": { + "file_id": 19, "content": "|---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Model name based on [OpenAI model overview](https://platform.openai.com/docs/models/overview). |\n| prompt_token_cost | Price per 1000 prompt tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. ", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:19-21" }, - "389": { - "file_id": 20, + "383": { + "file_id": 19, "content": "This table maps model IDs to their respective OpenAI names and calculates prompt token costs based on OpenAI pricing, which is used for determining cumulative prices per language modeling (LLM) instance.", "type": "comment" }, - "390": { - "file_id": 20, + "384": { + "file_id": 19, "content": " |\n| response_token_cost | Price per 1000 response tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance. |\n| temperature | Parameter of OpenAI models that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 2.0, default is 1.0. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature). 
|\n| max_tokens | The maximum number of tokens to generate in the chat completion. Value ", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:21-24" }, - "391": { - "file_id": 20, + "385": { + "file_id": 19, "content": "The code defines 'response_token_cost', a variable representing the price per 1000 response tokens, which follows OpenAI's pricing. It also includes 'temperature', a parameter controlling randomness and creativity in responses. The value is between 0.0 and 2.0, defaulting to 1.0, with further details available in the OpenAI API reference. Lastly, 'max_tokens' sets the maximum number of tokens generated in chat completions.", "type": "comment" }, - "392": { - "file_id": 20, + "386": { + "file_id": 19, "content": "depends on the maximum context size of the model specified in the [OpenAI model overview](https://platform.openai.com/docs/models/overview). More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens). |\n| stop | String or array of strings specifying sequence of characters which if detected, stops further generation of tokens. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-stop). |\n| organization | Organization to use for the API requests (may be empty). |", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:24-26" }, - "393": { - "file_id": 20, + "387": { + "file_id": 19, "content": "This code defines three input parameters for the OpenAI API's chat creation endpoint: \"model\", \"stop\", and \"organization\". The model parameter specifies the language model to use, with its maximum context size determined by the OpenAI model overview. The stop parameter identifies a sequence of characters that halt further token generation, referencing the OpenAI API reference for more information. 
Lastly, organization is an optional field used for API requests, which can be left empty.", "type": "comment" }, - "394": { - "file_id": 20, + "388": { + "file_id": 19, "content": "| api_key | Personal API key that will be used to access OpenAI API. |\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.ChatGPT(\n \"path/to/config.json\", \n model_name=\n)\n```\n### Llama-2\n- Requires local hardware to run inference and a HuggingFace account.\n- Adjust predefined `llama7b-hf`, `llama13b-hf`, `llama70b-hf` or create a new configuration with an unique key.\n| Key | Value |\n|---------------------|----------------", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:27-42" }, - "395": { - "file_id": 20, + "389": { + "file_id": 19, "content": "The code snippet is initializing a language model controller using the ChatGPT class. It takes in the path to a configuration file and a model name corresponding to the selected configuration key. The model can be predefined (llama7b-hf, llama13b-hf, llama70b-hf) or custom with a unique key.", "type": "comment" }, - "396": { - "file_id": 20, + "390": { + "file_id": 19, "content": "-----------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| model_id | Specifies HuggingFace Llama 2 model identifier (`meta-llama/`). |\n| cache_dir | Local directory where model will be downloaded and accessed. |\n| prompt_token_cost | Price per 1000 prompt tokens (currently not used - local model = no cost). |\n| response_token_cost | Price per 1000 response tokens (currently not used - local model = no cost). 
|\n| temperature | Parameter ", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:42-47" }, - "397": { - "file_id": 20, + "391": { + "file_id": 19, "content": "This code block is defining the parameters for a language model, including the Llama 2 model identifier (`model_id`), the local directory where the model will be stored and accessed (`cache_dir`), the price per 1000 prompt tokens (`prompt_token_cost`), the price per 1000 response tokens (`response_token_cost`), and a parameter for temperature control. Note that currently, these costs are not used due to the local model being cost-free.", "type": "comment" }, - "398": { - "file_id": 20, + "392": { + "file_id": 19, "content": "that controls randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 1.0, default is 0.6. |\n| top_k | Top-K sampling method described in [Transformers tutorial](https://huggingface.co/blog/how-to-generate). Default value is set to 10. |\n| max_tokens | The maximum number of tokens to generate in the chat completion. More tokens require more memory. |\n- Instantiate the language model based on the selected configuration key (predefined / custom).\n```\nlm = controller.Llama2HF(\n \"path/to/config.json\", \n model_name=\n)\n```\n- Request access to Llama-2 via the [Meta form](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) using the same email address as for the HuggingFace account.\n- After the access is granted, go to [HuggingFace Llama-2 model ca", "type": "code", "location": "/graph_of_thoughts/language_models/README.md:47-59" }, - "399": { - "file_id": 20, + "393": { + "file_id": 19, "content": "The code initializes a language model (Llama2HF) with a specified configuration key, which determines the randomness and creativity of responses. It also sets top-K sampling method from Transformers tutorial and maximum tokens to generate in chat completion. 
Access to Llama-2 is requested via Meta form using the same email as HuggingFace account, then access HuggingFace Llama-2 model page.", "type": "comment" + }, + "394": { + "file_id": 19, + "content": "rd](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (_\"You have been granted access to this model\"_ message should appear).\n- Generate HuggingFace access token.\n- Log in from CLI with: `huggingface-cli login --token `.\nNote: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library).\n## Adding LLMs\nMore LLMs can be added by following these steps:\n- Create new class as a subclass of `AbstractLanguageModel`.\n- Use the constructor for loading configuration and instantiating the language model (if needed). \n```\nclass CustomLanguageModel(AbstractLanguageModel):\n def __init__(\n self,\n config_path: str = \"\",\n model_name: str = \"llama7b-hf\",\n cache: bool = False\n ) -> None:", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:59-76" + }, + "395": { + "file_id": 19, + "content": "This code provides instructions for adding a new LLM (Language Language Model) to the existing model. To do so, create a subclass of `AbstractLanguageModel` and use the constructor to load configuration and instantiate the language model if needed. The model is downloaded from HuggingFace into the cache directory specified in the config.json. 
Running queries with larger models may require multiple GPUs, which will be automatically split by the Transformers library.", + "type": "comment" + }, + "396": { + "file_id": 19, + "content": " super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Load data from configuration into variables if needed\n # Instantiate LLM if needed\n```\n- Implement `query` abstract method that is used to get a list of responses from the LLM (call to remote API or local model inference).\n```\ndef query(self, query: str, num_responses: int = 1) -> Any:\n # Support caching \n # Call LLM and retrieve list of responses - based on num_responses \n # Return LLM response structure (not only raw strings) \n```\n- Implement `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`.\n```\ndef get_response_texts(self, query_response: Union[List[Dict], Dict]) -> List[str]:\n # Retrieve list of raw strings from the LLM response structure \n```", + "type": "code", + "location": "/graph_of_thoughts/language_models/README.md:77-95" + }, + "397": { + "file_id": 19, + "content": "The code is a part of a class that serves as a base for building language models. It loads configuration and initializes the model. The `query` method calls the LLM to get responses based on a query, while `get_response_texts` retrieves raw texts from the response structure produced by `query`. 
These methods are abstract and need to be implemented in child classes.", + "type": "comment" + }, + "398": { + "file_id": 20, + "content": "/graph_of_thoughts/language_models/__init__.py", + "type": "filepath" + }, + "399": { + "file_id": 20, + "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", + "type": "summary" } } \ No newline at end of file diff --git a/docs/data/4.json b/docs/data/4.json index ff1ad66..2942203 100644 --- a/docs/data/4.json +++ b/docs/data/4.json @@ -1,543 +1,544 @@ { "400": { "file_id": 20, - "content": "rd](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (_\"You have been granted access to this model\"_ message should appear).\n- Generate HuggingFace access token.\n- Log in from CLI with: `huggingface-cli login --token `.\nNote: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library).\n## Adding LLMs\nMore LLMs can be added by following these steps:\n- Create new class as a subclass of `AbstractLanguageModel`.\n- Use the constructor for loading configuration and instantiating the language model (if needed). 
\n```\nclass CustomLanguageModel(AbstractLanguageModel):\n def __init__(\n self,\n config_path: str = \"\",\n model_name: str = \"llama7b-hf\",\n cache: bool = False\n ) -> None:", + "content": "from .abstract_language_model import AbstractLanguageModel\nfrom .chatgpt import ChatGPT\nfrom .llamachat_hf import Llama2HF", "type": "code", - "location": "/graph_of_thoughts/language_models/README.md:59-76" + "location": "/graph_of_thoughts/language_models/__init__.py:1-3" }, "401": { "file_id": 20, - "content": "This code provides instructions for adding a new LLM (Language Language Model) to the existing model. To do so, create a subclass of `AbstractLanguageModel` and use the constructor to load configuration and instantiate the language model if needed. The model is downloaded from HuggingFace into the cache directory specified in the config.json. Running queries with larger models may require multiple GPUs, which will be automatically split by the Transformers library.", + "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", "type": "comment" }, "402": { - "file_id": 20, - "content": " super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Load data from configuration into variables if needed\n # Instantiate LLM if needed\n```\n- Implement `query` abstract method that is used to get a list of responses from the LLM (call to remote API or local model inference).\n```\ndef query(self, query: str, num_responses: int = 1) -> Any:\n # Support caching \n # Call LLM and retrieve list of responses - based on num_responses \n # Return LLM response structure (not only raw strings) \n```\n- Implement `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`.\n```\ndef get_response_texts(self, query_response: Union[List[Dict], Dict]) -> List[str]:\n # 
Retrieve list of raw strings from the LLM response structure \n```", - "type": "code", - "location": "/graph_of_thoughts/language_models/README.md:77-95" - }, - "403": { - "file_id": 20, - "content": "The code is a part of a class that serves as a base for building language models. It loads configuration and initializes the model. The `query` method calls the LLM to get responses based on a query, while `get_response_texts` retrieves raw texts from the response structure produced by `query`. These methods are abstract and need to be implemented in child classes.", - "type": "comment" - }, - "404": { - "file_id": 21, - "content": "/graph_of_thoughts/language_models/__init__.py", - "type": "filepath" - }, - "405": { - "file_id": 21, - "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", - "type": "summary" - }, - "406": { - "file_id": 21, - "content": "from .abstract_language_model import AbstractLanguageModel\nfrom .chatgpt import ChatGPT\nfrom .llamachat_hf import Llama2HF", - "type": "code", - "location": "/graph_of_thoughts/language_models/__init__.py:1-3" - }, - "407": { "file_id": 21, - "content": "This code imports the necessary classes (AbstractLanguageModel, ChatGPT, and Llama2HF) from their respective submodules in the language_models package.", - "type": "comment" - }, - "408": { - "file_id": 22, "content": "/graph_of_thoughts/language_models/abstract_language_model.py", "type": "filepath" }, - "409": { - "file_id": 22, + "403": { + "file_id": 21, "content": "This code defines an AbstractLanguageModel class with config file path, model name, and caching options for language models. 
It also includes two abstract methods: 'query' and 'get_response_texts', serving as placeholders for derived classes to implement their own functionality.", "type": "summary" }, - "410": { - "file_id": 22, + "404": { + "file_id": 21, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom abc import ABC, abstractmethod\nfrom typing import List, Dict, Union, Any\nimport json\nimport os\nimport logging\nclass AbstractLanguageModel(ABC):\n \"\"\"\n Abstract base class that defines the interface for all language models.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the AbstractLanguageModel instance with configuration, model details, and caching options.\n :param config_path: Path to the config file. Defaults to \"\".\n :type config_path: str\n :param model_name: Name of the language model. Defaults to \"\".\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.\n :type cache: bool\n \"\"\"\n self.logger = logging.getLogger(self.__class__.__name__)", "type": "code", "location": "/graph_of_thoughts/language_models/abstract_language_model.py:1-34" }, - "411": { - "file_id": 22, + "405": { + "file_id": 21, "content": "This code snippet defines an abstract base class, AbstractLanguageModel, for language models with config file path, model name, and caching options in the initializer. 
It also initializes a logger for logging purposes.", "type": "comment" }, - "412": { - "file_id": 22, + "406": { + "file_id": 21, "content": " self.config: Dict = None\n self.model_name: str = model_name\n self.cache = cache\n if self.cache:\n self.respone_cache: Dict[str, List[Any]] = {}\n self.load_config(config_path)\n self.prompt_tokens: int = 0\n self.completion_tokens: int = 0\n self.cost: float = 0.0\n def load_config(self, path: str) -> None:\n \"\"\"\n Load configuration from a specified path.\n :param path: Path to the config file. If an empty path provided,\n default is `config.json` in the current directory.\n :type path: str\n \"\"\"\n if path == \"\":\n current_dir = os.path.dirname(os.path.abspath(__file__))\n path = os.path.join(current_dir, \"config.json\")\n with open(path, \"r\") as f:\n self.config = json.load(f)\n self.logger.debug(f\"Loaded config from {path} for {self.model_name}\")\n def clear_cache(self) -> None:\n \"\"\"\n Clear the response cache.\n \"\"\"\n self.respone_cache.clear()", "type": "code", "location": "/graph_of_thoughts/language_models/abstract_language_model.py:35-66" }, - "413": { - "file_id": 22, + "407": { + "file_id": 21, "content": "This code initializes an abstract language model object with optional cache and loads its configuration from a specified file. 
It also provides methods to clear the response cache.", "type": "comment" }, - "414": { - "file_id": 22, + "408": { + "file_id": 21, "content": " @abstractmethod\n def query(self, query: str, num_responses: int = 1) -> Any:\n \"\"\"\n Abstract method to query the language model.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: The number of desired responses.\n :type num_responses: int\n :return: The language model's response(s).\n :rtype: Any\n \"\"\"\n pass\n @abstractmethod\n def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]:\n \"\"\"\n Abstract method to extract response texts from the language model's response(s).\n :param query_responses: The responses returned from the language model.\n :type query_responses: Union[List[Any], Any]\n :return: List of textual responses.\n :rtype: List[str]\n \"\"\"\n pass", "type": "code", "location": "/graph_of_thoughts/language_models/abstract_language_model.py:68-92" }, - "415": { - "file_id": 22, + "409": { + "file_id": 21, "content": "This code defines two abstract methods for a language model. The 'query' method takes a query and the desired number of responses, but doesn't specify what it should do with them. The 'get_response_texts' method expects response(s) from the language model, but doesn't clarify how to extract textual data. 
It serves as a placeholder for derived classes to implement their own functionality.", "type": "comment" }, - "416": { - "file_id": 23, + "410": { + "file_id": 22, "content": "/graph_of_thoughts/language_models/chatgpt.py", "type": "filepath" }, - "417": { - "file_id": 23, + "411": { + "file_id": 22, "content": "The code creates a ChatGPT class that inherits from AbstractLanguageModel, initializes with configuration and model details, sets query parameters, supports multiple responses, uses OpenAI's chat API, incorporates backoff and caching for optimization, logs response texts and costs, and utilizes `get_response_texts` to extract response strings.", "type": "summary" }, - "418": { - "file_id": 23, + "412": { + "file_id": 22, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nimport backoff\nimport os\nimport random\nimport time\nfrom typing import List, Dict, Union\nfrom openai import OpenAI, OpenAIError\nfrom openai.types.chat.chat_completion import ChatCompletion\nfrom .abstract_language_model import AbstractLanguageModel\nclass ChatGPT(AbstractLanguageModel):\n \"\"\"\n The ChatGPT class handles interactions with the OpenAI models using the provided configuration.\n Inherits from the AbstractLanguageModel and implements its abstract methods.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"chatgpt\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize the ChatGPT instance with configuration, model details, and caching options.\n :param config_path: Path to the configuration file. Defaults to \"\".\n :type config_path: str\n ", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:1-35" }, - "419": { - "file_id": 23, + "413": { + "file_id": 22, "content": "This code is the initialization of a class called ChatGPT. 
It inherits from AbstractLanguageModel and initializes with configuration, model details, and caching options. The config_path parameter is for the path to a configuration file and defaults to an empty string. The model_name parameter specifies the model to be used, defaulting to \"chatgpt\", and cache can be set to True or False for enabling or disabling caching respectively.", "type": "comment" }, - "420": { - "file_id": 23, + "414": { + "file_id": 22, "content": " :param model_name: Name of the model, default is 'chatgpt'. Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.\n :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # The model_id is the id of the model that is used for chatgpt, i.e. gpt-4, gpt-3.5-turbo, etc.\n self.model_id: str = self.config[\"model_id\"]\n # The prompt_token_cost and response_token_cost are the costs for 1000 prompt tokens and 1000 response tokens respectively.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature of a model is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # The maximum number of tokens to generate in the chat completion.", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:35-49" }, - "421": { - "file_id": 23, + "415": { + "file_id": 22, "content": "The code initializes a model with a specified name and sets the cache flag. 
It retrieves the model ID, prompt token cost, response token cost, temperature, and maximum number of tokens for chat completion from the configuration file.", "type": "comment" }, - "422": { - "file_id": 23, + "416": { + "file_id": 22, "content": " self.max_tokens: int = self.config[\"max_tokens\"]\n # The stop sequence is a sequence of tokens that the model will stop generating at (it will not generate the stop sequence).\n self.stop: Union[str, List[str]] = self.config[\"stop\"]\n # The account organization is the organization that is used for chatgpt.\n self.organization: str = self.config[\"organization\"]\n if self.organization == \"\":\n self.logger.warning(\"OPENAI_ORGANIZATION is not set\")\n self.api_key: str = os.getenv(\"OPENAI_API_KEY\", self.config[\"api_key\"])\n if self.api_key == \"\":\n raise ValueError(\"OPENAI_API_KEY is not set\")\n # Initialize the OpenAI Client\n self.client = OpenAI(api_key=self.api_key, organization=self.organization)\n def query(\n self, query: str, num_responses: int = 1\n ) -> Union[List[ChatCompletion], ChatCompletion]:\n \"\"\"\n Query the OpenAI model for responses.\n :param query: The query to be posed to the language model.", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:50-69" }, - "423": { - "file_id": 23, + "417": { + "file_id": 22, "content": "This code initializes an instance of a language model and sets parameters such as maximum tokens, stop sequence, organization, API key, and initializes the OpenAI client. 
It also includes a query method to ask the language model for responses.", "type": "comment" }, - "424": { - "file_id": 23, + "418": { + "file_id": 22, "content": " :type query: str\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: Response(s) from the OpenAI model.\n :rtype: Dict\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n if num_responses == 1:\n response = self.chat([{\"role\": \"user\", \"content\": query}], num_responses)\n else:\n response = []\n next_try = num_responses\n total_num_attempts = num_responses\n while num_responses > 0 and total_num_attempts > 0:\n try:\n assert next_try > 0\n res = self.chat([{\"role\": \"user\", \"content\": query}], next_try)\n response.append(res)\n num_responses -= next_try\n next_try = min(num_responses, next_try)\n except Exception as e:\n next_try = (next_try + 1) // 2\n self.logger.warning(", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:70-94" }, - "425": { - "file_id": 23, + "419": { + "file_id": 22, "content": "The code defines a function that takes a query and the number of desired responses. If the query is in the cache, it returns the corresponding response(s). If not, it calls the OpenAI chat model to generate responses for the given query. It supports generating multiple responses by repeatedly calling the OpenAI model until the required number of responses are obtained or an exception occurs. 
The function also logs any warnings during the process.", "type": "comment" }, - "426": { - "file_id": 23, + "420": { + "file_id": 22, "content": " f\"Error in chatgpt: {e}, trying again with {next_try} samples\"\n )\n time.sleep(random.randint(1, 3))\n total_num_attempts -= 1\n if self.cache:\n self.respone_cache[query] = response\n return response\n @backoff.on_exception(backoff.expo, OpenAIError, max_time=10, max_tries=6)\n def chat(self, messages: List[Dict], num_responses: int = 1) -> ChatCompletion:\n \"\"\"\n Send chat messages to the OpenAI model and retrieves the model's response.\n Implements backoff on OpenAI error.\n :param messages: A list of message dictionaries for the chat.\n :type messages: List[Dict]\n :param num_responses: Number of desired responses, default is 1.\n :type num_responses: int\n :return: The OpenAI model's response.\n :rtype: ChatCompletion\n \"\"\"\n response = self.client.chat.completions.create(\n model=self.model_id,\n messages=messages,", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:95-119" }, - "427": { - "file_id": 23, + "421": { + "file_id": 22, "content": "This code is defining a class with a chat method that sends messages to the OpenAI model and retrieves the response. The method implements backoff on OpenAI error, allowing for multiple attempts if an error occurs. 
It also includes caching functionality to improve performance by storing previous responses in a cache.", "type": "comment" }, - "428": { - "file_id": 23, + "422": { + "file_id": 22, "content": " temperature=self.temperature,\n max_tokens=self.max_tokens,\n n=num_responses,\n stop=self.stop,\n )\n self.prompt_tokens += response.usage.prompt_tokens\n self.completion_tokens += response.usage.completion_tokens\n prompt_tokens_k = float(self.prompt_tokens) / 1000.0\n completion_tokens_k = float(self.completion_tokens) / 1000.0\n self.cost = (\n self.prompt_token_cost * prompt_tokens_k\n + self.response_token_cost * completion_tokens_k\n )\n self.logger.info(\n f\"This is the response from chatgpt: {response}\"\n f\"\\nThis is the cost of the response: {self.cost}\"\n )\n return response\n def get_response_texts(\n self, query_response: Union[List[ChatCompletion], ChatCompletion]\n ) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_response: The response dictionary (or list of dictionaries) from the OpenAI model.", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:120-146" }, - "429": { - "file_id": 23, + "423": { + "file_id": 22, "content": "This code interacts with an OpenAI model, specifically the ChatGPT API. It takes a query as input and generates multiple responses using the API. The code keeps track of usage costs in terms of prompt and completion tokens, and logs the response text along with the cost for each generated response. 
The `get_response_texts` method extracts the response texts from the query response dictionary or list of dictionaries returned by the OpenAI model.", "type": "comment" }, - "430": { - "file_id": 23, + "424": { + "file_id": 22, "content": " :type query_response: Union[List[ChatCompletion], ChatCompletion]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n if not isinstance(query_response, List):\n query_response = [query_response]\n return [\n choice.message.content\n for response in query_response\n for choice in response.choices\n ]", "type": "code", "location": "/graph_of_thoughts/language_models/chatgpt.py:147-157" }, - "431": { - "file_id": 23, + "425": { + "file_id": 22, "content": "This function converts a single ChatCompletion or list of them into a list of response strings by iterating over the choices within each completion and extracting their content.", "type": "comment" }, - "432": { - "file_id": 24, + "426": { + "file_id": 23, "content": "/graph_of_thoughts/language_models/config_template.json", "type": "filepath" }, - "433": { - "file_id": 24, + "427": { + "file_id": 23, "content": "The code provides a generic language model configuration template, including parameters for model ID, prompt and response token costs, temperature, max tokens, stop words, cache directory (\"/llama\"), and optional values (top-k=10). 
This is a user-specific config without API key or organization.", "type": "summary" }, - "434": { - "file_id": 24, + "428": { + "file_id": 23, "content": "{\n \"chatgpt\" : {\n \"model_id\": \"gpt-3.5-turbo\",\n \"prompt_token_cost\": 0.0015,\n \"response_token_cost\": 0.002,\n \"temperature\": 1.0,\n \"max_tokens\": 1536,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"chatgpt4\" : {\n \"model_id\": \"gpt-4\",\n \"prompt_token_cost\": 0.03,\n \"response_token_cost\": 0.06,\n \"temperature\": 1.0,\n \"max_tokens\": 4096,\n \"stop\": null,\n \"organization\": \"\",\n \"api_key\": \"\"\n },\n \"llama7b-hf\" : {\n \"model_id\": \"Llama-2-7b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama13b-hf\" : {\n \"model_id\": \"Llama-2-13b-chat-hf\",\n \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n },\n \"llama70b-hf\" : {\n \"model_id\": \"Llama-2-70b-chat-hf\",", "type": "code", "location": "/graph_of_thoughts/language_models/config_template.json:1-41" }, - "435": { - "file_id": 24, + "429": { + "file_id": 23, "content": "This code appears to be a configuration template for language models, with each model (such as \"chatgpt\", \"chatgpt4\", \"llama7b-hf\", etc.) defined by its own set of parameters including the model ID, prompt and response token costs, temperature, max tokens, and optional stop words. The \"cache_dir\" parameter is specific to Llama models, suggesting these models require local caching. 
The absence of an API key and organization suggests that this is a generic template for user-specific configurations.", "type": "comment" }, - "436": { - "file_id": 24, + "430": { + "file_id": 23, "content": " \"cache_dir\": \"/llama\",\n \"prompt_token_cost\": 0.0,\n \"response_token_cost\": 0.0,\n \"temperature\": 0.6,\n \"top_k\": 10,\n \"max_tokens\": 4096\n }\n}", "type": "code", "location": "/graph_of_thoughts/language_models/config_template.json:42-49" }, - "437": { - "file_id": 24, + "431": { + "file_id": 23, "content": "This code snippet contains a configuration template for a language model. It sets the cache directory path as \"/llama\", prompts and response tokens costs to 0, temperature at 0.6, top-k value as 10, and maximum generated token count as 4096.", "type": "comment" }, - "438": { - "file_id": 25, + "432": { + "file_id": 24, "content": "/graph_of_thoughts/language_models/llamachat_hf.py", "type": "filepath" }, - "439": { - "file_id": 25, + "433": { + "file_id": 24, "content": "The code initializes the LLaMA 2 model for text generation, sets up configurations and tokenizer, creates a pipeline, defines a method to generate responses by querying the model, formats responses into dictionaries, and extracts \"generated_text\" from multiple query response dictionaries.", "type": "summary" }, - "440": { - "file_id": 25, + "434": { + "file_id": 24, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Ales Kubicek\nimport os\nimport torch\nfrom typing import List, Dict, Union\nfrom .abstract_language_model import AbstractLanguageModel\nclass Llama2HF(AbstractLanguageModel):\n \"\"\"\n An interface to use LLaMA 2 models through the HuggingFace library.\n \"\"\"\n def __init__(\n self, config_path: str = \"\", model_name: str = \"llama7b-hf\", cache: bool = False\n ) -> None:\n \"\"\"\n Initialize an instance of the 
Llama2HF class with configuration, model details, and caching options.\n :param config_path: Path to the configuration file. Defaults to an empty string.\n :type config_path: str\n :param model_name: Specifies the name of the LLaMA model variant. Defaults to \"llama7b-hf\".\n Used to select the correct configuration.\n :type model_name: str\n :param cache: Flag to determine whether to cache responses. Defaults to False.", "type": "code", "location": "/graph_of_thoughts/language_models/llamachat_hf.py:1-31" }, - "441": { - "file_id": 25, + "435": { + "file_id": 24, "content": "The code imports necessary libraries, defines a class Llama2HF as an interface for using LLaMA 2 models through HuggingFace library, and initializes the class with configuration, model name, and caching options.", "type": "comment" }, - "442": { - "file_id": 25, + "436": { + "file_id": 24, "content": " :type cache: bool\n \"\"\"\n super().__init__(config_path, model_name, cache)\n self.config: Dict = self.config[model_name]\n # Detailed id of the used model.\n self.model_id: str = self.config[\"model_id\"]\n # Costs for 1000 tokens.\n self.prompt_token_cost: float = self.config[\"prompt_token_cost\"]\n self.response_token_cost: float = self.config[\"response_token_cost\"]\n # The temperature is defined as the randomness of the model's output.\n self.temperature: float = self.config[\"temperature\"]\n # Top K sampling.\n self.top_k: int = self.config[\"top_k\"]\n # The maximum number of tokens to generate in the chat completion.\n self.max_tokens: int = self.config[\"max_tokens\"]\n # Important: must be done before importing transformers\n os.environ[\"TRANSFORMERS_CACHE\"] = self.config[\"cache_dir\"]\n import transformers\n hf_model_id = f\"meta-llama/{self.model_id}\"\n model_config = transformers.AutoConfig.from_pretrained(hf_model_id)", "type": "code", "location": "/graph_of_thoughts/language_models/llamachat_hf.py:32-53" }, - "443": { - "file_id": 25, + "437": { + "file_id": 24, 
"content": "The code initializes a class and sets various attributes such as model_id, prompt and response token costs, temperature, top K sampling, and maximum tokens. It also sets the Transformers library cache environment variable before importing it to avoid conflicts with other caches.", "type": "comment" }, - "444": { - "file_id": 25, + "438": { + "file_id": 24, "content": " bnb_config = transformers.BitsAndBytesConfig(\n load_in_4bit=True,\n bnb_4bit_quant_type=\"nf4\",\n bnb_4bit_use_double_quant=True,\n bnb_4bit_compute_dtype=torch.bfloat16,\n )\n self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id)\n self.model = transformers.AutoModelForCausalLM.from_pretrained(\n hf_model_id,\n trust_remote_code=True,\n config=model_config,\n quantization_config=bnb_config,\n device_map=\"auto\",\n )\n self.model.eval()\n torch.no_grad()\n self.generate_text = transformers.pipeline(\n model=self.model, tokenizer=self.tokenizer, task=\"text-generation\"\n )\n def query(self, query: str, num_responses: int = 1) -> List[Dict]:\n \"\"\"\n Query the LLaMA 2 model for responses.\n :param query: The query to be posed to the language model.\n :type query: str\n :param num_responses: Number of desired responses, default is 1.", "type": "code", "location": "/graph_of_thoughts/language_models/llamachat_hf.py:54-82" }, - "445": { - "file_id": 25, + "439": { + "file_id": 24, "content": "The code initializes an LLaMA model for text generation, loads the tokenizer and model configurations, and creates a text generation pipeline. 
It also provides a function to query the model with a given input query and can generate multiple responses depending on the provided number of desired responses.", "type": "comment" }, - "446": { - "file_id": 25, + "440": { + "file_id": 24, "content": " :type num_responses: int\n :return: Response(s) from the LLaMA 2 model.\n :rtype: List[Dict]\n \"\"\"\n if self.cache and query in self.respone_cache:\n return self.respone_cache[query]\n sequences = []\n query = f\"<>You are a helpful assistant. Always follow the intstructions precisely and output the response exactly in the requested format.<>\\n\\n[INST] {query} [/INST]\"\n for _ in range(num_responses):\n sequences.extend(\n self.generate_text(\n query,\n do_sample=True,\n top_k=self.top_k,\n num_return_sequences=1,\n eos_token_id=self.tokenizer.eos_token_id,\n max_length=self.max_tokens,\n )\n )\n response = [\n {\"generated_text\": sequence[\"generated_text\"][len(query) :].strip()}\n for sequence in sequences\n ]\n if self.cache:\n self.respone_cache[query] = response", "type": "code", "location": "/graph_of_thoughts/language_models/llamachat_hf.py:83-107" }, - "447": { - "file_id": 25, + "441": { + "file_id": 24, "content": "This code defines a method that generates responses from the LLaMA 2 language model. It first checks if the response is cached, then creates a query with system instructions and input. It generates multiple responses using the `generate_text` function, stores them in a list, and formats them into a response dictionary. 
Finally, it caches the response if necessary.", "type": "comment" }, - "448": { - "file_id": 25, + "442": { + "file_id": 24, "content": " return response\n def get_response_texts(self, query_responses: List[Dict]) -> List[str]:\n \"\"\"\n Extract the response texts from the query response.\n :param query_responses: The response list of dictionaries generated from the `query` method.\n :type query_responses: List[Dict]\n :return: List of response strings.\n :rtype: List[str]\n \"\"\"\n return [query_response[\"generated_text\"] for query_response in query_responses]", "type": "code", "location": "/graph_of_thoughts/language_models/llamachat_hf.py:108-119" }, - "449": { - "file_id": 25, + "443": { + "file_id": 24, "content": "This function takes a list of query response dictionaries, extracts the \"generated_text\" key from each dictionary and returns a list of those extracted texts.", "type": "comment" }, - "450": { - "file_id": 26, + "444": { + "file_id": 25, "content": "/graph_of_thoughts/operations/README.md", "type": "filepath" }, - "451": { - "file_id": 26, + "445": { + "file_id": 25, "content": "The Operations module manages thought manipulation with language models and helper classes, including 'ValidateAndImprove' and 'Generate' operations, as well as three additional operations: **KeepValid**, **Selector**, and **GroundTruth** for thought processing systems.", "type": "summary" }, - "452": { - "file_id": 26, + "446": { + "file_id": 25, "content": "# Operations\nThe Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class. \nOperations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model. 
\nThe [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State.\n## Graph of Operations\nThe [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here’s how you can construct and work with the Graph of Operations:\n### Initialization\nCreating a new instance of GraphOfOperations:\n```python\nfrom graph_of_thoughts.operations import GraphOfOperations", "type": "code", "location": "/graph_of_thoughts/operations/README.md:1-14" }, - "453": { - "file_id": 26, + "447": { + "file_id": 25, "content": "This code snippet describes the Operations module, which contains operations for manipulating and processing thoughts represented by the Thought class. It uses a language model and helper classes like Prompter and Parser for communication and result extraction. The Graph of Operations is the main class that orchestrates operations and maintains thought graph state.", "type": "comment" }, - "454": { - "file_id": 26, + "448": { + "file_id": 25, "content": "graph = GraphOfOperations()\n```\nUpon initialization, the graph will be empty with no operations, roots, or leaves.\n### Adding Operations\n**Append Operation:** You can append operations to the end of the graph using the append_operation method. 
This ensures that the operation becomes a successor to all current leaf operations in the graph.\n```python\nfrom graph_of_thoughts.operations import Generate\noperationA = Generate()\ngraph.append_operation(operationA)\n```\n**Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method.\n```python\noperationB = Generate()\noperationB.predecessors.append(operationA)\ngraph.add_operation(operationB)\n```\nRemember to set up the predecessors (and optionally successors) for your operation before adding it to the graph.\n## Available Operations\nThe following operations are available in the module:\n**Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function.", "type": "code", "location": "/graph_of_thoughts/operations/README.md:16-40" }, - "455": { - "file_id": 26, + "449": { + "file_id": 25, "content": "The code initializes a GraphOfOperations object, which starts empty and can be used to add operations with relationships. Operations can be appended at the end or added while specifying their predecessors. Available operations include the Score operation for scoring thoughts using LLM or custom scoring functions.", "type": "comment" }, - "456": { - "file_id": 26, + "450": { + "file_id": 25, "content": "- num_samples (Optional): The number of samples to use for scoring, defaults to 1.\n- combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False.\n- scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought.\n**ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it. 
\n- num_samples (Optional): The number of samples to use for validation, defaults to 1.\n- improve (Optional): Whether to improve the thought if it is invalid, defaults to True.\n- num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3.\n- validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid.\n**Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py). ", "type": "code", "location": "/graph_of_thoughts/operations/README.md:41-51" }, - "457": { - "file_id": 26, + "451": { + "file_id": 25, "content": "This code describes several operations for a thought processing system. The 'ValidateAndImprove' operation validates each thought and attempts to improve it if invalid, while the 'Generate' operation generates new thoughts based on previous ones or initial input to the Controller. Optional parameters include number of samples, scoring function, validation function, and whether to improve or generate new thoughts.", "type": "comment" }, - "458": { - "file_id": 26, + "452": { + "file_id": 25, "content": "- num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n- num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1.\n**Improve:** Improve the current thoughts. This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them. \n**Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought. \n- num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). 
Defaults to 1.\n**KeepBestN:** Keep the best N thoughts from the preceding thoughts. Assumes that the thoughts are already scored and throws an error if they are not.\n- n: The number of thoughts to keep in order of score.\n- higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True.", "type": "code", "location": "/graph_of_thoughts/operations/README.md:52-62" }, - "459": { - "file_id": 26, + "453": { + "file_id": 25, "content": "This code snippet provides details about the available operations and their respective parameters for generating, aggregating, or filtering thoughts. It allows users to generate multiple responses, combine them into a single thought, or keep the best N thoughts based on scores. The code also includes default values for optional parameters to ease usage.", "type": "comment" }, - "460": { - "file_id": 26, + "454": { + "file_id": 25, "content": "**KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated, if not, it will be considered valid.\n**Selector:** Select a number of thoughts from the preceding thoughts using a selection function. This is useful if subsequent operations should only be applied to a subset of the preceding thoughts.\n- selector: A function that takes in a list of thoughts and returns a list of thoughts to select.\n**GroundTruth**: Evaluates if the preceding/current thoughts solve the problem and equal the ground truth. 
This operation is useful for terminating the graph and checking if the final thoughts solve the problem, but is only useful if the ground truth is known.\n- ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem.", "type": "code", "location": "/graph_of_thoughts/operations/README.md:64-70" }, - "461": { - "file_id": 26, + "455": { + "file_id": 25, "content": "This code defines three operations: **KeepValid** retains valid thoughts, **Selector** selects a subset of thoughts using a selection function, and **GroundTruth** checks if the preceding/current thoughts solve the problem (requires known ground truth).", "type": "comment" }, - "462": { - "file_id": 27, + "456": { + "file_id": 26, "content": "/graph_of_thoughts/operations/__init__.py", "type": "filepath" }, - "463": { - "file_id": 27, + "457": { + "file_id": 26, "content": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", "type": "summary" }, - "464": { - "file_id": 27, + "458": { + "file_id": 26, "content": "from .thought import Thought\nfrom .graph_of_operations import GraphOfOperations\nfrom .operations import (\n Operation,\n Score,\n ValidateAndImprove,\n Generate,\n Aggregate,\n KeepBestN,\n KeepValid,\n Selector,\n GroundTruth,\n Improve,\n)", "type": "code", "location": "/graph_of_thoughts/operations/__init__.py:1-14" }, - "465": { - "file_id": 27, + "459": { + "file_id": 26, "content": "This code imports classes from different modules within the \"graph-of-thoughts\" package to be used in operations. 
It includes classes for Thought, GraphOfOperations, Operation, Score, ValidateAndImprove, Generate, Aggregate, KeepBestN, KeepValid, Selector, GroundTruth, and Improve.", "type": "comment" }, - "466": { - "file_id": 28, + "460": { + "file_id": 27, "content": "/graph_of_thoughts/operations/graph_of_operations.py", "type": "filepath" }, - "467": { - "file_id": 28, + "461": { + "file_id": 27, "content": "The Graph of Operations class manages operation execution plans, initializing with empty lists and providing a method to append operations. It iterates through predecessors, removing leaves and appending operations without successors.", "type": "summary" }, - "468": { - "file_id": 28, + "462": { + "file_id": 27, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nfrom typing import List\nfrom graph_of_thoughts.operations.operations import Operation\nclass GraphOfOperations:\n \"\"\"\n Represents the Graph of Operations, which prescribes the execution plan of thought operations.\n \"\"\"\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Graph of Operations instance with empty operations, roots, and leaves.\n The roots are the entry points in the graph with no predecessors.\n The leaves are the exit points in the graph with no successors.\n \"\"\"\n self.operations: List[Operation] = []\n self.roots: List[Operation] = []\n self.leaves: List[Operation] = []\n def append_operation(self, operation: Operation) -> None:\n \"\"\"\n Appends an operation to all leaves in the graph and updates the relationships.", "type": "code", "location": "/graph_of_thoughts/operations/graph_of_operations.py:1-32" }, - "469": { - "file_id": 28, + "463": { + "file_id": 27, "content": "This code represents the Graph of Operations class, which is responsible for managing the execution plan of 
thought operations. It initializes with empty lists for operations, roots, and leaves, and provides a method to append an operation to all leaves in the graph while updating relationships.", "type": "comment" }, - "470": { - "file_id": 28, + "464": { + "file_id": 27, "content": " :param operation: The operation to append.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n else:\n for leave in self.leaves:\n leave.add_successor(operation)\n self.leaves = [operation]\n def add_operation(self, operation: Operation) -> None:\n \"\"\"\n Add an operation to the graph considering its predecessors and successors.\n Adjust roots and leaves based on the added operation's position within the graph.\n :param operation: The operation to add.\n :type operation: Operation\n \"\"\"\n self.operations.append(operation)\n if len(self.roots) == 0:\n self.roots = [operation]\n self.leaves = [operation]\n assert (\n len(operation.predecessors) == 0\n ), \"First operation should have no predecessors\"\n else:\n if len(operation.predecessors) == 0:\n self.roots.append(operation)", "type": "code", "location": "/graph_of_thoughts/operations/graph_of_operations.py:34-64" }, - "471": { - "file_id": 28, + "465": { + "file_id": 27, "content": "This code appends an operation to the graph and adjusts roots and leaves accordingly. If there are no roots, it sets the added operation as both root and leaf with no predecessors. 
If the added operation has no predecessors, it adds it as a new root.", "type": "comment" }, - "472": { - "file_id": 28, + "466": { + "file_id": 27, "content": " for predecessor in operation.predecessors:\n if predecessor in self.leaves:\n self.leaves.remove(predecessor)\n if len(operation.successors) == 0:\n self.leaves.append(operation)", "type": "code", "location": "/graph_of_thoughts/operations/graph_of_operations.py:65-69" }, - "473": { - "file_id": 28, + "467": { + "file_id": 27, "content": "Iterates through predecessors of an operation, removes leaves if they are also operation's predecessors, appends the operation to the leaves list if it has no successors.", "type": "comment" }, - "474": { - "file_id": 29, + "468": { + "file_id": 28, "content": "/graph_of_thoughts/operations/operations.py", "type": "filepath" }, - "475": { - "file_id": 29, + "469": { + "file_id": 28, "content": "The comments describe operations that preserve valid thoughts from predecessors, with Comment A introducing an abstract base class for Graph of Thoughts operations and Comment B focusing on the GroundTruth operation in a code context.", "type": "summary" }, - "476": { - "file_id": 29, + "470": { + "file_id": 28, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom enum import Enum\nfrom typing import List, Iterator, Dict, Callable, Union\nfrom abc import ABC, abstractmethod\nimport itertools\nfrom graph_of_thoughts.operations.thought import Thought\nfrom graph_of_thoughts.language_models import AbstractLanguageModel\nfrom graph_of_thoughts.prompter import Prompter\nfrom graph_of_thoughts.parser import Parser\nclass OperationType(Enum):\n \"\"\"\n Enum to represent different operation types that can be used as unique identifiers.\n \"\"\"\n score: int = 0\n 
validate_and_improve: int = 1\n generate: int = 2\n improve: int = 3\n aggregate: int = 4\n keep_best_n: int = 5\n keep_valid: int = 6\n ground_truth_evaluator: int = 7\n selector: int = 8\nclass Operation(ABC):\n \"\"\"\n Abstract base class that defines the interface for all operations.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:1-40" }, - "477": { - "file_id": 29, + "471": { + "file_id": 28, "content": "This code defines an abstract base class for operations in the Graph of Thoughts system. It includes an OperationType Enum representing unique operation identifiers and outlines the interface for all operations. This base class will be used to create concrete implementations of different types of operations within the system.", "type": "comment" }, - "478": { - "file_id": 29, + "472": { + "file_id": 28, "content": " \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n operation_type: OperationType = None\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Operation instance with a unique id, and empty predecessors and successors.\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Operation._ids)\n self.predecessors: List[Operation] = []\n self.successors: List[Operation] = []\n self.executed: bool = False\n def can_be_executed(self) -> bool:\n \"\"\"\n Checks if the operation can be executed based on its predecessors.\n :return: True if all predecessors have been executed, False otherwise.\n :rtype: bool\n \"\"\"\n return all(predecessor.executed for predecessor in self.predecessors)\n def get_previous_thoughts(self) -> List[Thought]:\n \"\"\"\n Iterates over all predecessors and aggregates their thoughts.\n :return: A list of all thoughts from the predecessors.\n :rtype: List[Thought]", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:41-71" }, - "479": { - "file_id": 29, + "473": { + "file_id": 28, "content": "Initializes a new Operation instance 
with a unique ID and empty predecessors and successors. The operation can be executed if all its predecessors have been executed. Aggregates thoughts from predecessors to return all thoughts from them.", "type": "comment" }, - "480": { - "file_id": 29, + "474": { + "file_id": 28, "content": " \"\"\"\n previous_thoughts: List[Thought] = [\n thought\n for predecessor in self.predecessors\n for thought in predecessor.get_thoughts()\n ]\n return previous_thoughts\n def add_predecessor(self, operation: Operation) -> None:\n \"\"\"\n Add a preceding operation and update the relationships.\n :param operation: The operation to be set as a predecessor.\n :type operation: Operation\n \"\"\"\n self.predecessors.append(operation)\n operation.successors.append(self)\n def add_successor(self, operation: Operation) -> None:\n \"\"\"\n Add a succeeding operation and update the relationships.\n :param operation: The operation to be set as a successor.\n :type operation: Operation\n \"\"\"\n self.successors.append(operation)\n operation.predecessors.append(self)\n def execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Execute the operation, assuring that all predecessors have been executed.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:72-105" }, - "481": { - "file_id": 29, + "475": { + "file_id": 28, "content": "This code defines an Operation class with methods to add predecessors and successors, ensuring proper relationships are updated. 
The execute method executes the operation after all predecessors have been executed.", "type": "comment" }, - "482": { - "file_id": 29, + "476": { + "file_id": 28, "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If not all predecessors have been executed.\n \"\"\"\n assert self.can_be_executed(), \"Not all predecessors have been executed\"\n self.logger.info(\n \"Executing operation %d of type %s\", self.id, self.operation_type\n )\n self._execute(lm, prompter, parser, **kwargs)\n self.logger.debug(\"Operation %d executed\", self.id)\n self.executed = True\n @abstractmethod\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Abstract method for the actual execution of the operation.\n This should be implemented in derived classes.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:107-130" }, - "483": { - "file_id": 29, + "477": { + "file_id": 28, "content": "The code defines a class with an abstract method for executing operations, requiring a language model (AbstractLanguageModel), prompter (Prompter), and parser (Parser). 
The class checks if all predecessors have been executed before execution, logs information during execution, marks itself as executed upon completion.", "type": "comment" }, - "484": { - "file_id": 29, + "478": { + "file_id": 28, "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n pass\n @abstractmethod\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Abstract method to retrieve the thoughts associated with the operation.\n This should be implemented in derived classes.\n :return: List of associated thoughts.\n :rtype: List[Thought]\n \"\"\"\n pass\nclass Score(Operation):\n \"\"\"\n Operation to score thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.score\n def __init__(\n self,\n num_samples: int = 1,\n combined_scoring: bool = False,\n scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = None,\n ) -> None:", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:132-168" }, - "485": { - "file_id": 29, + "479": { + "file_id": 28, "content": "This code defines an abstract class \"Operation\" with a method to get associated thoughts and a concrete class \"Score\" that inherits from it. The Score class takes parameters like num_samples, combined_scoring, and scoring_function for scoring thoughts. The get_thoughts method must be implemented in derived classes.", "type": "comment" }, - "486": { - "file_id": 29, + "480": { + "file_id": 28, "content": " \"\"\"\n Initializes a new Score operation.\n :param num_samples: Number of samples to use for scoring. Defaults to 1.\n :type num_samples: int\n :param combined_scoring: Whether to score all thoughts together or individually. 
Defaults to False.\n :type combined_scoring: bool\n :param scoring_function: A function to score thoughts (if not using LM). Defaults to None.\n :type scoring_function: Takes a list of thought states or a single thought state and\n returns a list of scores or a single score.\n \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.combined_scoring: bool = combined_scoring\n self.thoughts: List[Thought] = []\n self.scoring_function: Callable[\n [Union[List[Dict], Dict]], Union[List[float], float]\n ] = scoring_function\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of scored thoughts.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:169-192" }, - "487": { - "file_id": 29, + "481": { + "file_id": 28, "content": "This code defines a class for a Score operation that takes a specified number of samples, whether to score thoughts individually or combined, and a scoring function (defaulting to None). 
It initializes the operation with these parameters and returns the associated scored thoughts.", "type": "comment" }, - "488": { - "file_id": 29, + "482": { + "file_id": 28, "content": " :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the scoring operation by scoring the thoughts from the predecessors.\n If combined scoring is used, the thoughts are scored together, otherwise individually.\n If a scoring function is provided, it is used, otherwise the LM is prompted.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"Score operation needs at least one predecessor\"", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:193-218" }, - "489": { - "file_id": 29, + "483": { + "file_id": 28, "content": "This code defines a method that executes a scoring operation on thoughts from predecessors. It first gets the previous thoughts and asserts that there is at least one predecessor. If combined scoring is used, it scores the thoughts together; otherwise, individually. 
The language model (LM) and prompter are used for prompting if a scoring function is not provided.", "type": "comment" }, - "490": { - "file_id": 29, + "484": { + "file_id": 28, "content": " if self.combined_scoring:\n previous_thoughts_states = [thought.state for thought in previous_thoughts]\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score states\", self.scoring_function\n )\n scores = self.scoring_function(previous_thoughts_states)\n else:\n prompt = prompter.score_prompt(previous_thoughts_states)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n scores = parser.parse_score_answer(previous_thoughts_states, responses)\n for thought, score in zip(previous_thoughts, scores):\n new_thought = Thought.from_thought(thought)\n new_thought.score = score\n self.thoughts.append(new_thought)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:220-239" }, - "491": { - "file_id": 29, + "485": { + "file_id": 28, "content": "This code calculates scores for each previous thought using either a scoring function or by generating prompts from the thoughts and querying a language model. 
The scores are then assigned to the respective thoughts, and new Thought objects are created with the updated scores before being added to the thoughts list.", "type": "comment" }, - "492": { - "file_id": 29, + "486": { + "file_id": 28, "content": " else:\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n if self.scoring_function is not None:\n self.logger.debug(\n \"Using scoring function %s to score state\",\n self.scoring_function,\n )\n score = self.scoring_function(thought.state)\n else:\n prompt = prompter.score_prompt([thought.state])\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n score = parser.parse_score_answer([thought.state], responses)[0]\n new_thought.score = score\n self.thoughts.append(new_thought)\n self.logger.info(\n \"Score operation %d scored %d thoughts\",", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:240-263" }, - "493": { - "file_id": 29, + "487": { + "file_id": 28, "content": "This code handles scoring thoughts based on whether a scoring function is defined or not. If the scoring function is not defined, it prompts a language model (LM) to generate responses for each thought state and uses a parser to calculate scores from the LM's responses. 
The new score is then assigned to the thought object, and the thought is appended to the thoughts list.", "type": "comment" }, - "494": { - "file_id": 29, + "488": { + "file_id": 28, "content": " self.id,\n len(self.thoughts),\n )\nclass ValidateAndImprove(Operation):\n \"\"\"\n Operation to validate and improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.validate_and_improve\n def __init__(\n self,\n num_samples: int = 1,\n improve: bool = True,\n num_tries: int = 3,\n validate_function: Callable[[Dict], bool] = None,\n ) -> None:\n \"\"\"\n Initializes a new ValidateAndImprove operation.\n :param num_samples: Number of samples to use for validation. Defaults to 1.\n :type num_samples: int\n :param improve: Whether to improve the thought if it is not valid. Defaults to True.\n :type improve: bool\n :param num_tries: Number of tries to improve the thought before giving up. Defaults to 3.\n :type num_tries: int\n :param validate_function: A function to validate thoughts (if not using LM). Defaults to None.\n :type validate_function: Takes a thought state and returns a boolean.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:264-293" }, - "495": { - "file_id": 29, + "489": { + "file_id": 28, "content": "This code defines a class called `ValidateAndImprove` that extends the `Operation` class. It is designed to validate and improve thoughts, with parameters for number of samples, whether to improve if not valid, number of tries before giving up, and a function to validate thoughts (optional). 
The operation type is specified as \"validate_and_improve\".", "type": "comment" }, - "496": { - "file_id": 29, + "490": { + "file_id": 28, "content": " \"\"\"\n super().__init__()\n self.num_samples: int = num_samples\n self.improve: bool = improve\n self.num_tries: int = num_tries\n self.validate_function: Callable[[Dict], bool] = validate_function\n self.thoughts: List[List[Thought]] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the list of final thoughts, after validation and improvement.\n :return: List of final validated and improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return [thought_list[-1] for thought_list in self.thoughts]\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the ValidateAndImprove operation by validating and improving the predecessors' thoughts.\n If a validation function is provided, it is used, otherwise the LM is prompted.\n If improvement is enabled, the LM is prompted to improve the thought, if it is not valid.\n :param lm: The language model to be used.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:294-319" }, - "497": { - "file_id": 29, + "491": { + "file_id": 28, "content": "This code defines a class called `ValidateAndImprove` with attributes for the number of samples, whether to validate and improve thoughts, the number of tries, and a function to validate the thoughts. 
It also has methods to get final validated and improved thoughts, and execute validation and improvement using a language model.", "type": "comment" }, - "498": { - "file_id": 29, + "492": { + "file_id": 28, "content": " :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert (\n len(self.predecessors) > 0\n ), \"ValidateAndImprove operation needs at least one predecessor\"\n for thought in previous_thoughts:\n thought_list = []\n current_thought = Thought.from_thought(thought)\n current_try = 0\n while True:\n if self.validate_function is not None:\n self.logger.debug(\n \"Using validate function %s to score states\",\n self.validate_function,\n )\n valid = self.validate_function(current_thought.state)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:320-344" }, - "499": { - "file_id": 29, + "493": { + "file_id": 28, "content": "This function gets the previous thoughts, checks that it has at least one predecessor, then iterates through the previous thoughts. 
It creates a new thought from each previous thought and enters a loop where it validates the current thought's state using a validate function.", "type": "comment" + }, + "494": { + "file_id": 28, + "content": " else:\n prompt = prompter.validation_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n valid = parser.parse_validation_answer(\n current_thought.state, responses\n )\n current_thought.valid = valid\n thought_list.append(current_thought)\n if (\n not self.improve\n or current_thought.valid\n or current_try >= self.num_tries\n ):\n break\n improve_prompt = prompter.improve_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:345-366" + }, + "495": { + "file_id": 28, + "content": "Code block retrieves a prompt from prompter, then uses it to get responses from a language model (LM). It validates the response, updates the current thought's validation status and adds it to the thought list. 
If conditions met, breaks out of the loop.", + "type": "comment" + }, + "496": { + "file_id": 28, + "content": " lm.query(improve_prompt, num_responses=1)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(\n current_thought.state, responses\n )\n current_thought = Thought({**current_thought.state, **state_update})\n current_try += 1\n self.thoughts.append(thought_list)\n self.logger.info(\n \"Validate and improve operation %d created %d valid thoughts from %d previous thoughts\",\n self.id,\n len(\n [\n thought_list[-1]\n for thought_list in self.thoughts\n if thought_list[-1].valid\n ]\n ),\n len(previous_thoughts),\n )\nclass Generate(Operation):\n \"\"\"\n Operation to generate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.generate\n def __init__(\n self, num_branches_prompt: int = 1, num_branches_response: int = 1", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:367-399" + }, + "497": { + "file_id": 28, + "content": "This code defines an operation class \"Generate\" for generating thoughts using a language model (LM). It iteratively improves and validates each thought until it reaches the specified number of valid thoughts. Each thought is stored in the \"thoughts\" list. The \"Validate and improve\" operation creates new valid thoughts from previous invalid ones, appending them to the \"thoughts\" list.", + "type": "comment" + }, + "498": { + "file_id": 28, + "content": " ) -> None:\n \"\"\"\n Initializes a new Generate operation.\n :param num_branches_prompt: Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n :type num_branches_prompt: int\n :param num_branches_response: Number of responses the LM should generate for each prompt. 
Defaults to 1.\n :type num_branches_response: int\n \"\"\"\n super().__init__()\n self.num_branches_prompt: int = num_branches_prompt\n self.num_branches_response: int = num_branches_response\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of generated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Generate operation by generating thoughts from the predecessors.", + "type": "code", + "location": "/graph_of_thoughts/operations/operations.py:400-427" + }, + "499": { + "file_id": 28, + "content": "This code defines a class for generating thoughts, with parameters for the number of responses per prompt and the language model used. It initializes these parameters, stores generated thoughts in a list, and provides methods to retrieve them. 
The `_execute` method is responsible for generating thoughts using a language model, prompter, and parser.", + "type": "comment" } } \ No newline at end of file diff --git a/docs/data/5.json b/docs/data/5.json index 93446ec..325fdc4 100644 --- a/docs/data/5.json +++ b/docs/data/5.json @@ -1,545 +1,544 @@ { "500": { - "file_id": 29, - "content": " else:\n prompt = prompter.validation_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_samples)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n valid = parser.parse_validation_answer(\n current_thought.state, responses\n )\n current_thought.valid = valid\n thought_list.append(current_thought)\n if (\n not self.improve\n or current_thought.valid\n or current_try >= self.num_tries\n ):\n break\n improve_prompt = prompter.improve_prompt(**current_thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(", - "type": "code", - "location": "/graph_of_thoughts/operations/operations.py:345-366" - }, - "501": { - "file_id": 29, - "content": "Code block retrieves a prompt from prompter, then uses it to get responses from a language model (LM). It validates the response, updates the current thought's validation status and adds it to the thought list. 
If conditions met, breaks out of the loop.", - "type": "comment" - }, - "502": { - "file_id": 29, - "content": " lm.query(improve_prompt, num_responses=1)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(\n current_thought.state, responses\n )\n current_thought = Thought({**current_thought.state, **state_update})\n current_try += 1\n self.thoughts.append(thought_list)\n self.logger.info(\n \"Validate and improve operation %d created %d valid thoughts from %d previous thoughts\",\n self.id,\n len(\n [\n thought_list[-1]\n for thought_list in self.thoughts\n if thought_list[-1].valid\n ]\n ),\n len(previous_thoughts),\n )\nclass Generate(Operation):\n \"\"\"\n Operation to generate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.generate\n def __init__(\n self, num_branches_prompt: int = 1, num_branches_response: int = 1", - "type": "code", - "location": "/graph_of_thoughts/operations/operations.py:367-399" - }, - "503": { - "file_id": 29, - "content": "This code defines an operation class \"Generate\" for generating thoughts using a language model (LM). It iteratively improves and validates each thought until it reaches the specified number of valid thoughts. Each thought is stored in the \"thoughts\" list. The \"Validate and improve\" operation creates new valid thoughts from previous invalid ones, appending them to the \"thoughts\" list.", - "type": "comment" - }, - "504": { - "file_id": 29, - "content": " ) -> None:\n \"\"\"\n Initializes a new Generate operation.\n :param num_branches_prompt: Number of responses that each prompt should generate (passed to prompter). Defaults to 1.\n :type num_branches_prompt: int\n :param num_branches_response: Number of responses the LM should generate for each prompt. 
Defaults to 1.\n :type num_branches_response: int\n \"\"\"\n super().__init__()\n self.num_branches_prompt: int = num_branches_prompt\n self.num_branches_response: int = num_branches_response\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of generated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Generate operation by generating thoughts from the predecessors.", - "type": "code", - "location": "/graph_of_thoughts/operations/operations.py:400-427" - }, - "505": { - "file_id": 29, - "content": "This code defines a class for generating thoughts, with parameters for the number of responses per prompt and the language model used. It initializes these parameters, stores generated thoughts in a list, and provides methods to retrieve them. 
The `_execute` method is responsible for generating thoughts using a language model, prompter, and parser.", - "type": "comment" - }, - "506": { - "file_id": 29, + "file_id": 28, "content": " The thoughts are generated by prompting the LM with the predecessors' thought states.\n If there are no predecessors, the kwargs are used as a base state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0 and len(self.predecessors) > 0:\n return\n if len(previous_thoughts) == 0:\n # no predecessors, use kwargs as base state\n previous_thoughts = [Thought(state=kwargs)]\n for thought in previous_thoughts:\n base_state = thought.state\n prompt = prompter.generate_prompt(self.num_branches_prompt, **base_state)\n self.logger.debug(\"Prompt for LM: %s\", prompt)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:428-451" }, - "507": { - "file_id": 29, + "501": { + "file_id": 28, "content": "This function generates thoughts by using a language model (LM) with the predecessor's thought states as prompts. If there are no predecessors, it uses kwargs as a base state to generate thoughts. 
It then parses and logs the generated prompt for the LM.", "type": "comment" }, - "508": { - "file_id": 29, + "502": { + "file_id": 28, "content": " responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_branches_response)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n for new_state in parser.parse_generate_answer(base_state, responses):\n new_state = {**base_state, **new_state}\n self.thoughts.append(Thought(new_state))\n self.logger.debug(\n \"New thought %d created with state %s\",\n self.thoughts[-1].id,\n self.thoughts[-1].state,\n )\n if (\n len(self.thoughts)\n > self.num_branches_prompt\n * self.num_branches_response\n * len(previous_thoughts)\n and self.num_branches_prompt > 0\n ):\n self.logger.warning(\n \"Generate operation %d created more thoughts than expected\",\n self.id,\n )\n self.logger.info(\n \"Generate operation %d created %d new thoughts\", self.id, len(self.thoughts)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:452-476" }, - "509": { - "file_id": 29, + "503": { + "file_id": 28, "content": "This code generates responses from a language model, parses them using a parser, and appends new thoughts to the thoughts list. 
If more thoughts are created than expected based on prompt and response numbers, a warning is logged.", "type": "comment" }, - "510": { - "file_id": 29, + "504": { + "file_id": 28, "content": " )\nclass Improve(Operation):\n \"\"\"\n Operation to improve thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.improve\n def __init__(self) -> None:\n \"\"\"\n Initializes a new Improve operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after improvement.\n :return: List of improved thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Improve operation by improving the predecessors' thoughts.\n The thoughts are improved by prompting the LM with the predecessors' thought states.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:477-513" }, - "511": { - "file_id": 29, + "505": { + "file_id": 28, "content": "The code defines a class \"Improve\" which represents an operation to enhance thoughts. It initializes a new Improve operation and gets the associated thoughts after improvement. 
The \"_execute\" method executes the operation by improving the predecessor's thoughts using language model (LM) prompts.", "type": "comment" }, - "512": { - "file_id": 29, + "506": { + "file_id": 28, "content": " :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert len(self.predecessors) > 0, \"Needs at least one predecessor\"\n for thought in previous_thoughts:\n improve_prompt = prompter.improve_prompt(**thought.state)\n self.logger.debug(\"Prompt for LM: %s\", improve_prompt)\n responses = lm.get_response_texts(lm.query(improve_prompt, num_responses=1))\n self.logger.debug(\"Responses from LM: %s\", responses)\n state_update = parser.parse_improve_answer(thought.state, responses)\n self.thoughts.append(Thought({**thought.state, **state_update}))\n self.logger.info(\n \"Improve operation %d improved %d thoughts\", self.id, len(self.thoughts)\n )\nclass Aggregate(Operation):\n \"\"\"", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:514-537" }, - "513": { - "file_id": 29, + "507": { + "file_id": 28, "content": "This code defines two classes: \"Improve\" and \"Aggregate\", which are subclasses of the \"Operation\" class. The \"Improve\" operation retrieves previous thoughts, improves their prompts using a prompter and language model (LM), gets response texts, parses the responses using a parser, and appends the updated thoughts to the list of thoughts for the current operation. 
The \"Aggregate\" operation also exists but has no implementation shown in this code snippet.", "type": "comment" }, - "514": { - "file_id": 29, + "508": { + "file_id": 28, "content": " Operation to aggregate thoughts.\n \"\"\"\n operation_type: OperationType = OperationType.aggregate\n def __init__(self, num_responses: int = 1) -> None:\n \"\"\"\n Initializes a new Aggregate operation.\n :param num_responses: Number of responses to use for aggregation. Defaults to 1.\n :type num_responses: int\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n self.num_responses: int = num_responses\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation after aggregation.\n :return: List of aggregated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Aggregate operation by aggregating the predecessors' thoughts.\n The thoughts are aggregated by prompting the LM with the predecessors' thought states.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:538-568" }, - "515": { - "file_id": 29, + "509": { + "file_id": 28, "content": "This code defines an Aggregate operation class that initializes a new Aggregate operation and gets the associated thoughts after aggregation. 
It also includes a method to execute the operation by prompting the language model with predecessors' thought states for aggregation.", "type": "comment" }, - "516": { - "file_id": 29, + "510": { + "file_id": 28, "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"Aggregate operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n return\n # applied in order of score\n base_state: Dict = {}\n for thought in sorted(previous_thoughts, key=lambda thought: thought.score):\n base_state = {**base_state, **thought.state}\n previous_thought_states = [thought.state for thought in previous_thoughts]\n prompt = prompter.aggregation_prompt(previous_thought_states)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:570-594" }, - "517": { - "file_id": 29, + "511": { + "file_id": 28, "content": "This code is a part of an operation class in Python. It checks if the operation has at least one predecessor and retrieves the previous thoughts from it. Then, it sorts the previous thoughts based on their score and constructs a prompt for aggregation using the prompter. 
Finally, it stores the states of the previous thoughts.", "type": "comment" }, - "518": { - "file_id": 29, + "512": { + "file_id": 28, "content": " self.logger.debug(\"Prompt for LM: %s\", prompt)\n responses = lm.get_response_texts(\n lm.query(prompt, num_responses=self.num_responses)\n )\n self.logger.debug(\"Responses from LM: %s\", responses)\n parsed = parser.parse_aggregation_answer(previous_thought_states, responses)\n if isinstance(parsed, dict):\n parsed = [parsed]\n for new_state in parsed:\n self.thoughts.append(Thought({**base_state, **new_state}))\nclass KeepBestN(Operation):\n \"\"\"\n Operation to keep the best N thoughts from predecessors based on their score.\n \"\"\"\n operation_type: OperationType = OperationType.keep_best_n\n def __init__(self, n: int, higher_is_better: bool = True) -> None:\n \"\"\"\n Initializes a new KeepBestN operation.\n :param n: Maximum number of thoughts to keep.\n :type n: int\n :param higher_is_better: Whether higher scores are better. Defaults to True.\n :type higher_is_better: bool\n :raises AssertionError: If `n` is not greater than zero.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:596-627" }, - "519": { - "file_id": 29, + "513": { + "file_id": 28, "content": "The code defines a class `KeepBestN` that represents an operation to keep the best N thoughts from predecessors based on their score. 
The `__init__` method initializes a new `KeepBestN` object with the maximum number of thoughts to keep and whether higher scores are better.", "type": "comment" }, - "520": { - "file_id": 29, + "514": { + "file_id": 28, "content": " \"\"\"\n super().__init__()\n self.n: int = n\n assert self.n > 0, \"KeepBestN operation must keep at least one thought\"\n self.higher_is_better: bool = higher_is_better\n self.thoughts: List[Thought] = []\n def get_best_n(self) -> List[Thought]:\n \"\"\"\n Returns the best N thoughts from the predecessors based on their score.\n :return: List of best N thoughts.\n :rtype: List[Thought]\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n assert all(\n previous_thought.scored for previous_thought in previous_thoughts\n ), \"Not all thoughts have been scored\"\n try:\n return sorted(\n previous_thoughts,\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n except:", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:628-655" }, - "521": { - "file_id": 29, + "515": { + "file_id": 28, "content": "Class `KeepBestN` initializes its attributes and checks the minimum number of thoughts to keep, then provides a method `get_best_n()` that returns the top N thoughts based on their scores. 
It raises `AssertionError` if all predecessors haven't been executed or if not all thoughts have been scored.", "type": "comment" }, - "522": { - "file_id": 29, + "516": { + "file_id": 28, "content": " self.logger.error(\"Error in KeepBestN operation\")\n self.logger.error(\n \"Previous operation: %s\", [op.id for op in self.predecessors]\n )\n self.logger.error(\"Previous thoughts: %s\", previous_thoughts)\n self.logger.error(\n \"Scores: %s\", [thought.score for thought in previous_thoughts]\n )\n return sorted(\n [i for i in previous_thoughts if isinstance(i.score, float)],\n key=lambda thought: thought.score,\n reverse=self.higher_is_better,\n )[: self.n]\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepBestN operation by keeping the best N thoughts from the predecessors according to their score.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:656-683" }, - "523": { - "file_id": 29, + "517": { + "file_id": 28, "content": "This code defines a `KeepBestN` operation that keeps the top N thoughts from predecessors based on their scores. It logs an error message with previous operation details and previous thoughts' scores, and returns the sorted list of thoughts. 
The class has methods to access kept thoughts and execute the operation using given language model, prompter, and parser.", "type": "comment" }, - "524": { - "file_id": 29, + "518": { + "file_id": 28, "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n :raises AssertionError: If not all predecessors have been executed.\n :raises AssertionError: If not all thoughts have been scored.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepBestN operation must have at least one predecessor\"\n self.thoughts = [Thought.from_thought(thought) for thought in self.get_best_n()]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepBestN operation %d kept %d thoughts\", self.id, len(self.thoughts)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:685-708" }, - "525": { - "file_id": 29, + "519": { + "file_id": 28, "content": "The code defines a function for the KeepBestN operation, which requires at least one predecessor, and raises AssertionError if any conditions are not met. 
It retrieves thoughts from predecessors and logs information about the kept thoughts.", "type": "comment" }, - "526": { - "file_id": 29, + "520": { + "file_id": 28, "content": " )\nclass KeepValid(Operation):\n \"\"\"\n Operation to keep valid thoughts from predecessors.\n \"\"\"\n operation_type: OperationType = OperationType.keep_valid\n def __init__(self) -> None:\n \"\"\"\n Initializes a new KeepValid operation.\n \"\"\"\n super().__init__()\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts kept by the operation.\n :return: List of kept thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the KeepValid operation by keeping the valid thoughts from the predecessors.\n Keeps unvalidated thoughts as well.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:709-746" }, - "527": { - "file_id": 29, + "521": { + "file_id": 28, "content": "The `KeepValid` operation keeps valid thoughts from predecessors and returns them. It also preserves unvalidated thoughts. 
This class initializes a new KeepValid operation and provides methods for retrieving the kept thoughts and executing the operation using a language model, prompter, and parser.", "type": "comment" }, - "528": { - "file_id": 29, + "522": { + "file_id": 28, "content": " :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessors.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"KeepValid operation must have at least one predecessor\"\n self.thoughts: List[Thought] = [\n Thought.from_thought(thought)\n for thought in self.get_previous_thoughts()\n if not thought.validated or thought.valid\n ]\n if any(not thought.validated for thought in self.thoughts):\n self.logger.warning(\n \"KeepValid operation %d has unvalidated thoughts\", self.id\n )\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s kept\", thought.id, thought.state\n )\n self.logger.info(\n \"KeepValid operation %d kept %d thoughts\", self.id, len(self.thoughts)\n )\nclass GroundTruth(Operation):\n \"\"\"\n Operation to evaluate if thoughts correctly solve the problem, using a ground truth evaluator", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:747-778" }, - "529": { - "file_id": 29, + "523": { + "file_id": 28, "content": "The code defines two classes: \"KeepValid\" and \"GroundTruth\". The KeepValid class is an operation that requires at least one predecessor. It collects thoughts from previous operations (excluding those that are not valid or already valid) into a list called \"self.thoughts\". If there are any unvalidated thoughts, it logs a warning. Then, it logs debug and info messages for each thought in the list, including its ID and state, as well as the total number of thoughts kept. 
The GroundTruth class is an operation that uses a ground truth evaluator to assess if thoughts correctly solve the problem.", "type": "comment" }, - "530": { - "file_id": 29, + "524": { + "file_id": 28, "content": " \"\"\"\n operation_type: OperationType = OperationType.ground_truth_evaluator\n def __init__(self, ground_truth_evaluator: Callable[[Dict], bool]) -> None:\n \"\"\"\n Initializes a new GroundTruth operation.\n :param ground_truth_evaluator: A function to evaluate if a thought solves the problem.\n :type ground_truth_evaluator: A function that takes a thought state and returns a boolean.\n \"\"\"\n super().__init__()\n self.ground_truth_evaluator: Callable[[Dict], bool] = ground_truth_evaluator\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts associated with the operation.\n :return: List of evaluated thoughts.\n :rtype: List[Thought]\n \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the GroundTruth operation by evaluating the predecessors' thoughts using the ground truth evaluator function.", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:779-807" }, - "531": { - "file_id": 29, + "525": { + "file_id": 28, "content": "This code defines a class for the GroundTruth operation, which initializes with a ground truth evaluator function. The operation evaluates predecessors' thoughts using this function and stores them in a list of thoughts. 
The get_thoughts method returns these evaluated thoughts.", "type": "comment" }, - "532": { - "file_id": 29, + "526": { + "file_id": 28, "content": " :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n :raises AssertionError: If operation has no predecessor.\n \"\"\"\n assert (\n len(self.predecessors) >= 1\n ), \"GroundTruth operation must have at least one predecessor\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n for thought in previous_thoughts:\n new_thought = Thought.from_thought(thought)\n try:\n new_thought.solved = self.ground_truth_evaluator(new_thought.state)\n except:\n new_thought.solved = False\n self.thoughts.append(new_thought)\n self.logger.info(\n \"GroundTruth operation %d evaluated %d thoughts and %d solved the problem\",", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:809-833" }, - "533": { - "file_id": 29, + "527": { + "file_id": 28, "content": "This code is part of a class that implements the GroundTruth operation. It ensures that the operation has at least one predecessor and evaluates the thoughts generated by the previous operations. The evaluated thoughts are then added to the current operation's thoughts list, and their solved status is determined using the ground_truth_evaluator method. If any exceptions occur during the evaluation process, the solved status is set to False. 
Finally, an info message is logged indicating how many thoughts were evaluated and how many of them solved the problem.", "type": "comment" }, - "534": { - "file_id": 29, + "528": { + "file_id": 28, "content": " self.id,\n len(self.thoughts),\n len([thought for thought in self.thoughts if thought.solved]),\n )\nclass Selector(Operation):\n \"\"\"\n Operation to select thoughts from predecessors.\n Useful for separating thoughts to perform different, subsequent operations on them.\n \"\"\"\n operation_type: OperationType = OperationType.selector\n def __init__(self, selector: Callable[[List[Thought]], List[Thought]]) -> None:\n \"\"\"\n Initializes a new Selector operation.\n :param selector: A function to select thoughts from the predecessors' thoughts.\n :type selector: A function that takes a list of thoughts and returns a list of thoughts.\n \"\"\"\n super().__init__()\n self.selector: Callable[[List[Thought]], List[Thought]] = selector\n self.thoughts: List[Thought] = []\n def get_thoughts(self) -> List[Thought]:\n \"\"\"\n Returns the thoughts selected by the operation.\n :return: List of selected thoughts.\n :rtype: List[Thought]", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:834-864" }, - "535": { - "file_id": 29, + "529": { + "file_id": 28, "content": "This code defines a Selector operation for the Graph of Thoughts, which selects thoughts from predecessors to be used in subsequent operations. The constructor takes a selector function that accepts a list of thoughts and returns a list of selected thoughts. 
The get_thoughts method returns the thoughts selected by the operation.", "type": "comment" }, - "536": { - "file_id": 29, + "530": { + "file_id": 28, "content": " \"\"\"\n return self.thoughts\n def _execute(\n self, lm: AbstractLanguageModel, prompter: Prompter, parser: Parser, **kwargs\n ) -> None:\n \"\"\"\n Executes the Selector operation by selecting thoughts from the predecessors using the selector function.\n If the Selector has no predecessors, the selector function is called with a thought containing the kwargs as state.\n :param lm: The language model to be used.\n :type lm: AbstractLanguageModel\n :param prompter: The prompter for crafting prompts.\n :type prompter: Prompter\n :param parser: The parser for parsing responses.\n :type parser: Parser\n :param kwargs: Additional parameters for execution.\n \"\"\"\n previous_thoughts: List[Thought] = self.get_previous_thoughts()\n if len(previous_thoughts) == 0:\n previous_thoughts = [Thought(kwargs)]\n self.thoughts = [\n Thought.from_thought(thought)\n for thought in self.selector(previous_thoughts)", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:865-890" }, - "537": { - "file_id": 29, + "531": { + "file_id": 28, "content": "This code defines a Selector operation, which selects thoughts from predecessors using a provided selector function. If there are no predecessors, the function calls the selector with a thought containing the provided kwargs as state. 
The selected thoughts are then returned.", "type": "comment" }, - "538": { - "file_id": 29, + "532": { + "file_id": 28, "content": " ]\n for thought in self.thoughts:\n self.logger.debug(\n \"Thought %d with state %s selected\", thought.id, thought.state\n )\n self.logger.info(\n \"Selector operation %d selected %d thoughts\", self.id, len(self.thoughts)\n )", "type": "code", "location": "/graph_of_thoughts/operations/operations.py:891-900" }, - "539": { - "file_id": 29, + "533": { + "file_id": 28, "content": "This code segment is logging the selection of thoughts by a selector operation. It iterates over each thought in the self.thoughts list, and logs their ID and state. Finally, it logs the total number of thoughts selected by this operation.", "type": "comment" }, - "540": { - "file_id": 30, + "534": { + "file_id": 29, "content": "/graph_of_thoughts/operations/thought.py", "type": "filepath" }, - "541": { - "file_id": 30, + "535": { + "file_id": 29, "content": "The Thought class represents an LLM thought with attributes including state, score, validity flag, and solution flag. 
It includes methods for initializing new instances and cloning existing thoughts, as well as properties for validity, score, and solved flag management.", "type": "summary" }, - "542": { - "file_id": 30, + "536": { + "file_id": 29, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\nfrom __future__ import annotations\nimport logging\nfrom typing import Iterator, Dict, Optional\nimport itertools\nclass Thought:\n \"\"\"\n Represents an LLM thought with its state, constructed by the parser, and various flags.\n \"\"\"\n _ids: Iterator[int] = itertools.count(0)\n def __init__(self, state: Optional[Dict] = None) -> None:\n \"\"\"\n Initializes a new Thought instance with a state and various default flags.\n :param state: The state of the thought. Defaults to None.\n :type state: Optional[Dict]\n \"\"\"\n self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)\n self.id: int = next(Thought._ids)\n self.state: Dict = state\n self._score: float = 0.0\n self._valid: bool = False\n self._solved: bool = False\n self.scored: bool = False", "type": "code", "location": "/graph_of_thoughts/operations/thought.py:1-35" }, - "543": { - "file_id": 30, + "537": { + "file_id": 29, "content": "This code defines a `Thought` class that represents an LLM thought with its state, constructed by the parser, and various flags. 
The class has instance attributes including a logger, unique ID, state, score, validity flag, solution flag, and a method to initialize a new Thought instance with a state and default flags if none provided.", "type": "comment" }, - "544": { - "file_id": 30, + "538": { + "file_id": 29, "content": " self.validated: bool = False\n self.compared_to_ground_truth: bool = False\n @staticmethod\n def from_thought(thought: Thought) -> Thought:\n \"\"\"\n Creates a new thought from an existing one.\n :param thought: An instance of a Thought to clone.\n :return: A new Thought instance with properties copied from the input thought.\n \"\"\"\n new_thought = Thought(thought.state)\n new_thought.score = thought.score\n new_thought.valid = thought.valid\n new_thought.solved = thought.solved\n new_thought.scored = thought.scored\n new_thought.validated = thought.validated\n new_thought.compared_to_ground_truth = thought.compared_to_ground_truth\n return new_thought\n @property\n def valid(self) -> bool:\n \"\"\"\n Returns the validity of the thought.\n :return: The validity of the thought.\n :rtype: bool\n \"\"\"\n return self._valid\n @valid.setter\n def valid(self, valid: bool) -> None:\n \"\"\"", "type": "code", "location": "/graph_of_thoughts/operations/thought.py:36-68" }, - "545": { - "file_id": 30, + "539": { + "file_id": 29, "content": "This code defines a Thought class with properties like state, score, validity, solved status, scoring information, and comparison to ground truth. The class also has a static method `from_thought` to create a new thought from an existing one by cloning its properties. 
The `valid` property is a boolean representing the validity of the thought, which can be accessed using the `@property` decorator and modified with the `@valid.setter` decorator.", "type": "comment" }, - "546": { - "file_id": 30, + "540": { + "file_id": 29, "content": " Sets the validity of the thought and the validated flag.\n :param valid: The validity of the thought.\n :type valid: bool\n \"\"\"\n self.validated = True\n self._valid = valid\n @property\n def score(self) -> float:\n \"\"\"\n Returns the score of the thought.\n :return: The score of the thought.\n :rtype: float\n \"\"\"\n return self._score\n @score.setter\n def score(self, new_score: float) -> None:\n \"\"\"\n Sets the score of the thought and the scored flag.\n :param new_score: The score of the thought.\n :type new_score: float\n \"\"\"\n self.scored = True\n self._score = new_score\n @property\n def solved(self) -> bool:\n \"\"\"\n Returns the solved flag of the thought.\n :return: The solved flag of the thought.\n :rtype: bool\n \"\"\"\n return self._solved\n @solved.setter\n def solved(self, solved: bool) -> None:\n \"\"\"\n Sets the solved flag of the thought and the compared_to_ground_truth flag.", "type": "code", "location": "/graph_of_thoughts/operations/thought.py:69-111" }, - "547": { - "file_id": 30, + "541": { + "file_id": 29, "content": "This code defines a Thought class with properties for validity, score, and solved flag. The valid property can be set and gets the validity of the thought. The score property returns and sets the score of the thought, marking it as scored when updated. 
The solved property returns and sets the solved flag of the thought, also marking it as compared_to_ground_truth when updated.", "type": "comment" }, - "548": { - "file_id": 30, + "542": { + "file_id": 29, "content": " :param solved: Whether the thought contains a solution to the problem.\n :type solved: bool\n \"\"\"\n self.compared_to_ground_truth = True\n self._solved = solved", "type": "code", "location": "/graph_of_thoughts/operations/thought.py:113-117" }, - "549": { - "file_id": 30, + "543": { + "file_id": 29, "content": "Method defining a Thought object with a boolean parameter \"solved\" indicating if it contains a solution to the problem. The method sets self.compared_to_ground_truth to True and assigns the value of solved to self._solved.", "type": "comment" }, - "550": { - "file_id": 31, + "544": { + "file_id": 30, "content": "/graph_of_thoughts/parser/__init__.py", "type": "filepath" }, - "551": { - "file_id": 31, + "545": { + "file_id": 30, "content": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", "type": "summary" }, - "552": { - "file_id": 31, + "546": { + "file_id": 30, "content": "from .parser import Parser", "type": "code", "location": "/graph_of_thoughts/parser/__init__.py:1-1" }, - "553": { - "file_id": 31, + "547": { + "file_id": 30, "content": "The code imports the Parser class from the \"parser\" module in the current package, allowing for easier usage and organization of related functions and classes.", "type": "comment" }, - "554": { - "file_id": 32, + "548": { + "file_id": 31, "content": "/graph_of_thoughts/parser/parser.py", "type": "filepath" }, - "555": { - "file_id": 32, + "549": { + "file_id": 31, "content": "The code includes an abstract class that defines three methods for parsing different language model responses, utilizing thought states and text inputs. 
The 'pass' statement serves as a placeholder or temporary measure.", "type": "summary" }, - "556": { - "file_id": 32, + "550": { + "file_id": 31, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List, Union\nclass Parser(ABC):\n \"\"\"\n Abstract base class that defines the interface for all parsers.\n Parsers are used to parse the responses from the language models.\n \"\"\"\n @abstractmethod\n def parse_aggregation_answer(\n self, states: List[Dict], texts: List[str]\n ) -> Union[Dict, List[Dict]]:\n \"\"\"\n Parse the response from the language model for a aggregation prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model.", "type": "code", "location": "/graph_of_thoughts/parser/parser.py:1-31" }, - "557": { - "file_id": 32, + "551": { + "file_id": 31, "content": "Parser abstract class for language model responses parsing. Defines an interface to be implemented by subclasses. 
Used for aggregation prompts and takes thought states and language model responses as input, returning updated thought states after parsing the response.", "type": "comment" }, - "558": { - "file_id": 32, + "552": { + "file_id": 31, "content": " :rtype: Union[Dict, List[Dict]]\n \"\"\"\n pass\n @abstractmethod\n def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:\n \"\"\"\n Parse the response from the language model for an improve prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought state after parsing the response from the language model.\n :rtype: Dict\n \"\"\"\n pass\n @abstractmethod\n def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:\n \"\"\"\n Parse the response from the language model for a generate prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The new thought states after parsing the response from the language model.", "type": "code", "location": "/graph_of_thoughts/parser/parser.py:32-59" }, - "559": { - "file_id": 32, + "553": { + "file_id": 31, "content": "This code defines three abstract methods in a class: `parse_improve_answer`, `parse_generate_answer`, and `parse`. These methods are responsible for parsing responses from a language model given a thought state and text inputs. 
The return types vary depending on the method, with `parse` returning either a dictionary or a list of dictionaries.", "type": "comment" }, - "560": { - "file_id": 32, + "554": { + "file_id": 31, "content": " :rtype: List[Dict]\n \"\"\"\n pass\n @abstractmethod\n def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:\n \"\"\"\n Parse the response from the language model for a validation prompt.\n :param state: The thought state used to generate the prompt.\n :type state: Dict\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: Whether the thought state is valid or not.\n :rtype: bool\n \"\"\"\n pass\n @abstractmethod\n def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:\n \"\"\"\n Parse the response from the language model for a score prompt.\n :param states: The thought states used to generate the prompt.\n :type states: List[Dict]\n :param texts: The responses to the prompt from the language model.\n :type texts: List[str]\n :return: The scores for the thought states.\n :rtype: List[float]\n \"\"\"", "type": "code", "location": "/graph_of_thoughts/parser/parser.py:60-89" }, - "561": { - "file_id": 32, + "555": { + "file_id": 31, "content": "This code defines an abstract base class with three methods for parsing different types of language model responses. The `parse_thought_answer` method takes a thought state and text response to determine if the thought is valid. The `parse_validation_answer` method takes a list of thought states and text responses to determine if each thought's state is valid. 
Finally, the `parse_score_answer` method takes a list of thought states and text responses and returns the scores for each thought state.", "type": "comment" }, - "562": { - "file_id": 32, + "556": { + "file_id": 31, "content": " pass", "type": "code", "location": "/graph_of_thoughts/parser/parser.py:90-90" }, - "563": { - "file_id": 32, + "557": { + "file_id": 31, "content": "The code contains a 'pass' statement, which is used as a placeholder and does nothing. It could be used for future implementation or as a temporary measure during development.", "type": "comment" }, - "564": { - "file_id": 33, + "558": { + "file_id": 32, "content": "/graph_of_thoughts/prompter/__init__.py", "type": "filepath" }, - "565": { - "file_id": 33, + "559": { + "file_id": 32, "content": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", "type": "summary" }, - "566": { - "file_id": 33, + "560": { + "file_id": 32, "content": "from .prompter import Prompter", "type": "code", "location": "/graph_of_thoughts/prompter/__init__.py:1-1" }, - "567": { - "file_id": 33, + "561": { + "file_id": 32, "content": "This line imports the Prompter class from the \"prompter\" module within the current package, allowing its functionality to be accessed by other parts of the codebase.", "type": "comment" }, - "568": { - "file_id": 34, + "562": { + "file_id": 33, "content": "/graph_of_thoughts/prompter/prompter.py", "type": "filepath" }, - "569": { - "file_id": 34, + "563": { + "file_id": 33, "content": "The code presents an abstract base class, Prompter, that generates language model prompts through two methods: `aggregation_prompt()` and `improve_prompt()`. 
It also includes optional parameters and keyword arguments for subclass customization.", "type": "summary" }, - "570": { - "file_id": 34, + "564": { + "file_id": 33, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main authors: Robert Gerstenberger, Nils Blach\nfrom __future__ import annotations\nfrom abc import ABC, abstractmethod\nfrom typing import Dict, List\nclass Prompter(ABC):\n \"\"\"\n Abstract base class that defines the interface for all prompters.\n Prompters are used to generate the prompts for the language models.\n \"\"\"\n @abstractmethod\n def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a aggregation prompt for the language model.\n :param state_dicts: The thought states that should be aggregated.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The aggregation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def improve_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate an improve prompt for the language model.", "type": "code", "location": "/graph_of_thoughts/prompter/prompter.py:1-36" }, - "571": { - "file_id": 34, + "565": { + "file_id": 33, "content": "This code is an abstract base class called Prompter, which defines interfaces for all prompters. It helps generate prompts for language models in the form of aggregation and improve prompts. 
The class has two abstract methods: `aggregation_prompt()` and `improve_prompt()`, both with their own parameters and return types.", "type": "comment" }, - "572": { - "file_id": 34, + "566": { + "file_id": 33, "content": " The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The improve prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def generate_prompt(self, num_branches: int, **kwargs) -> str:\n \"\"\"\n Generate a generate prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments\n and concrete implementations to specify required arguments explicitly.\n :param num_branches: The number of responses the prompt should ask the LM to generate.\n :type num_branches: int\n :param kwargs: Additional keyword arguments.\n :return: The generate prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def validation_prompt(self, **kwargs) -> str:\n \"\"\"\n Generate a validation prompt for the language model.\n The thought state is unpacked to allow for additional keyword arguments", "type": "code", "location": "/graph_of_thoughts/prompter/prompter.py:37-65" }, - "573": { - "file_id": 34, + "567": { + "file_id": 33, "content": "This code defines a base class for generating prompts and validation prompts for language models. The `generate_prompt` and `validation_prompt` methods are abstract, indicating that concrete implementations should override them. The methods accept an optional parameter `num_branches`, and additional keyword arguments (`kwargs`) to allow for customization in subclasses. 
The thought state is unpacked to enable explicit specification of required arguments.", "type": "comment" }, - "574": { - "file_id": 34, + "568": { + "file_id": 33, "content": " and concrete implementations to specify required arguments explicitly.\n :param kwargs: Additional keyword arguments.\n :return: The validation prompt.\n :rtype: str\n \"\"\"\n pass\n @abstractmethod\n def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:\n \"\"\"\n Generate a score prompt for the language model.\n :param state_dicts: The thought states that should be scored,\n if more than one, they should be scored together.\n :type state_dicts: List[Dict]\n :param kwargs: Additional keyword arguments.\n :return: The score prompt.\n :rtype: str\n \"\"\"\n pass", "type": "code", "location": "/graph_of_thoughts/prompter/prompter.py:66-86" }, - "575": { - "file_id": 34, + "569": { + "file_id": 33, "content": "This code defines an abstract class with two methods: `generate_prompt()` and `score_prompt()`. The first method generates a validation prompt, and the second method generates a score prompt. Both methods accept additional keyword arguments. State dictionaries are used as input for the `score_prompt()` method to generate prompts for multiple thought states simultaneously.", "type": "comment" }, - "576": { - "file_id": 35, + "570": { + "file_id": 34, "content": "/paper/README.md", "type": "filepath" }, - "577": { - "file_id": 35, + "571": { + "file_id": 34, "content": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", "type": "summary" }, - "578": { - "file_id": 35, + "572": { + "file_id": 34, "content": "## Plot Data\nThe data used to create the figure of the arXiv preprint article can be\nfound in the `final_results_gpt35.tar.bz2` archive. 
Unpack the archive\nand run the file `plots.py`.", "type": "code", "location": "/paper/README.md:1-5" }, - "579": { - "file_id": 35, + "573": { + "file_id": 34, "content": "The code provides instructions to access and visualize arXiv preprint data, which is stored in the `final_results_gpt35.tar.bz2` archive. The `plots.py` file needs to be executed after unpacking the archive for visualization purposes.", "type": "comment" }, - "580": { - "file_id": 36, + "574": { + "file_id": 35, "content": "/paper/plots.py", "type": "filepath" }, - "581": { - "file_id": 36, + "575": { + "file_id": 35, "content": "Both scripts utilize Python to process data from JSON files, generate boxplots, and customize visualizations with various settings such as titles, colors, y-axis limits, cost thresholds, and display options.", "type": "summary" }, - "582": { - "file_id": 36, + "576": { + "file_id": 35, "content": "# Copyright (c) 2023 ETH Zurich.\n# All rights reserved.\n#\n# Use of this source code is governed by a BSD-style license that can be\n# found in the LICENSE file.\n#\n# main author: Nils Blach\n# contributions: Robert Gerstenberger\nimport json\nimport os\nimport matplotlib.pyplot as plt\ndef get_complete_results(base_directory):\n results_complete = {}\n for folder_name in os.listdir(base_directory):\n folder_path = os.path.join(base_directory, folder_name)\n if os.path.isdir(folder_path):\n results_complete[folder_name] = []\n for file_name in os.listdir(folder_path):\n if file_name.endswith(\".json\"):\n file_path = os.path.join(folder_path, file_name)\n with open(file_path, \"r\") as f:\n data = json.load(f)\n results_complete[folder_name].append(\n {\"key\": int(file_name.split(\".\")[0]), \"data\": data}\n )\n for key in results_complete.keys():", "type": "code", "location": "/paper/plots.py:1-29" }, - "583": { - "file_id": 36, + "577": { + "file_id": 35, "content": "This Python script reads data from a directory of JSON files, organizes it by folders, and stores the 
results in a dictionary. It uses the matplotlib library for plotting, but the code provided focuses on reading and organizing data, not plotting itself. The script is likely part of a larger program that utilizes this data for further analysis or visualization.", "type": "comment" }, - "584": { - "file_id": 36, + "578": { + "file_id": 35, "content": " results_complete[key] = sorted(\n results_complete[key], key=lambda x: x[\"key\"]\n )\n return results_complete\ndef get_final_scores(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 100\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in result[\"data\"]:\n if \"operation\" in op and op[\"operation\"] == \"ground_truth_evaluator\":\n try:\n score = min(op[\"scores\"])\n solved = any(op[\"problem_solved\"])\n except:\n continue\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]", "type": "code", "location": "/paper/plots.py:30-58" }, - "585": { - "file_id": 36, + "579": { + "file_id": 35, "content": "This code snippet sorts the results and then calculates final scores for different methods based on metrics like score, solution status, prompt and completion tokens, and cost.", "type": "comment" }, - "586": { - "file_id": 36, + "580": { + "file_id": 35, "content": " )\n scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_final_scores_doc_merge(results_complete):\n scores = {}\n for method in results_complete.keys():\n scores[method] = []\n for result in results_complete[method]:\n score = 0\n solved = False\n cost = 1\n prompt_tokens = 0\n completion_tokens = 0\n for op in reversed(result[\"data\"]):\n if \"cost\" in op:\n cost = op[\"cost\"]\n prompt_tokens = 
op[\"prompt_tokens\"]\n completion_tokens = op[\"completion_tokens\"]\n if \"operation\" in op and op[\"operation\"] == \"score\":\n try:\n score = max(op[\"scores\"])\n break\n except:\n continue\n scores[method].append(\n [result[\"key\"], score, solved, prompt_tokens, completion_tokens, cost]\n )", "type": "code", "location": "/paper/plots.py:59-87" }, - "587": { - "file_id": 36, + "581": { + "file_id": 35, "content": "This function calculates the final scores for each method in the results_complete dictionary, considering factors like cost, prompt and completion tokens, and operation scores. It sorts the scores in ascending order before returning them.", "type": "comment" }, - "588": { - "file_id": 36, + "582": { + "file_id": 35, "content": " scores[method] = sorted(scores[method], key=lambda x: x[0])\n return scores\ndef get_plotting_data(base_directory, score_method):\n results_complete = get_complete_results(base_directory)\n scores = score_method(results_complete)\n results_plotting = {\n method: {\n \"scores\": [x[1] for x in scores[method]],\n \"solved\": sum([1 for x in scores[method] if x[2]]),\n \"costs\": [x[5] for x in scores[method]],\n }\n for method in scores.keys()\n }\n return results_plotting\ndef plot_results(\n name,\n results,\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT\"],\n model=\"GPT-3.5\",\n length=32,\n y_lower=0,\n y_upper=16,\n cost_upper=1.8,\n display_solved=True,\n annotation_offset=1,\n display_left_ylabel=False,\n display_right_ylabel=False,\n):\n methods_order = [method for method in methods_order if method in results]\n # Extract scores based on the order\n if name == \"set_intersection\":", "type": "code", "location": "/paper/plots.py:88-123" }, - "589": { - "file_id": 36, + "583": { + "file_id": 35, "content": "The code defines a function `get_plotting_data` that takes a base directory and a score method as input, returns plotting data for 
different methods by extracting scores, solved counts, and costs from the complete results. Another function, `plot_results`, is defined which takes various parameters such as name, results, methods order, etc., and plots the results based on the specified parameters. The code also includes checks to ensure that only valid methods are considered for plotting.", "type": "comment" }, - "590": { - "file_id": 36, + "584": { + "file_id": 35, "content": " scores_ordered = [\n [min(score, length) for score in results[method][\"scores\"] if score != 1000]\n for method in methods_order\n ]\n elif name == \"sorting\":\n scores_ordered = [\n [\n min(score, length)\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"keyword_counting\":\n scores_ordered = [\n [\n score\n for score in results[method][\"scores\"]\n if score != 100 and score != 300\n ]\n for method in methods_order\n ]\n elif name == \"document_merging\":\n scores_ordered = [\n [score for score in results[method][\"scores\"]] for method in methods_order\n ]\n total_costs = [sum(results[method][\"costs\"]) for method in methods_order]\n # Create figure and axis\n if name == \"keyword_counting\" or name == \"document_merging\":", "type": "code", "location": "/paper/plots.py:124-153" }, - "591": { - "file_id": 36, + "585": { + "file_id": 35, "content": "The code is filtering scores and costs based on specific conditions for different tasks (e.g., sorting, keyword counting, document merging) and creating a figure with axes. 
For each task, it generates a list of filtered scores and total costs.", "type": "comment" }, - "592": { - "file_id": 36, + "586": { + "file_id": 35, "content": " fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))\n else:\n fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))\n # Create boxplots\n positions = range(1, len(methods_order) + 1)\n ax.boxplot(scores_ordered, positions=positions)\n fig_fontsize = 12\n # Set the ticks and labels\n plt.yticks(fontsize=fig_fontsize)\n ax.set_xticks(range(1, len(methods_order) + 1))\n ax.set_xticks(range(1, len(methods_order) + 1))\n if name == \"keyword_counting\":\n ax.set_xticklabels(methods_labels, fontsize=10)\n else:\n ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)\n if name == \"document_merging\":\n ax.set_ylim(y_lower, 12 if display_solved else 9.75)\n else:\n ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)\n if name == \"sorting\" or name == \"set_intersection\":\n ax1_yticks = range(\n y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)\n )\n ax.set_yticks(ax1_yticks)\n if display_left_ylabel:", "type": "code", "location": "/paper/plots.py:154-184" }, - "593": { - "file_id": 36, + "587": { + "file_id": 35, "content": "Creates boxplots for sorted scores based on methods order, sets x-tick labels and y-limits according to the current name (method), adjusts y-ticks depending on length and displays left ylabel if needed.", "type": "comment" }, - "594": { - "file_id": 36, + "588": { + "file_id": 35, "content": " if name == \"keyword_counting\":\n ax.set_ylabel(\n f\"Number of errors; the lower the better\", fontsize=fig_fontsize\n )\n elif name == \"document_merging\":\n ax.set_ylabel(\n f\"Score (out of 10); the higher the better\", fontsize=fig_fontsize\n )\n else:\n ax.set_ylabel(\n f\"#incorrect elements; the lower the better\", fontsize=fig_fontsize\n )\n if name == \"sorting\" or name == \"set_intersection\":\n ax.set_title(f\"{length} elements\")\n ax2 = 
ax.twinx()\n ax2.bar(positions, total_costs, alpha=0.5, color=\"blue\", label=\"Total Cost ($)\")\n ax2.yaxis.set_tick_params(colors=\"#1919ff\", labelsize=fig_fontsize)\n ax2.set_ylim(0, cost_upper)\n number_of_ticks = len(ax.get_yticks())\n tick_interval = cost_upper / (number_of_ticks)\n ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]\n # Set custom tick positions for ax2\n ax2.set_yticks(ax2_ticks)\n if display_right_ylabel:", "type": "code", "location": "/paper/plots.py:185-212" }, - "595": { - "file_id": 36, + "589": { + "file_id": 35, "content": "If \"keyword_counting\", set ylabel as \"Number of errors; the lower the better\". If \"document_merging\", set ylabel as \"Score (out of 10); the higher the better\". Otherwise, set ylabel as \"#incorrect elements; the lower the better\". If \"sorting\" or \"set_intersection\", set title as length of elements. Add a blue bar chart for total cost using ax2. Set tick colors and ylim on ax2. Customize ytick positions for ax2 using provided interval.", "type": "comment" }, - "596": { - "file_id": 36, + "590": { + "file_id": 35, "content": " ax2.set_ylabel(\n \"Total Cost ($); the lower the better\",\n color=\"#1919ff\",\n fontsize=fig_fontsize,\n )\n if display_solved:\n annotation_height = y_upper + annotation_offset\n count = 1\n for method in methods_order:\n if method not in results:\n continue\n solved = results[method][\"solved\"]\n ax.text(\n count,\n annotation_height,\n f\"{solved}\",\n ha=\"center\",\n va=\"bottom\",\n fontsize=fig_fontsize,\n )\n count += 1\n model = model.replace(\".\", \"\").replace(\"-\", \"\").lower()\n if name == \"keyword_counting\" or name == \"document_merging\":\n fig.savefig(f\"{name}_{model}.pdf\", bbox_inches=\"tight\")\n else:\n fig.savefig(f\"{name}_{model}_{length}.pdf\", bbox_inches=\"tight\")\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_032\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", 
\"tog2\"],", "type": "code", "location": "/paper/plots.py:213-246" }, - "597": { - "file_id": 36, + "591": { + "file_id": 35, "content": "Setting the y-label for a plot, displaying the number of solved methods, and saving the figure with appropriate filename based on the method name and model.", "type": "comment" }, - "598": { - "file_id": 36, + "592": { + "file_id": 35, "content": " length=32,\n y_upper=19,\n cost_upper=2,\n display_solved=True,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_064\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=64,\n y_upper=32,\n cost_upper=5.4,\n display_solved=True,\n annotation_offset=0.2,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"set_intersection\",\n get_plotting_data(\"set_intersection_gpt35_128\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"tog2\"],\n length=128,\n y_upper=94,\n cost_upper=12,\n display_solved=True,\n annotation_offset=-3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_032\", get_final_scores),\n length=32,\n display_solved=False,\n annotation_offset=0.5,\n display_left_ylabel=True,\n display_right_ylabel=True,", "type": "code", "location": "/paper/plots.py:247-289" }, - "599": { - "file_id": 36, + "593": { + "file_id": 35, "content": "The code snippet is defining functions and parameters for plotting data from various models. It uses the 'plot_results' function with different arguments to visualize the results of operations such as set intersection and sorting. The plots have various settings like length, upper limit, cost, display options, etc. 
to customize the visual representation of the data.", "type": "comment" + }, + "594": { + "file_id": 35, + "content": ")\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_064\", get_final_scores),\n length=64,\n y_upper=64,\n cost_upper=5.1,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_128\", get_final_scores),\n length=128,\n y_upper=128,\n cost_upper=17,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"keyword_counting\",\n get_plotting_data(\"keyword_counting_gpt35\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"gsp4\", \"gsp8\", \"gspx\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"],\n y_upper=35,\n cost_upper=9,\n display_solved=True,\n annotation_offset=-0.3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"document_merging\",\n get_plotting_data(\"document_merging_gpt35_16k\", get_final_scores_doc_merge),\n methods_order=[\"io\", \"cot\", \"tot\", \"gsp\", \"gsp2\"],", + "type": "code", + "location": "/paper/plots.py:290-330" + }, + "595": { + "file_id": 35, + "content": "Code snippet contains multiple function calls to plot results using different sets of data and parameters. It plots data for tasks \"sorting\" and \"keyword_counting\", and \"document_merging\". 
Each call specifies the task, data, methods order, labels, limits, display options, and other settings.", + "type": "comment" + }, + "596": { + "file_id": 35, + "content": " methods_labels=[\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"],\n y_upper=10,\n cost_upper=15,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", + "type": "code", + "location": "/paper/plots.py:331-337" + }, + "597": { + "file_id": 35, + "content": "Parameters for plotting methods labels, upper limit of y-axis, cost threshold, and display options.", + "type": "comment" + }, + "598": { + "file_id": 36, + "content": "/pyproject.toml", + "type": "filepath" + }, + "599": { + "file_id": 36, + "content": "The code uses Hatchling to define project settings for the Python package \"graph_of_thoughts,\" including package details, dependencies, and URLs. It also includes a TOML configuration file setting up an entry point for executable scripts under the project's namespace within the \"scripts\" section of the \"project\" block.", + "type": "summary" } } \ No newline at end of file diff --git a/docs/data/6.json b/docs/data/6.json index 4bff850..423be20 100644 --- a/docs/data/6.json +++ b/docs/data/6.json @@ -1,24 +1,24 @@ { "600": { "file_id": 36, - "content": ")\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_064\", get_final_scores),\n length=64,\n y_upper=64,\n cost_upper=5.1,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"sorting\",\n get_plotting_data(\"sorting_gpt35_128\", get_final_scores),\n length=128,\n y_upper=128,\n cost_upper=17,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"keyword_counting\",\n get_plotting_data(\"keyword_counting_gpt35\", get_final_scores),\n methods_order=[\"io\", \"cot\", \"tot\", \"tot2\", \"gsp4\", \"gsp8\", \"gspx\"],\n methods_labels=[\"IO\", \"CoT\", \"ToT\", \"ToT2\", \"GoT4\", \"GoT8\", \"GoTx\"],\n 
y_upper=35,\n cost_upper=9,\n display_solved=True,\n annotation_offset=-0.3,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)\nplot_results(\n \"document_merging\",\n get_plotting_data(\"document_merging_gpt35_16k\", get_final_scores_doc_merge),\n methods_order=[\"io\", \"cot\", \"tot\", \"gsp\", \"gsp2\"],", + "content": "[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n[project]\nname = \"graph_of_thoughts\"\nversion = \"0.0.3\"\nauthors = [\n { name=\"Maciej Besta\", email=\"maciej.besta@inf.ethz.ch\" },\n { name=\"Nils Blach\", email=\"nils.blach@inf.ethz.ch\" },\n { name=\"Ales Kubicek\", email=\"akubicek@student.ethz.ch\" },\n { name=\"Robert Gerstenberger\", email=\"gerstenberger.robert@gmail.com\" },\n]\ndescription = \"Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models\"\nreadme = \"README.md\"\nlicense = {file = \"LICENSE\"}\nrequires-python = \">=3.8\"\nclassifiers = [\n \"Programming Language :: Python :: 3\",\n \"Operating System :: OS Independent\",\n]\ndependencies = [\n \"backoff>=2.2.1,<3.0.0\",\n \"openai>=1.0.0,<2.0.0\",\n \"matplotlib>=3.7.1,<4.0.0\",\n \"numpy>=1.24.3,<2.0.0\",\n \"pandas>=2.0.3,<3.0.0\",\n \"sympy>=1.12,<2.0\",\n \"torch>=2.0.1,<3.0.0\",\n \"transformers>=4.31.0,<5.0.0\",\n \"accelerate>=0.21.0,<1.0.0\",\n \"bitsandbytes>=0.41.0,<1.0.0\",\n \"scipy>=1.10.1,<2.0.0\",\n]\n[project.urls]\nHomepage = \"https://github.com/spcl/graph-of-thoughts\"", "type": "code", - "location": "/paper/plots.py:290-330" + "location": "/pyproject.toml:1-37" }, "601": { "file_id": 36, - "content": "Code snippet contains multiple function calls to plot results using different sets of data and parameters. It plots data for tasks \"sorting\" and \"keyword_counting\", and \"document_merging\". 
Each call specifies the task, data, methods order, labels, limits, display options, and other settings.", + "content": "This code defines the project settings for a Python package called \"graph_of_thoughts\" using Hatchling as the build system. It specifies the package name, version, authors, description, dependencies, and URLs for further information.", "type": "comment" }, "602": { "file_id": 36, - "content": " methods_labels=[\"IO\", \"CoT\", \"ToT\", \"GoT\", \"GoT2\"],\n y_upper=10,\n cost_upper=15,\n display_solved=False,\n display_left_ylabel=True,\n display_right_ylabel=True,\n)", + "content": "[project.scripts]", "type": "code", - "location": "/paper/plots.py:331-337" + "location": "/pyproject.toml:39-39" }, "603": { "file_id": 36, - "content": "Parameters for plotting methods labels, upper limit of y-axis, cost threshold, and display options.", + "content": "The code snippet is a part of a TOML configuration file, specifically defining the \"scripts\" section within the \"project\" block. 
It sets up an entry point for executable scripts under the project's namespace.", "type": "comment" } } \ No newline at end of file diff --git a/docs/data/titles/0.json b/docs/data/titles/0.json new file mode 100644 index 0000000..dba8c30 --- /dev/null +++ b/docs/data/titles/0.json @@ -0,0 +1,293 @@ +{ + "/README.md": "GoT Framework: Efficient Python 3.8+ Language Model", + "/README.md:1-20": "Installing Graph of Thoughts Framework", + "/README.md:116-133": "Framework Tutorial and Examples", + "/README.md:134-150": "Accessing and Citing Project Results", + "/README.md:21-48": "Graph of Thoughts LLM Installation and Example", + "/README.md:49-83": "Generate Graph of Thoughts using GoT", + "/README.md:85-116": "Code for Framework Execution and Error Counts", + "/examples/README.md": "Graph of Thoughts Examples", + "/examples/doc_merge/README.md": "Document Merging with Multiple Methods", + "/examples/doc_merge/README.md:1-28": "Document Merging Use Cases", + "/examples/doc_merge/README.md:29-38": "Configure and Log Code Generator", + "/examples/doc_merge/doc_merge.py": "Efficient NDA Merging with Language Model", + "/examples/doc_merge/doc_merge.py:1-31": "Merge NDA Documents Prompter Class", + "/examples/doc_merge/doc_merge.py:113-143": "Merged NDAs Prompt Generator", + "/examples/doc_merge/doc_merge.py:144-174": "Generate Prompt Class", + "/examples/doc_merge/doc_merge.py:176-198": "Generate Prompt for Document Merging", + "/examples/doc_merge/doc_merge.py:199-221": "Dynamic Prompt for Document Merging", + "/examples/doc_merge/doc_merge.py:222-245": "Document Merge and Improvement Prompt", + "/examples/doc_merge/doc_merge.py:246-274": "Generate Score Prompt for Language Model", + "/examples/doc_merge/doc_merge.py:275-315": "DocMergeParser: Parsing Doc Merge Language Model Responses", + "/examples/doc_merge/doc_merge.py:316-342": "Remove Tags from Text", + "/examples/doc_merge/doc_merge.py:32-54": "NDA Document Merging and Improvement Tool", + 
"/examples/doc_merge/doc_merge.py:343-369": "Aggregation Prompt Parser", + "/examples/doc_merge/doc_merge.py:370-393": "Aggregating Thought States from Multiple Sources", + "/examples/doc_merge/doc_merge.py:394-420": "Parse Thought States and Scores", + "/examples/doc_merge/doc_merge.py:421-441": "Redundancy Extraction with Regex", + "/examples/doc_merge/doc_merge.py:442-464": "Language Model Improve Prompt Parsing Function", + "/examples/doc_merge/doc_merge.py:465-497": "Functions for Thought State Management and IO Operations", + "/examples/doc_merge/doc_merge.py:500-533": "CoT and ToT Operation Graphs", + "/examples/doc_merge/doc_merge.py:534-561": "Document Merge Graph Generation Code", + "/examples/doc_merge/doc_merge.py:55-71": "Redundancy-Aware NDA Merge Prompts", + "/examples/doc_merge/doc_merge.py:562-593": "Graph Operations Code for GoT2 Merge", + "/examples/doc_merge/doc_merge.py:594-619": "Document Merge Operations Graph Creation", + "/examples/doc_merge/doc_merge.py:620-649": "Operations Graph for Language Model Inference", + "/examples/doc_merge/doc_merge.py:650-679": "Budgeted Language Model Folder Creation", + "/examples/doc_merge/doc_merge.py:680-712": "Budget-Controlled Data Merging", + "/examples/doc_merge/doc_merge.py:713-741": "Initialize Language Model and Run Executor", + "/examples/doc_merge/doc_merge.py:73-112": "NDA Merge and Summarize Tool", + "/examples/doc_merge/doc_merge.py:742-767": "Combine, Evaluate, and Score NDAs with LLM", + "/examples/doc_merge/plot.py": "DocMerge Plotting Functionality", + "/examples/doc_merge/plot.py:1-29": "JSON Data Merging and Sorting", + "/examples/doc_merge/plot.py:133-168": "Custom Tick Positions and Labels for Y-Axis Plotting", + "/examples/doc_merge/plot.py:169-170": "Initializing DocMerge with Cost Upper Limit", + "/examples/doc_merge/plot.py:30-59": "Sorting Final Scores", + "/examples/doc_merge/plot.py:60-96": "Plotting Results from Method Data", + "/examples/doc_merge/plot.py:97-132": "Boxplot and 
Bar Plot Creation with Customized Axes", + "/examples/doc_merge/pure_documents.json": "Company-Supplier Agreement Aspects", + "/examples/doc_merge/pure_documents.json:1-3": "JSON NDA Examples: Confidential Information Sharing Agreements", + "/examples/doc_merge/pure_documents.json:10-10": "Non-Disclosure Contract", + "/examples/doc_merge/pure_documents.json:10-11": "B2B Contractor Loyalty Agreement", + "/examples/doc_merge/pure_documents.json:11-11": "Comprehensive Contract Template", + "/examples/doc_merge/pure_documents.json:11-12": "Non-Disclosure Agreement between Company and Supplier", + "/examples/doc_merge/pure_documents.json:12-13": "Non-Disclosure Non-Compete Agreement", + "/examples/doc_merge/pure_documents.json:13-14": "Supplier Contract Template", + "/examples/doc_merge/pure_documents.json:14-14": "Data Analyst Employment Agreement Outline", + "/examples/doc_merge/pure_documents.json:14-15": "Code of Employment", + "/examples/doc_merge/pure_documents.json:15-15": "Data Analysis Contract Snippet", + "/examples/doc_merge/pure_documents.json:15-16": "Researcher-University NDA", + "/examples/doc_merge/pure_documents.json:16-17": "Business-University Cooperation Agreement", + "/examples/doc_merge/pure_documents.json:17-17": "Research Collaboration Agreement", + "/examples/doc_merge/pure_documents.json:17-18": "University Lab Supply Agreement Sample", + "/examples/doc_merge/pure_documents.json:18-18": "Supplier-University Equipment Agreement", + "/examples/doc_merge/pure_documents.json:18-19": "Laboratory Supply Agreement Template", + "/examples/doc_merge/pure_documents.json:19-20": "Freelance Agreement Template", + "/examples/doc_merge/pure_documents.json:20-21": "Freelance Contract Terms", + "/examples/doc_merge/pure_documents.json:21-21": "Freelancer Legal Agreement Template", + "/examples/doc_merge/pure_documents.json:21-22": "Joint Research Agreement Outline", + "/examples/doc_merge/pure_documents.json:22-23": "Business Agreement Template", + 
"/examples/doc_merge/pure_documents.json:23-24": "Business Agreement Template: Confidentiality & Termination", + "/examples/doc_merge/pure_documents.json:24-24": "Non-Disclosure Agreement Clause", + "/examples/doc_merge/pure_documents.json:24-25": "Comprehensive Confidentiality and Loyalty Agreement", + "/examples/doc_merge/pure_documents.json:25-26": "Non-Compete and Loyalty Agreement: Terms and Consequences", + "/examples/doc_merge/pure_documents.json:26-26": "Non-Compete Loyalty Agreement", + "/examples/doc_merge/pure_documents.json:26-27": "Non-Compete Amendment: Duration Update", + "/examples/doc_merge/pure_documents.json:27-28": "Late Fee Adjustment Amendment", + "/examples/doc_merge/pure_documents.json:28-29": "Late Fee Amendment Contract", + "/examples/doc_merge/pure_documents.json:29-29": "Code Amendment for IT Maintenance", + "/examples/doc_merge/pure_documents.json:29-30": "Legal Amendment for Software Development", + "/examples/doc_merge/pure_documents.json:3-3": "Confidentiality Agreement Outline", + "/examples/doc_merge/pure_documents.json:3-4": "Non-Disclosure Pact: AquaBlue & PineTree", + "/examples/doc_merge/pure_documents.json:30-31": "Contract Extension: New Delivery Dates", + "/examples/doc_merge/pure_documents.json:31-31": "Delayed Contract Terms and Consequences", + "/examples/doc_merge/pure_documents.json:31-32": "Contract Appendices Extraction", + "/examples/doc_merge/pure_documents.json:32-33": "Confidentiality Addendum", + "/examples/doc_merge/pure_documents.json:33-33": "Amended NDA Time Restriction Extension", + "/examples/doc_merge/pure_documents.json:33-34": "Business Agreement Amendment: Conflict Resolution and Execution", + "/examples/doc_merge/pure_documents.json:34-35": "Business Cooperation Agreement with Confidentiality Extension", + "/examples/doc_merge/pure_documents.json:35-36": "Confidentiality Period Extension", + "/examples/doc_merge/pure_documents.json:36-37": "Legal Document Template: Commitment, Consequences, Governing 
Law", + "/examples/doc_merge/pure_documents.json:37-37": "Confidentiality Agreement Clause", + "/examples/doc_merge/pure_documents.json:37-38": "Confidentiality Agreement Outline", + "/examples/doc_merge/pure_documents.json:38-39": "Confidentiality Agreement for Tech Company and Contractor", + "/examples/doc_merge/pure_documents.json:39-40": "Confidentiality Terms and Remedies", + "/examples/doc_merge/pure_documents.json:4-5": "AquaBlue-PineTree NDA Terms", + "/examples/doc_merge/pure_documents.json:40-41": "Confidentiality Agreement Summary", + "/examples/doc_merge/pure_documents.json:41-42": "Legal Agreement Clause", + "/examples/doc_merge/pure_documents.json:42-43": "Termination and Obligations", + "/examples/doc_merge/pure_documents.json:43-44": "Termination Obligations: Party A & B", + "/examples/doc_merge/pure_documents.json:44-45": "Comprehensive NDA Clauses", + "/examples/doc_merge/pure_documents.json:45-46": "Confidentiality Agreement Template", + "/examples/doc_merge/pure_documents.json:46-46": "Service Agreement Template: Structure and Clauses", + "/examples/doc_merge/pure_documents.json:47-48": "Legal Document Collection", + "/examples/doc_merge/pure_documents.json:48-48": "Collaboration and Loyalty Agreement", + "/examples/doc_merge/pure_documents.json:48-49": "Business Consulting Contract Terms", + "/examples/doc_merge/pure_documents.json:49-49": "Consulting Contract Terms", + "/examples/doc_merge/pure_documents.json:49-50": "Confidentiality and Non-Compete Agreement", + "/examples/doc_merge/pure_documents.json:5-6": "NDA & Non-Compete Agreement: AquaBlue-PineTree", + "/examples/doc_merge/pure_documents.json:50-50": "Confidentiality Fee and Penalty Structure", + "/examples/doc_merge/pure_documents.json:50-51": "Confidentiality Breach Penalties Appendix", + "/examples/doc_merge/pure_documents.json:51-52": "Breach Notification and Termination Clause", + "/examples/doc_merge/pure_documents.json:6-6": "Non-Disclosure & Non-Compete Agreement", + 
"/examples/doc_merge/pure_documents.json:7-8": "Training Loyalty Agreement", + "/examples/doc_merge/pure_documents.json:8-10": "Loyalty Agreement Template", + "/examples/doc_merge/pure_documents.json:8-8": "Employment Agreement Terms and Conditions", + "/examples/keyword_counting/README.md": "Multi-Approach Country Frequency Computation", + "/examples/keyword_counting/README.md:1-26": "Country Frequency Analysis Algorithms", + "/examples/keyword_counting/README.md:27-45": "Code for Sample Processing and Plotting", + "/examples/keyword_counting/dataset_gen_countries.py": "Country-Based Language Model Dataset Generation", + "/examples/keyword_counting/dataset_gen_countries.py:1-43": "Country Indexing Function", + "/examples/keyword_counting/dataset_gen_countries.py:113-176": "Country Data Organization in Code", + "/examples/keyword_counting/dataset_gen_countries.py:177-243": "Alphabetical Country List", + "/examples/keyword_counting/dataset_gen_countries.py:244-306": "Country Adjective Dataset Generator", + "/examples/keyword_counting/dataset_gen_countries.py:307-370": "Country-Adjective Listing Code", + "/examples/keyword_counting/dataset_gen_countries.py:371-433": "Country Adjectives for Keyword Counting", + "/examples/keyword_counting/dataset_gen_countries.py:434-460": "Generate Coherent Text with Country Constraints", + "/examples/keyword_counting/dataset_gen_countries.py:44-112": "Country-Adjective Dataset for Keyword Counting", + "/examples/keyword_counting/dataset_gen_countries.py:460-462": "Country Name List in Data Handling", + "/examples/keyword_counting/dataset_gen_countries.py:462-462": "Trade and Cultural Exchanges: A Conversation", + "/examples/keyword_counting/dataset_gen_countries.py:462-471": "Country Sample Dataset Generator", + "/examples/keyword_counting/dataset_gen_countries.py:473-499": "Country Name Generator for Keyword Counting", + "/examples/keyword_counting/dataset_gen_countries.py:500-532": "Invalid Adjective Counter", + 
"/examples/keyword_counting/dataset_gen_countries.py:533-535": "Writing Results to CSV File", + "/examples/keyword_counting/plot.py": "Customizable JSON Boxplots & Bar Charts", + "/examples/keyword_counting/plot.py:1-29": "Collect JSON Data from Directory", + "/examples/keyword_counting/plot.py:123-158": "Plotting Graph with Y-Axis Limits and Annotations", + "/examples/keyword_counting/plot.py:159-167": "GPT-3.5 Plot Generator", + "/examples/keyword_counting/plot.py:30-58": "Sorting Results by Key", + "/examples/keyword_counting/plot.py:59-93": "Plot Keyword Counting Results", + "/examples/keyword_counting/plot.py:94-122": "Boxplot and Bar Chart of Keyword Counts with Customization", + "/examples/set_intersection/README.md": "Set Intersection Data Generator", + "/examples/set_intersection/README.md:1-29": "Set Intersection Approaches and Data Generator", + "/examples/set_intersection/README.md:30-46": "Budget-Controlled Sample Selection", + "/examples/set_intersection/README.md:48-52": "Modify Results Directory", + "/examples/set_intersection/dataset_gen_intersection.py": "Random Set Intersection Generator", + "/examples/set_intersection/dataset_gen_intersection.py:1-39": "Randomized Set Shuffle Dataset Generator", + "/examples/set_intersection/dataset_gen_intersection.py:40-67": "Random Set Intersection Generator", + "/examples/set_intersection/dataset_gen_intersection.py:69-92": "Scramble, Split, Intersect, and Save", + "/examples/set_intersection/plot.py": "AI Method Results Visualization and Analysis", + "/examples/set_intersection/plot.py:1-29": "Traverse and Aggregate JSON Files", + "/examples/set_intersection/plot.py:131-162": "Customizable Y-axis Bar Plot with Twin Axis and Conditional Annotations", + "/examples/set_intersection/plot.py:163-184": "Text Annotations and Count Increment in Plotting", + "/examples/set_intersection/plot.py:30-58": "AI Results Organizer and Analyzer", + "/examples/set_intersection/plot.py:59-94": "Method-based Score Plotter", + 
"/examples/set_intersection/plot.py:95-130": "Customizable Boxplot for Method Results", + "/examples/set_intersection/utils.py": "Error-Counting Set Intersection Utilities", + "/examples/set_intersection/utils.py:1-36": "String to List and Set Functions", + "/examples/set_intersection/utils.py:37-72": "Set Intersection Utilities", + "/examples/set_intersection/utils.py:73-99": "Set Intersection Error Counter", + "/examples/sorting/README.md": "Sorting Algorithm Examples and Visualization", + "/examples/sorting/README.md:1-31": "Sorting Algorithms Examples and Implementations", + "/examples/sorting/README.md:32-46": "Organized Results by LLM and Run Details", + "/examples/sorting/plot.py": "Sorting Algorithm Performance Visualizer", + "/examples/sorting/plot.py:1-29": "JSON Data Collector and Organizer", + "/examples/sorting/plot.py:132-163": "Customizing ax2 Properties and Annotations", + "/examples/sorting/plot.py:164-186": "Sorting Algorithm Performance Plotter", + "/examples/sorting/plot.py:30-58": "Sorting Results to Calculate Scores", + "/examples/sorting/plot.py:59-95": "Plotting Data for Sorting Algorithms", + "/examples/sorting/plot.py:96-131": "Visualizing Scores with Boxplot in Python", + "/examples/sorting/utils.py": "String-to-Int List Converter and Sorter", + "/examples/sorting/utils.py:1-35": "String List Conversion Function", + "/examples/sorting/utils.py:36-70": "Sorting Errors Counter", + "/examples/sorting/utils.py:71-78": "Error Count in Sorted Lists", + "/graph_of_thoughts/controller/README.md": "Controller Class for Executing LLM Graphs", + "/graph_of_thoughts/controller/README.md:1-16": "Controller Class for GoO Execution", + "/graph_of_thoughts/controller/README.md:18-28": "Controller Class Initialization", + "/graph_of_thoughts/controller/__init__.py": "Module Initialization - Controller Import", + "/graph_of_thoughts/controller/controller.py": "JSON Graph Execution Flow Controller", + "/graph_of_thoughts/controller/controller.py:1-35": 
"Controller Class for Graph of Operations", + "/graph_of_thoughts/controller/controller.py:107-128": "Graph Operations Serializer", + "/graph_of_thoughts/controller/controller.py:129-152": "JSON-ifying Thoughts: Controller Analysis", + "/graph_of_thoughts/controller/controller.py:37-60": "Controller Initialization and Execution", + "/graph_of_thoughts/controller/controller.py:61-82": "Graph-Based State Validation and Execution", + "/graph_of_thoughts/controller/controller.py:83-106": "Graph Operations Executor and Serializer", + "/graph_of_thoughts/language_models/README.md": "Language Models: Instantiate, Add, and Query", + "/graph_of_thoughts/language_models/README.md:1-18": "Language Models Module Introduction", + "/graph_of_thoughts/language_models/README.md:19-21": "OpenAI Model Cost Calculator", + "/graph_of_thoughts/language_models/README.md:21-24": "OpenAI Pricing and Model Configurations", + "/graph_of_thoughts/language_models/README.md:24-26": "OpenAI Chat Creation Parameters", + "/graph_of_thoughts/language_models/README.md:27-42": "Initializing Language Model Controller", + "/graph_of_thoughts/language_models/README.md:42-47": "Defining Language Model Parameters", + "/graph_of_thoughts/language_models/README.md:47-59": "Llama-2 Model Setup Guide", + "/graph_of_thoughts/language_models/README.md:59-76": "Adding a New LLM to Existing Model", + "/graph_of_thoughts/language_models/README.md:77-95": "Abstract Language Model Base Class", + "/graph_of_thoughts/language_models/__init__.py": "Imports Necessary Language Model Classes", + "/graph_of_thoughts/language_models/abstract_language_model.py": "Abstract Language Model Class and Configuration", + "/graph_of_thoughts/language_models/abstract_language_model.py:1-34": "Abstract Language Model Base Class Initialization", + "/graph_of_thoughts/language_models/abstract_language_model.py:35-66": "Abstract Language Model Initialization", + "/graph_of_thoughts/language_models/abstract_language_model.py:68-92": 
"Abstract Language Model: Query and Get Methods", + "/graph_of_thoughts/language_models/chatgpt.py": "ChatGPT Class for OpenAI Chat API", + "/graph_of_thoughts/language_models/chatgpt.py:1-35": "Initializing ChatGPT Language Model", + "/graph_of_thoughts/language_models/chatgpt.py:120-146": "ChatGPT Response Logger and Cost Tracker", + "/graph_of_thoughts/language_models/chatgpt.py:147-157": "Converting Chats to Strings", + "/graph_of_thoughts/language_models/chatgpt.py:35-49": "Model Initialization and Configuration", + "/graph_of_thoughts/language_models/chatgpt.py:50-69": "Initialize ChatGPT Model and Query Method", + "/graph_of_thoughts/language_models/chatgpt.py:70-94": "ChatGPT Cache Function", + "/graph_of_thoughts/language_models/chatgpt.py:95-119": "OpenAI Chat Class with Error Backoff and Caching", + "/graph_of_thoughts/language_models/config_template.json": "User-Specific Language Model Config Template", + "/graph_of_thoughts/language_models/config_template.json:1-41": "Language Model Configuration Template", + "/graph_of_thoughts/language_models/config_template.json:42-49": "Optimal Language Model Config", + "/graph_of_thoughts/language_models/llamachat_hf.py": "LLaMA Chat Model Implementation", + "/graph_of_thoughts/language_models/llamachat_hf.py:1-31": "LLaMA 2 HF Interface", + "/graph_of_thoughts/language_models/llamachat_hf.py:108-119": "Extracting Generated Texts", + "/graph_of_thoughts/language_models/llamachat_hf.py:32-53": "Initializing Language Model Attributes", + "/graph_of_thoughts/language_models/llamachat_hf.py:54-82": "LLaMA Text Generation Pipeline", + "/graph_of_thoughts/language_models/llamachat_hf.py:83-107": "LLaMA Chat Cache Responses", + "/graph_of_thoughts/operations/README.md": "Thought Manipulation Operations", + "/graph_of_thoughts/operations/README.md:1-14": "Operations in Graph of Thoughts", + "/graph_of_thoughts/operations/README.md:16-40": "Graph-Based Thought Scoring", + "/graph_of_thoughts/operations/README.md:41-51": 
"Thought Processing System Operations", + "/graph_of_thoughts/operations/README.md:52-62": "Thought Operations and Parameters", + "/graph_of_thoughts/operations/README.md:64-70": "Thought Operations: KeepValid, Selector, GroundTruth", + "/graph_of_thoughts/operations/__init__.py": "Graph of Thoughts Operations", + "/graph_of_thoughts/operations/graph_of_operations.py": "Operation Plan Executor", + "/graph_of_thoughts/operations/graph_of_operations.py:1-32": "Graph of Operations Class", + "/graph_of_thoughts/operations/graph_of_operations.py:34-64": "Operations Appending in Graphs", + "/graph_of_thoughts/operations/graph_of_operations.py:65-69": "Trimming Predecessors and Leaves", + "/graph_of_thoughts/operations/operations.py": "Abstract Base Class for Graph of Thoughts Operations", + "/graph_of_thoughts/operations/operations.py:1-40": "Abstract Base Class for Graph of Thoughts Operations", + "/graph_of_thoughts/operations/operations.py:107-130": "Abstract Execution Class in Operations", + "/graph_of_thoughts/operations/operations.py:132-168": "Score Class in Graph of Thoughts Operations", + "/graph_of_thoughts/operations/operations.py:169-192": "Score Operation Class", + "/graph_of_thoughts/operations/operations.py:193-218": "Scoring Predecessor Thoughts Method", + "/graph_of_thoughts/operations/operations.py:220-239": "Scoring Thoughts with Functions or Prompts", + "/graph_of_thoughts/operations/operations.py:240-263": "Scoring Thoughts with LM or User Input", + "/graph_of_thoughts/operations/operations.py:264-293": "Validate and Improve Thoughts Operation", + "/graph_of_thoughts/operations/operations.py:294-319": "Validate and Improve Class Definition", + "/graph_of_thoughts/operations/operations.py:320-344": "Iterating Through Previous Thoughts", + "/graph_of_thoughts/operations/operations.py:345-366": "Validate Thoughts via Language Model", + "/graph_of_thoughts/operations/operations.py:367-399": "Generative Thought Validation Algorithm", + 
"/graph_of_thoughts/operations/operations.py:400-427": "Thoughts Generator Class", + "/graph_of_thoughts/operations/operations.py:41-71": "Operation Initialization and Execution", + "/graph_of_thoughts/operations/operations.py:428-451": "Generate Thoughts with Language Model", + "/graph_of_thoughts/operations/operations.py:452-476": "Generating Responses and Parsing Thoughts", + "/graph_of_thoughts/operations/operations.py:477-513": "Enhancing Thoughts with Improve Class", + "/graph_of_thoughts/operations/operations.py:514-537": "Improve and Aggregate Operations", + "/graph_of_thoughts/operations/operations.py:538-568": "Aggregate Operation Class", + "/graph_of_thoughts/operations/operations.py:570-594": "Operation Predecessor Check and Prompt Construction", + "/graph_of_thoughts/operations/operations.py:596-627": "Keep Best N Thoughts Operation", + "/graph_of_thoughts/operations/operations.py:628-655": "KeepBestN: Initialize, Check N Thoughts", + "/graph_of_thoughts/operations/operations.py:656-683": "Keep Best N Thoughts", + "/graph_of_thoughts/operations/operations.py:685-708": "KeepBestN Operation Definition", + "/graph_of_thoughts/operations/operations.py:709-746": "KeepValid: Preserving Valid Thoughts", + "/graph_of_thoughts/operations/operations.py:72-105": "Operation Execution and Relationships", + "/graph_of_thoughts/operations/operations.py:747-778": "Operations: KeepValid and GroundTruth Classes", + "/graph_of_thoughts/operations/operations.py:779-807": "GroundTruth Operation for Thoughts Evaluation", + "/graph_of_thoughts/operations/operations.py:809-833": "GroundTruth Operation Evaluation", + "/graph_of_thoughts/operations/operations.py:834-864": "Graph of Thoughts Selector Operation", + "/graph_of_thoughts/operations/operations.py:865-890": "Selector Operation: Thought Selection from Predecessors", + "/graph_of_thoughts/operations/operations.py:891-900": "Thoughts Selector Logging", + "/graph_of_thoughts/operations/thought.py": "Thought Class 
Operations", + "/graph_of_thoughts/operations/thought.py:1-35": "Thought Class Definition", + "/graph_of_thoughts/operations/thought.py:113-117": "Solve Thought Method Definition", + "/graph_of_thoughts/operations/thought.py:36-68": "Thought Class Definition", + "/graph_of_thoughts/operations/thought.py:69-111": "Thought Class Definition and Operations", + "/graph_of_thoughts/parser/__init__.py": "Efficient Parser Imports", + "/graph_of_thoughts/parser/parser.py": "Abstract Parser for Thought States and Text Inputs", + "/graph_of_thoughts/parser/parser.py:1-31": "Language Model Response Parser", + "/graph_of_thoughts/parser/parser.py:32-59": "Language Model Parser Methods", + "/graph_of_thoughts/parser/parser.py:60-89": "Thought Parser Methods", + "/graph_of_thoughts/parser/parser.py:90-90": "Understanding 'pass': A Placeholder in Code", + "/graph_of_thoughts/prompter/__init__.py": "Importing Prompter Class", + "/graph_of_thoughts/prompter/prompter.py": "Generating Prompts with Prompter Class", + "/graph_of_thoughts/prompter/prompter.py:1-36": "Abstract Base Class for Language Model Prompt Generation", + "/graph_of_thoughts/prompter/prompter.py:37-65": "Abstract Base Class for Prompt and Validation Prompt Generation", + "/graph_of_thoughts/prompter/prompter.py:66-86": "Abstract Class for Prompt Generation", + "/paper/README.md": "Access and Visualize ArXiv Data", + "/paper/plots.py": "Python Scripts for Data Visualization", + "/paper/plots.py:1-29": "Python JSON Data Organizer", + "/paper/plots.py:124-153": "Task-Based Score and Cost Filtering", + "/paper/plots.py:154-184": "Boxplot Creation for Sorted Scores", + "/paper/plots.py:185-212": "Plot Customization for Multi-Functionality", + "/paper/plots.py:213-246": "Creating Y-Label, Saving Plots with Method and Model Names", + "/paper/plots.py:247-289": "Customizable Model Data Visualization with 'plot_results'", + "/paper/plots.py:290-330": "Task-Specific Data Plots in Python", + "/paper/plots.py:30-58": "Results 
Sorting and Scoring Algorithm", + "/paper/plots.py:331-337": "Plot Parameters and Display Options", + "/paper/plots.py:59-87": "Final Scores Calculation", + "/paper/plots.py:88-123": "Plotting Functions for Scores and Results", + "/pyproject.toml": "Python Package Setup with Hatchling", + "/pyproject.toml:1-37": "Python Package Settings with Hatchling", + "/pyproject.toml:39-39": "TOML Project Script Entry Point" +} \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 2254d8b..41dcc9f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -114,6 +114,52 @@ /* html, body{ max-width: 980px; } */ + /* Add custom styles for the drawer button */ + #drawerButton { + position: absolute; + top: 10px; + right: 10px; + z-index: 1000; + } + + .grayed-out-button { + color: #ccc; + /* Set the text color to gray */ + background-color: #f4f4f4; + /* Set a light gray background color */ + border: 1px solid #ccc; + /* Add a gray border */ + cursor: not-allowed; + /* Change the cursor to indicate the button is disabled */ + pointer-events: none; + /* Disable pointer events to prevent interaction */ + } + + .grayed-out-button i { + color: #ccc; + /* Set the icon color to gray */ + } + + + #drawer { + /* to visualize the alignment of buttons and text */ + background-color: white; + margin: auto; + /* margin: 1.5%; */ + /* margin-right: 10%; */ + text-wrap: wrap; + /* margin-bottom: 10px; */ + } + + #drawer_mask { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.3); + /* background-color: rgba(0, 0, 0, 0.1); */ + } #progress-overlay { position: fixed; @@ -153,6 +199,12 @@ margin-bottom: 10px; } + /* + h3, + h4 { + line-height: 0.5; + } */ + p { line-height: 1.5; } @@ -163,6 +215,7 @@ /* Styles for mobile devices */ @media (max-width: 767px) { + #progress-bar { width: 60%; @@ -189,8 +242,8 @@ .right-half { /*padding-left: 10px;*/ padding-right: 10px; - overflow-y:auto; - overflow-x:auto; + overflow-y: 
auto; + overflow-x: auto; } .left-half { @@ -207,6 +260,7 @@ } #searchInput { + z-index: 10; text-indent: 15px; } @@ -219,11 +273,11 @@ @media (min-width: 768px) { #progress-bar { - width: 50%; } #searchInput { + z-index: 10; text-indent: 20px; } @@ -248,7 +302,7 @@ border: 1px solid #ccc; /* justify-content: space-between; */ /* flex-direction: row; */ - height:min-content; + height: min-content; /* align-items: flex-start; */ } @@ -266,8 +320,8 @@ /* height:100%; */ /* align-self: stretch; */ - overflow-x:auto; - overflow-y:scroll; + overflow-x: auto; + overflow-y: scroll; } .right-half { @@ -303,8 +357,8 @@ /* useless now */ pre { - overflow-x:visible!important; - overflow-y:visible!important; + overflow-x: visible !important; + overflow-y: visible !important; /* overflow-x: auto; */ white-space: pre-wrap; white-space: -moz-pre-wrap; @@ -313,6 +367,10 @@ word-wrap: break-word; } + button { + cursor: pointer; + } + .codelink:hover { cursor: pointer; text-decoration: underline; @@ -375,10 +433,37 @@ border: 1px solid #ccc; } - ul { + .input-group { + position: relative; + display: flex; + /* flex-wrap: wrap; */ + /* align-items: stretch; */ + width: 100%; + justify-content: space-between; + align-items: center; + } + + .input-group-button:hover { + cursor: pointer; + } + + #sidebar-button:hover { + cursor: pointer; + } + + .input-group-button { + border: 1px solid #ccc; + /* padding:auto; */ + /* flex:1; */ + z-index: 9; + padding: 10px; + margin: auto; + } + + ul.search-results { flex: 1; /* Fill the remaining space */ - overflow-y: auto; + overflow-y: scroll; /* Enable vertical scrolling */ list-style: none; padding: 0; @@ -386,452 +471,740 @@ margin-top: 0; } - ul li { + ul.search-results li { background-color: #f2f2f2; margin-bottom: 10px; /*border-radius: 10px;*/ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1); } + + #drawer_content>div>span:hover { + cursor: pointer; + text-decoration: underline; + } -
              - -
              -
              -
              + + + + +
              +
              +
              +
              +
              + + +
              +

              +
              + Document index of: +
              +
              + + +
              +

              +
              + +
              - - -
              -

              Document Index of: - -
              - - -
              -

              - - -
              -
                - -
                + + //document.addEventListener('DOMContentLoaded', registerSearchEventListener); + // waitForDOMContentLoaded(); + registerSearchEventListener() + // console.log("event listener registered") + } + async_main() + + diff --git a/docs/metadata.json b/docs/metadata.json index 237aaa5..1fdb531 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -7,187 +7,187 @@ "0": { "filepath": "/README.md", "entry_id": 0, - "language_id": "plain-text" + "language_id": "markdown" }, "1": { - "filepath": "/pyproject.toml", + "filepath": "/examples/README.md", "entry_id": 14, - "language_id": "toml" + "language_id": "markdown" }, "2": { - "filepath": "/examples/README.md", - "entry_id": 20, - "language_id": "plain-text" - }, - "3": { "filepath": "/examples/doc_merge/README.md", - "entry_id": 24, - "language_id": "plain-text" + "entry_id": 18, + "language_id": "markdown" }, - "4": { + "3": { "filepath": "/examples/doc_merge/doc_merge.py", - "entry_id": 30, + "entry_id": 24, "language_id": "python" }, - "5": { + "4": { "filepath": "/examples/doc_merge/plot.py", - "entry_id": 86, + "entry_id": 80, "language_id": "python" }, - "6": { + "5": { "filepath": "/examples/doc_merge/pure_documents.json", - "entry_id": 100, + "entry_id": 94, "language_id": "json" }, - "7": { + "6": { "filepath": "/examples/keyword_counting/README.md", - "entry_id": 244, + "entry_id": 238, "language_id": "markdown" }, - "8": { + "7": { "filepath": "/examples/keyword_counting/dataset_gen_countries.py", - "entry_id": 250, + "entry_id": 244, "language_id": "python" }, - "9": { + "8": { "filepath": "/examples/keyword_counting/plot.py", - "entry_id": 280, + "entry_id": 274, "language_id": "python" }, - "10": { + "9": { "filepath": "/examples/set_intersection/README.md", - "entry_id": 294, - "language_id": "markdown" + "entry_id": 288, + "language_id": "plain-text" }, - "11": { + "10": { "filepath": "/examples/set_intersection/dataset_gen_intersection.py", - "entry_id": 302, + "entry_id": 296, 
"language_id": "python" }, - "12": { + "11": { "filepath": "/examples/set_intersection/plot.py", - "entry_id": 310, + "entry_id": 304, "language_id": "python" }, - "13": { + "12": { "filepath": "/examples/set_intersection/utils.py", - "entry_id": 324, + "entry_id": 318, "language_id": "python" }, - "14": { + "13": { "filepath": "/examples/sorting/README.md", - "entry_id": 332, - "language_id": "plain-text" + "entry_id": 326, + "language_id": "markdown" }, - "15": { + "14": { "filepath": "/examples/sorting/plot.py", - "entry_id": 338, + "entry_id": 332, "language_id": "python" }, - "16": { + "15": { "filepath": "/examples/sorting/utils.py", - "entry_id": 352, + "entry_id": 346, "language_id": "python" }, - "17": { + "16": { "filepath": "/graph_of_thoughts/controller/README.md", - "entry_id": 360, - "language_id": "plain-text" + "entry_id": 354, + "language_id": "markdown" }, - "18": { + "17": { "filepath": "/graph_of_thoughts/controller/__init__.py", - "entry_id": 366, + "entry_id": 360, "language_id": "python" }, - "19": { + "18": { "filepath": "/graph_of_thoughts/controller/controller.py", - "entry_id": 370, + "entry_id": 364, "language_id": "python" }, - "20": { + "19": { "filepath": "/graph_of_thoughts/language_models/README.md", - "entry_id": 384, - "language_id": "plain-text" + "entry_id": 378, + "language_id": "markdown" }, - "21": { + "20": { "filepath": "/graph_of_thoughts/language_models/__init__.py", - "entry_id": 404, + "entry_id": 398, "language_id": "python" }, - "22": { + "21": { "filepath": "/graph_of_thoughts/language_models/abstract_language_model.py", - "entry_id": 408, + "entry_id": 402, "language_id": "python" }, - "23": { + "22": { "filepath": "/graph_of_thoughts/language_models/chatgpt.py", - "entry_id": 416, + "entry_id": 410, "language_id": "python" }, - "24": { + "23": { "filepath": "/graph_of_thoughts/language_models/config_template.json", - "entry_id": 432, + "entry_id": 426, "language_id": "json" }, - "25": { + "24": { "filepath": 
"/graph_of_thoughts/language_models/llamachat_hf.py", - "entry_id": 438, + "entry_id": 432, "language_id": "python" }, - "26": { + "25": { "filepath": "/graph_of_thoughts/operations/README.md", - "entry_id": 450, - "language_id": "plain-text" + "entry_id": 444, + "language_id": "markdown" }, - "27": { + "26": { "filepath": "/graph_of_thoughts/operations/__init__.py", - "entry_id": 462, + "entry_id": 456, "language_id": "python" }, - "28": { + "27": { "filepath": "/graph_of_thoughts/operations/graph_of_operations.py", - "entry_id": 466, + "entry_id": 460, "language_id": "python" }, - "29": { + "28": { "filepath": "/graph_of_thoughts/operations/operations.py", - "entry_id": 474, + "entry_id": 468, "language_id": "python" }, - "30": { + "29": { "filepath": "/graph_of_thoughts/operations/thought.py", - "entry_id": 540, + "entry_id": 534, "language_id": "python" }, - "31": { + "30": { "filepath": "/graph_of_thoughts/parser/__init__.py", - "entry_id": 550, + "entry_id": 544, "language_id": "python" }, - "32": { + "31": { "filepath": "/graph_of_thoughts/parser/parser.py", - "entry_id": 554, + "entry_id": 548, "language_id": "python" }, - "33": { + "32": { "filepath": "/graph_of_thoughts/prompter/__init__.py", - "entry_id": 564, + "entry_id": 558, "language_id": "python" }, - "34": { + "33": { "filepath": "/graph_of_thoughts/prompter/prompter.py", - "entry_id": 568, + "entry_id": 562, "language_id": "python" }, - "35": { + "34": { "filepath": "/paper/README.md", - "entry_id": 576, + "entry_id": 570, "language_id": "markdown" }, - "36": { + "35": { "filepath": "/paper/plots.py", - "entry_id": 580, + "entry_id": 574, "language_id": "python" + }, + "36": { + "filepath": "/pyproject.toml", + "entry_id": 598, + "language_id": "toml" } }, "project_name": "graph-of-thoughts", diff --git a/docs/metadata_title.json b/docs/metadata_title.json new file mode 100644 index 0000000..0103dd2 --- /dev/null +++ b/docs/metadata_title.json @@ -0,0 +1 @@ +{"split_count": 1} \ No newline at end 
of file diff --git a/docs/tree.html b/docs/tree.html index bb2f112..885f4ce 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -66,87 +66,90 @@ ul { list-style: none; } + #feeling-lucky:hover{ + cursor: pointer; + }
                -

                Project Structure of: spcl/graph-of-thoughts

                +

                Project structure of: spcl/graph-of-thoughts

                • graph-of-thoughts Data analysis, visualization, and language modeling tools.
                    -
                  • README.md GoT framework: Python language model for sorting and JSON graphs
                  • examples Interactive examples of data analysis and visualization.
                      -
                    • README.md Runnable examples for Graph of Thoughts package with standalone scripts and prompt files.
                    • doc_merge Efficient NDA merging with language models and redundancy handling.
                        -
                      • README.md Document merging methods comparison with debug logs.
                      • -
                      • doc_merge.py Efficient NDA merging with language model and redundancy handling.
                      • -
                      • plot.py Plot script, imports, sorts, plots boxplots and bars.
                      • -
                      • pure_documents.json Company-supplier agreement discussions, covering various aspects.
                      • +
                      • doc_merge.py Efficient NDA merging with language model and redundancy handling.
                      • +
                      • plot.py Plot script, imports, sorts, plots boxplots and bars.
                      • +
                      • pure_documents.json Company-supplier agreement discussions, covering various aspects.
                      • +
                      • README.md Document merging methods comparison with debug logs.
                    • keyword_counting Count country occurrences in text, generate dataset, and plot results
                        -
                      • README.md Count frequency methods for countries in text
                      • -
                      • dataset_gen_countries.py Generate dataset for country occurrences using language model
                      • -
                      • plot.py Python script for plotting boxplots, bar charts from JSON data.
                      • +
                      • dataset_gen_countries.py Generate dataset for country occurrences using language model
                      • +
                      • plot.py Python script for plotting boxplots, bar charts from JSON data.
                      • +
                      • README.md Count frequency methods for countries in text
                    • +
                    • README.md Runnable examples for Graph of Thoughts package with standalone scripts and prompt files.
                    • set_intersection Set intersection data tools and visualizations
                    • sorting Sorting algorithms examples, Python, plotting, utilities.
                        -
                      • README.md Sorting algorithm examples in Python.
                      • -
                      • plot.py Sorts, plots, customizes boxplots for sorting algorithms.
                      • -
                      • utils.py Sorting utility functions, list conversion and testing.
                      • +
                      • plot.py Sorts, plots, customizes boxplots for sorting algorithms.
                      • +
                      • README.md Sorting algorithm examples in Python.
                      • +
                      • utils.py Sorting utility functions, list conversion and testing.
                  • graph_of_thoughts Graph-of-thoughts engine with language models and operations.
                    • controller Language model graph processing controller.
                        -
                      • README.md Controller class manages graph execution with LLM, custom prompter, parser.
                      • -
                      • __init__.py Imports Controller class from package's controller module.
                      • -
                      • controller.py Graph processing controller for language models.
                      • +
                      • __init__.py Imports Controller class from package's controller module.
                      • +
                      • controller.py Graph processing controller for language models.
                      • +
                      • README.md Controller class manages graph execution with LLM, custom prompter, parser.
                    • language_models Language model library with GPT-4, GPT-3.5, and Llama-2 support
                    • operations Manage graph-of-thought operations with language models and helper classes.
                        -
                      • README.md Manage thought operations with language models and helper classes.
                      • -
                      • __init__.py Imports classes for operations in graph-of-thoughts.
                      • -
                      • graph_of_operations.py Graph of Operations: Manages operation execution plans
                      • -
                      • operations.py Graph of Thoughts operations preservation.
                      • -
                      • thought.py Thought class: LLM operation, state, score.
                      • +
                      • __init__.py Imports classes for operations in graph-of-thoughts.
                      • +
                      • graph_of_operations.py Graph of Operations: Manages operation execution plans
                      • +
                      • operations.py Graph of Thoughts operations preservation.
                      • +
                      • README.md Manage thought operations with language models and helper classes.
                      • +
                      • thought.py Thought class: LLM operation, state, score.
                    • parser Python language model response parsing abstract class library.
                        -
                      • __init__.py Import Parser class from "parser" module for easier usage.
                      • -
                      • parser.py Abstract class with 3 parsing methods for language model responses using thought states and texts.
                      • +
                      • __init__.py Import Parser class from "parser" module for easier usage.
                      • +
                      • parser.py Abstract class with 3 parsing methods for language model responses using thought states and texts.
                    • prompter Generate prompt models for language.
                  • paper ArXiv preprint data visualization scripts and plots.
                      -
                    • README.md Unpack, execute plots.py for arXiv preprint data visualization
                    • -
                    • plots.py Generate customized boxplots with Python and JSON data
                    • +
                    • plots.py Generate customized boxplots with Python and JSON data
                    • +
                    • README.md Unpack, execute plots.py for arXiv preprint data visualization
                  • -
                  • pyproject.toml Python package settings with Hatchling and entry point.
                  • +
                  • pyproject.toml Python package settings with Hatchling and entry point.
                  • +
                  • README.md GoT framework: Python language model for sorting and JSON graphs
                @@ -166,14 +169,27 @@

                Project Structure of: spc const queryParams = getQueryParams(window.location.search); const show_full = queryParams.full == "true"; + //const is_random = queryParams.random == 'true'; + function feelingLucky() { + var elements = document.getElementsByClassName("file_link"); + var randomIndex = Math.floor(Math.random() * elements.length); + window.location = elements[randomIndex].href; + } + //if (is_random) {feelingLucky();} + document.getElementById("feeling-lucky").onclick = feelingLucky; if (!show_full) { const spans = document.querySelectorAll('span'); for (let span of spans) { if (span.getAttribute("hierarchy") == '0') { continue } - toggleVisibility(span); } + } else { + const expand_elem = document.getElementById('expand-tree'); + expand_elem.setAttribute("href", "tree.html"); + expand_elem.setAttribute("title", "Undo expand"); + expand_elem.innerHTML = '' } + From b51ec56cdac5bb4d2358c899c175d96f0ba8c37a Mon Sep 17 00:00:00 2001 From: GitJournal Date: Sat, 13 Jan 2024 20:46:27 +0800 Subject: [PATCH 06/25] update --- docs/codeview.html | 17 ++++++++++++++++- docs/index.html | 32 +++++++++++++++++++++++++------- docs/metadata.json | 12 ++++++------ docs/tree.html | 1 + 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/docs/codeview.html b/docs/codeview.html index d0da4af..9785df3 100644 --- a/docs/codeview.html +++ b/docs/codeview.html @@ -9,6 +9,7 @@ href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='currentColor' class='bi bi-code-square' viewBox='0 0 16 16'%3E%3Cpath d='M14 1a1 1 0 0 1 1 1v12a1 1 0 0 1-1 1H2a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1zM2 0a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2z'/%3E%3Cpath d='M6.854 4.646a.5.5 0 0 1 0 .708L4.207 8l2.647 2.646a.5.5 0 0 1-.708.708l-3-3a.5.5 0 0 1 0-.708l3-3a.5.5 0 0 1 .708 0zm2.292 0a.5.5 0 0 0 0 .708L11.793 8l-2.647 2.646a.5.5 0 0 0 .708.708l3-3a.5.5 0 0 0 0-.708l-3-3a.5.5 0 0 0-.708 0z'/%3E%3C/svg%3E" 
type="image/svg+xml"> Code View + @@ -405,6 +406,13 @@ const language = queryParams.language; const code_path = queryParams.file; const project_name = queryParams.project; + const keywords = queryParams.keywords; + var keywordList + try { + keywordList = JSON.parse(keywords); + + } catch (e) { } + document.title = `Code view of: ${code_path} - Project: ${project_name}`; const h1_element = document.getElementById('code-path'); // h1_element.textContent = code_path.slice('src/'.length); // debugger; @@ -502,10 +510,17 @@ } else { // Prism.highlightAll(); // Prism.highlightElement(pre_elem); - // setTimeout(() => {pre_elem.classList.add("wrap_pre");}, 1000); // pre_elem.style.whiteSpace="pre-wrap !important"; } + + if (keywordList != undefined) { + setInterval(() => { + const markInstance = new Mark(document.getElementById('code-div')); + markInstance.unmark(); // Clear previous marks + markInstance.mark(keywordList); + }, 1000) + } // applyHash(); // Prism.highlightElement(pre_elem, () => {applyHash()}); // Prism.highlightElement(pre_elem).then(applyHash); diff --git a/docs/index.html b/docs/index.html index 41dcc9f..48b7d04 100644 --- a/docs/index.html +++ b/docs/index.html @@ -619,7 +619,8 @@

                // Position the target element at the same top and left coordinates as the reference element drawer_title.style.position = "absolute"; - drawer_title.style.top = topPosition + "px"; + drawer_title.style.top = topPosition * 1 + "px"; + // drawer_title.style.top = topPosition*1.5 + "px"; if (pageWidth > 768) { drawer_title.style.left = pageWidth * 0.02 + "px"; // drawer_title.style.left = pageWidth * 0.03 + "px"; @@ -673,13 +674,14 @@

                const progressOverlay = document.getElementById('progress-overlay'); const progressBar = document.querySelector('.progress'); var isDebugMode = false; - function navigateToPage(base_filepath, language_id, project_id, detail_filepath = "") { + function navigateToPage(base_filepath, language_id, project_id, keywords, detail_filepath = "") { // Use a relative path to navigate to a specific page let page_param = "codeview.html"; let file_param = 'src' + base_filepath; file_param = encodeURIComponent(file_param); let language_param = language_id - let jump_link = `${page_param}?file=${file_param}&language=${language_id}&project=${project_id}`; + let keywords_encoded = encodeURIComponent(JSON.stringify(keywords)); + let jump_link = `${page_param}?file=${file_param}&language=${language_id}&project=${project_id}&keywords=${keywords_encoded}`; if (detail_filepath !== "") { let location_range = detail_filepath.slice(base_filepath.length + 1); let location_param = `mycode.${location_range}`; @@ -715,6 +717,8 @@

                const github_url = metadata.url.full; const project_id = metadata.project_name; + const myDefaultTitle = `Document index of: ${project_id}` + document.title = myDefaultTitle; const github_partial_url = metadata.url.partial; const file_mapping = metadata.file_mapping const split_count = metadata.split_count @@ -815,11 +819,16 @@

                return inputString.split(searchValue).join(replaceValue); } function getSubTerms(it) { - var m_str = it + it = it.trim(); + var ret = [it] for (const sym of englishSymbols) { + var m_str = it m_str = replaceAll(m_str, sym, " "); + ret.concat(m_str.split(" ")) } - return m_str.split(" "); + ret = ret.concat(it.split(" ")) + ret = ret.concat(m_str.split(" ")); + return ret } const searchInputElem = document.getElementById('searchInput'); @@ -876,6 +885,7 @@

                // console.log('search term file index:', searchTermFileIndex) if (searchTermFileIndex != -1) { isFileSearch = true; + document.title = `File: ${searchTerm} - ${myDefaultTitle}` // do something with the buttons // file_previous file_next if (searchTermFileIndex != 0) { @@ -954,6 +964,7 @@

                }); }); var isDesktopScreen = window.innerWidth > 768; + var title_topics = [] for (var file_id of searchResultItemIds) { const it = searchResultItems[file_id]; const item = document.createElement('li'); @@ -966,6 +977,7 @@

                const file_title_elem_id = generateUUID() file_title_elem.setAttribute('id', file_title_elem_id) const mylocation = it.header.filepath.slice(1) + title_topics.push(title_data["/" + mylocation]) overview_file_title_elem.innerHTML = `${title_data["/" + mylocation]}`; overview_file_title_elem.onclick = () => { @@ -982,7 +994,8 @@

                file_title_elem.appendChild(file_title_span); // location_p.innerHTML = (mylocation); - file_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)})`) + file_title_span.onclick = () => { navigateToPage(it.header.filepath, it.language_id, project_id, searchHighlightTerms) } + // file_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)},${JSON.stringify(project_id)})`) // location_p.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)})`) // search_header.appendChild(location_p); search_header.appendChild(file_title_elem); @@ -1038,7 +1051,8 @@

                // code_location_p.className = "monospace-text codelink"; // code_location_p.innerHTML = (pair_location_text); code_title_span.setAttribute("title", pair_location_text) - code_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) + code_title_span.onclick = () => { navigateToPage(it.header.filepath, it.language_id, project_id, searchHighlightTerms, pair.location) } + // code_title_span.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) // code_location_p.setAttribute('onclick', `navigateToPage(${JSON.stringify(it.header.filepath)}, ${JSON.stringify(it.language_id)}, ${JSON.stringify(project_id)}, ${JSON.stringify(pair.location)})`) // pair_left.appendChild(code_location_p); if (pair_title_text != undefined) { @@ -1081,6 +1095,8 @@

                } Prism.highlightAllUnder(searchResults); + const title_topics_joined = title_topics.join(" | ") + document.title = `${document.title} - Topics: ${title_topics_joined}` // let's try understand that. const markInstance = new Mark(document.getElementById('searchResults')); markInstance.unmark(); // Clear previous marks @@ -1133,9 +1149,11 @@

                if (inputBoxText.trim() !== "") { // Construct the new URL with the updated query string newURL = `${window.location.protocol}//${window.location.host}${window.location.pathname}?q=${inputBoxText}`; + document.title = `Query: ${inputBoxText.trim()} - ${myDefaultTitle}`; } else { newURL = `${window.location.protocol}//${window.location.host}${window.location.pathname}`; + document.title = myDefaultTitle; } diff --git a/docs/metadata.json b/docs/metadata.json index 1fdb531..35d01ed 100644 --- a/docs/metadata.json +++ b/docs/metadata.json @@ -7,12 +7,12 @@ "0": { "filepath": "/README.md", "entry_id": 0, - "language_id": "markdown" + "language_id": "plain-text" }, "1": { "filepath": "/examples/README.md", "entry_id": 14, - "language_id": "markdown" + "language_id": "plain-text" }, "2": { "filepath": "/examples/doc_merge/README.md", @@ -37,7 +37,7 @@ "6": { "filepath": "/examples/keyword_counting/README.md", "entry_id": 238, - "language_id": "markdown" + "language_id": "plain-text" }, "7": { "filepath": "/examples/keyword_counting/dataset_gen_countries.py", @@ -72,7 +72,7 @@ "13": { "filepath": "/examples/sorting/README.md", "entry_id": 326, - "language_id": "markdown" + "language_id": "plain-text" }, "14": { "filepath": "/examples/sorting/plot.py", @@ -87,7 +87,7 @@ "16": { "filepath": "/graph_of_thoughts/controller/README.md", "entry_id": 354, - "language_id": "markdown" + "language_id": "plain-text" }, "17": { "filepath": "/graph_of_thoughts/controller/__init__.py", @@ -177,7 +177,7 @@ "34": { "filepath": "/paper/README.md", "entry_id": 570, - "language_id": "markdown" + "language_id": "plain-text" }, "35": { "filepath": "/paper/plots.py", diff --git a/docs/tree.html b/docs/tree.html index 885f4ce..9e3b717 100644 --- a/docs/tree.html +++ b/docs/tree.html @@ -6,6 +6,7 @@ + Project structure of: spcl/graph-of-thoughts