TensorOpsAI · diogoncalves · Feb 12, 2025 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,41 @@
+name: Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      # Set up Python environment
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ">=3.9 <3.13"
+
+      # Install Poetry
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+      # Install lib and dev dependencies
+      - name: Install llmstudio-core
+        working-directory: ./libs/core
+        run: |
+          poetry install
+          POETRY_ENV=$(poetry env info --path)
+          echo $POETRY_ENV
+          echo "POETRY_ENV=$POETRY_ENV" >> $GITHUB_ENV
+
+      - name: Run unit tests
+        run: |
+          echo ${{ env.POETRY_ENV }}
+          source ${{ env.POETRY_ENV }}/bin/activate
+          poetry run pytest libs/core
diff --git a/.github/workflows/upload-pypi-dev.yml b/.github/workflows/upload-pypi-dev.yml
@@ -1,4 +1,4 @@
-name: PyPI prerelease and build/push Docker image.
+name: PyPI prerelease any module.
 
 on:
   workflow_dispatch:

diff --git a/.gitignore b/.gitignore
@@ -56,6 +56,7 @@ env3
 .env*
 .env*.local
 .venv*
+*venv*
 env*/
 venv*/
 ENV/
@@ -66,6 +67,7 @@ venv.bak/
 config.yaml
 bun.lockb
 
+
 # Jupyter Notebook
 .ipynb_checkpoints
 
@@ -76,4 +78,4 @@ bun.lockb
 llmstudio/llm_engine/logs/execution_logs.jsonl
 *.db
 .prettierignore
-db
+db
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -9,9 +9,30 @@ Thank you for expressing your interest in contributing to LLMstudio. To ensure t
 3. Follow our repo guidelines
    - Ensure that you update any relevant docstrings and comments within your code
    - Run `pre-commit run --all-files` to lint your code
+4. Sign your commits. Without signed commits, your changes will not be accepted for main.
 
 ## Branches
 
 - All development happens in per-feature branches prefixed by contributor's
   initials. For example `feat/feature_name`.
 - Approved PRs are merged to the `main` branch.
+
+## Alpha releases:
+You need to have your changes in the `develop` branch in order to push a new alpha version of any library `(llmstudio, llmstudio-proxy, llmstudio-tracker)`. Therefore, first make sure that your feature branch is reviewed and working before merging to `develop`.
+
+Process:
+- Ensure the `feature/**` branch you worked on is passing the tests and has the necessary approvals.
+- Merge to `develop`
+- Ensure the changes are in the develop branch
+- Use GitHub Actions to initiate the pre-release process: [PyPI pre-release any module](https://github.com/TensorOpsAI/LLMstudio/actions/workflows/upload-pypi-dev.yml)
+- Select the target library `(llmstudio, llmstudio-proxy, llmstudio-tracker)` and the target version for the final release (e.g., 1.1.0). Consult main branch and PyPI for current versions.
+- Run the workflow.
+- The workflow will automatically bump the version and create an alpha release of the library/module specified
+- The workflow will automatically push changes back (bump version) to the develop branch
+
+Repeat the process in case your `develop` branch contains changes in multiple libraries.
+
+## Final releases:
+Once you're happy with the versions, create the release notes on the PR between `develop` and `main`, and merge to the `main` branch when ready for a full release. The workflow will automatically remove any `alpha` tag in your libraries and push the versions for every library/module that has changed.
+
+
diff --git a/Makefile b/Makefile
@@ -1,2 +1,5 @@
 format:
 	pre-commit run --all-files
+
+unit-tests:
+	pytest libs/core/tests/unit_tests
diff --git a/examples/_config.yaml b/examples/_config.yaml
@@ -115,16 +115,29 @@ providers:
     keys:
       - OPENAI_API_KEY
     models:
+      o1-preview:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000015
+        output_token_cost: 0.000060
+      o1-mini:
+        mode: chat
+        max_completion_tokens: 128000
+        input_token_cost: 0.000003
+        cached_token_cost: 0.0000015
+        output_token_cost: 0.000012
       gpt-4o-mini:
         mode: chat
         max_tokens: 128000
         input_token_cost: 0.00000015
+        cached_token_cost: 0.000000075
         output_token_cost: 0.00000060
       gpt-4o:
         mode: chat
         max_tokens: 128000
-        input_token_cost: 0.000005
-        output_token_cost: 0.000015
+        input_token_cost: 0.0000025
+        cached_token_cost: 0.00000125
+        output_token_cost: 0.00001
       gpt-4-turbo:
         mode: chat
         max_tokens: 128000

diff --git a/examples/core.py b/examples/core.py
@@ -4,22 +4,59 @@
 
 from pprint import pprint
 import os
+import asyncio
 from dotenv import load_dotenv
 load_dotenv()
 
 def run_provider(provider, model, api_key, **kwargs):
+    print(f"\n\n###RUNNING for <{provider}>, <{model}> ###")
     llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
 
     latencies = {}
+
+    print("\nAsync Non-Stream")
     chat_request = build_chat_request(model, chat_input="Hello, my name is Jason Json", is_stream=False)
+    string = """
+What is Lorem Ipsum? json
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+What is Lorem Ipsum? json
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+What is Lorem Ipsum? json
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
+
+Why do we use it?
+It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
+
+
+Where does it come from?
+Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
+
+    """
+    #chat_request = build_chat_request(model, chat_input=string, is_stream=False)
+
 
-    import asyncio
     response_async = asyncio.run(llm.achat(**chat_request))
     pprint(response_async)
     latencies["async (ms)"]= response_async.metrics["latency_s"]*1000
-
-    # stream
-    print("\nasync stream")
+    
+
+    print("\nAsync Stream")
     async def async_stream():
         chat_request = build_chat_request(model, chat_input="Hello, my name is Tom Json", is_stream=True)
 
@@ -36,14 +73,15 @@ async def async_stream():
     asyncio.run(async_stream())
 
 
-    print("# Now sync calls")
+    print("\nSync Non-Stream")
     chat_request = build_chat_request(model, chat_input="Hello, my name is Alice Json", is_stream=False)
 
     response_sync = llm.chat(**chat_request)
     pprint(response_sync)
     latencies["sync (ms)"]= response_sync.metrics["latency_s"]*1000
+
 
-    print("# Now sync calls streaming")
+    print("\nSync Stream")
     chat_request = build_chat_request(model, chat_input="Hello, my name is Mary Json", is_stream=True)
 
     response_sync_stream = llm.chat(**chat_request)
@@ -85,72 +123,33 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens:
     return chat_request
 
 
+def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, **kwargs):
+    for _ in range(num_runs):
+        latencies = run_provider(provider=provider, model=model, api_key=api_key, **kwargs)
+        pprint(latencies)
+
+
 
+# OpenAI
+multiple_provider_runs(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+multiple_provider_runs(provider="openai", model="o1-mini", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
+#multiple_provider_runs(provider="openai", model="o1-preview", api_key=os.environ["OPENAI_API_KEY"], num_runs=1)
 
 
-provider = "openai"
-model = "gpt-4o-mini"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
-    pprint(latencies)
+# Azure
+multiple_provider_runs(provider="azure", model="gpt-4o-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+#multiple_provider_runs(provider="azure", model="gpt-4o", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
 
-
-provider = "openai"
-model = "o1-preview"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
-    pprint(latencies)
-
-provider = "openai"
-model = "o1-mini"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"])
-    pprint(latencies)
-
-# provider = "anthropic"
-# model = "claude-3-opus-20240229"
-# for _ in range(1):
-#     latencies = run_provider(provider=provider, model=model, api_key=os.environ["ANTHROPIC_API_KEY"])
-#     pprint(latencies)
-# # we need credits
-
-provider = "azure"
-model = "gpt-4o-mini"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, 
-                            api_key=os.environ["AZURE_API_KEY"], 
-                            api_version=os.environ["AZURE_API_VERSION"],
-                            api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-    pprint(latencies)
-
-provider = "azure"
-model = "o1-preview"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, 
-                            api_key=os.environ["AZURE_API_KEY"], 
-                            api_version=os.environ["AZURE_API_VERSION"],
-                            api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-    pprint(latencies)
-
-provider = "azure"
-model = "o1-mini"
-for _ in range(1):
-    latencies = run_provider(provider=provider, model=model, 
-                            api_key=os.environ["AZURE_API_KEY"], 
-                            api_version=os.environ["AZURE_API_VERSION"],
-                            api_endpoint=os.environ["AZURE_API_ENDPOINT"])
-    pprint(latencies)
-
-# provider = "azure"
-# model = "gpt-4o"
-# for _ in range(1):
-#     latencies = run_provider(provider=provider, model=model, 
-#                             api_key=os.environ["AZURE_API_KEY_llama"], 
-#                             base_url=os.environ["AZURE_BASE_URL"]
-#                             )
-#     pprint(latencies)
+
+#multiple_provider_runs(provider="anthropic", model="claude-3-opus-20240229", num_runs=1, api_key=os.environ["ANTHROPIC_API_KEY"])
+
+#multiple_provider_runs(provider="azure", model="o1-preview", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
+#multiple_provider_runs(provider="azure", model="o1-mini", num_runs=1, api_key=os.environ["AZURE_API_KEY"], api_version=os.environ["AZURE_API_VERSION"], api_endpoint=os.environ["AZURE_API_ENDPOINT"])
 
 
+multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"])
 # provider = "vertexai"
 # model = "gemini-1.5-pro-latest"
 # for _ in range(1):

diff --git a/examples/langchain_integration.py b/examples/langchain_integration.py
@@ -18,7 +18,12 @@
 
 # %%
 from langchain.tools import tool
-from langchain.agents import AgentType, initialize_agent
+from langchain.agents import AgentType, initialize_agent, AgentExecutor
+
+from langchain.agents.openai_functions_agent.base import (
+    create_openai_functions_agent,
+)
+from langchain import hub
 
 # # %%
 # print("\n", chat_llm.invoke('Hello'))
@@ -192,9 +197,16 @@ def assistant(question: str)->str:
     tools = [power_disco_ball, start_music, dim_lights]
     print(tools)
 
-    #rebuild agent with new tools
-    agent_executor = initialize_agent(
-        tools, chat_llm, agent=AgentType.OPENAI_FUNCTIONS, verbose = True, debug = True
+    #rebuild agent with new tools - This is the old outdated way of using agents in langchain
+    #agent_executor = initialize_agent(
+    #    tools, chat_llm, agent=AgentType.OPENAI_FUNCTIONS, verbose = True, debug = True
+    #) 
+    prompt = hub.pull("hwchase17/openai-functions-agent")
+
+    agent = create_openai_functions_agent(llm=chat_llm, tools=tools, prompt=prompt)
+
+    agent_executor = AgentExecutor(
+        agent=agent, tools=tools, verbose=True, return_intermediate_steps=True
     )
 
     response = agent_executor.invoke(