diff --git a/llama-index-integrations/llms/llama-index-llms-0g/LICENSE b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE new file mode 100644 index 0000000000..c2a017fb5d --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2024 LlamaIndex + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/Makefile b/llama-index-integrations/llms/llama-index-llms-0g/Makefile new file mode 100644 index 0000000000..4d1836a1d1 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/Makefile @@ -0,0 +1,20 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --files + +test: ## Run tests via pytest. + pytest tests/ -v + +watch-docs: ## Build and watch documentation. + sphinx-autobuild docs/ docs/_build/html --open-browser --reload-delay 1 --ignore .git --ignore docs/_build --ignore docs/examples --watch $(GIT_ROOT)/llama_index/ + +.PHONY: help format lint test watch-docs + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/README.md b/llama-index-integrations/llms/llama-index-llms-0g/README.md new file mode 100644 index 0000000000..afd5266621 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/README.md @@ -0,0 +1,310 @@ +# LlamaIndex LLMs Integration: 0G Compute Network + +This package provides LlamaIndex integration for the 0G Compute Network, enabling decentralized AI inference with verification capabilities. + +## Installation + +```bash +pip install llama-index-llms-0g +``` + +## Prerequisites + +The 0G Compute Network uses Ethereum-based authentication and requires: + +1. **Ethereum Wallet**: You need an Ethereum private key for authentication +2. **0G Tokens**: Fund your account with OG tokens for inference payments +3. 
**Node.js Bridge** (Optional): For full JavaScript SDK integration + +## Quick Start + +### Basic Usage + +```python +from llama_index.llms.zerog import ZeroGLLM + +# Initialize with official model +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", # or "deepseek-r1-70b" + private_key="your_ethereum_private_key_here" +) + +# Simple completion +response = llm.complete("Explain quantum computing in simple terms") +print(response.text) + +# Chat interface +from llama_index.core.llms import ChatMessage, MessageRole + +messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") +] +response = llm.chat(messages) +print(response.message.content) +``` + +### Streaming Responses + +```python +# Streaming completion +for chunk in llm.stream_complete("Write a short story about AI"): + print(chunk.delta, end="", flush=True) + +# Streaming chat +messages = [ + ChatMessage(role=MessageRole.USER, content="Tell me about the 0G network") +] +for chunk in llm.stream_chat(messages): + print(chunk.delta, end="", flush=True) +``` + +### Async Usage + +```python +import asyncio + +async def main(): + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="your_private_key" + ) + + # Async completion + response = await llm.acomplete("What is machine learning?") + print(response.text) + + # Async streaming + async for chunk in await llm.astream_complete("Explain neural networks"): + print(chunk.delta, end="", flush=True) + +asyncio.run(main()) +``` + +## Configuration Options + +### Official Models + +The integration supports two official 0G Compute Network models: + +| Model | Provider Address | Description | Verification | +|-------|------------------|-------------|--------------| +| `llama-3.3-70b-instruct` | `0xf07240Efa67755B5311bc75784a061eDB47165Dd` | 70B parameter model for general AI tasks | TEE (TeeML) | +| `deepseek-r1-70b` | `0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3` | Advanced reasoning model | TEE (TeeML) | + +### Custom Providers + +```python +# Use a custom provider +llm = ZeroGLLM( + model="custom-model-name", + provider_address="0x1234567890abcdef...", + private_key="your_private_key" +) +``` + +### Advanced Configuration + +```python +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key", + rpc_url="https://evmrpc-testnet.0g.ai", # or mainnet URL + context_window=8192, + max_tokens=1024, + temperature=0.7, + timeout=120.0, + additional_kwargs={ + "top_p": 0.9, + "frequency_penalty": 0.1 + } +) +``` + +## Account Management + +### Funding Your Account + +Before using the service, you need to fund your account with OG tokens: + +```python +# Note: This requires the JavaScript SDK bridge (see Advanced Setup) +# For now, fund your account using the JavaScript SDK directly + +# Example funding (requires JS bridge): +# await broker.ledger.addLedger("0.1") # Add 0.1 OG tokens +``` + +### Checking Balance + +```python +# This would require the JS bridge implementation +# await broker.ledger.getLedger() +``` + +## Advanced Setup (JavaScript SDK Bridge) + +For full functionality including account management and verification, you'll need to set up a bridge to the JavaScript SDK. 
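+
+Whichever of the two bridge options below you choose, the Python side only needs a thin client that forwards JSON requests to the bridge and reads JSON responses back. The sketch below is illustrative only: the script name (`0g-bridge.js`), the `{"op": ..., "params": ...}` message shape, and the operation names are assumptions for demonstration, not part of this package or the 0G SDK.
+
+```python
+import json
+import subprocess
+
+
+class ZeroGBridgeClient:
+    """Minimal newline-delimited JSON client for a local Node.js bridge process."""
+
+    def __init__(self, script_path: str = "0g-bridge.js") -> None:
+        # Start the Node.js bridge as a long-lived subprocess.
+        self._proc = subprocess.Popen(
+            ["node", script_path],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            text=True,
+        )
+
+    def call(self, op: str, **params) -> dict:
+        # One JSON request per line in, one JSON response per line out.
+        self._proc.stdin.write(json.dumps({"op": op, "params": params}) + "\n")
+        self._proc.stdin.flush()
+        return json.loads(self._proc.stdout.readline())
+
+    def close(self) -> None:
+        self._proc.terminate()
+
+
+# Hypothetical usage, assuming the bridge script implements matching operations:
+# bridge = ZeroGBridgeClient()
+# bridge.call("initialize", private_key="...", rpc_url="https://evmrpc-testnet.0g.ai")
+# bridge.call("add_ledger", amount="0.1")  # fund the account with OG tokens
+# bridge.close()
+```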
+ +### Option 1: Node.js Subprocess Bridge + +Create a Node.js script that handles the 0G SDK operations: + +```javascript +// 0g-bridge.js +const { ethers } = require("ethers"); +const { createZGComputeNetworkBroker } = require("@0glabs/0g-serving-broker"); + +async function initializeBroker(privateKey, rpcUrl) { + const provider = new ethers.JsonRpcProvider(rpcUrl); + const wallet = new ethers.Wallet(privateKey, provider); + return await createZGComputeNetworkBroker(wallet); +} + +// Handle requests from Python +process.stdin.on('data', async (data) => { + const request = JSON.parse(data.toString()); + // Handle different operations + // Send response back to Python +}); +``` + +### Option 2: HTTP Bridge Service + +Create a simple HTTP service that wraps the JavaScript SDK: + +```javascript +// 0g-service.js +const express = require('express'); +const { ethers } = require("ethers"); +const { createZGComputeNetworkBroker } = require("@0glabs/0g-serving-broker"); + +const app = express(); +app.use(express.json()); + +app.post('/initialize', async (req, res) => { + // Initialize broker +}); + +app.post('/inference', async (req, res) => { + // Handle inference requests +}); + +app.listen(3000); +``` + +## Error Handling + +```python +from llama_index.llms.zerog import ZeroGLLM + +try: + llm = ZeroGLLM( + model="invalid-model", + private_key="your_private_key" + ) + response = llm.complete("Hello") +except ValueError as e: + print(f"Configuration error: {e}") +except Exception as e: + print(f"Runtime error: {e}") +``` + +## Integration with LlamaIndex + +### With Query Engines + +```python +from llama_index.core import VectorStoreIndex, SimpleDirectoryReader +from llama_index.llms.zerog import ZeroGLLM + +# Load documents +documents = SimpleDirectoryReader("data").load_data() + +# Create index with 0G LLM +llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key" +) + +index = VectorStoreIndex.from_documents(documents) +query_engine = index.as_query_engine(llm=llm) + +# Query +response = query_engine.query("What is the main topic of these documents?") +print(response) +``` + +### With Chat Engines + +```python +from llama_index.core import VectorStoreIndex +from llama_index.llms.zerog import ZeroGLLM + +llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="your_private_key" +) + +# Create chat engine +chat_engine = index.as_chat_engine(llm=llm) + +# Chat +response = chat_engine.chat("Tell me about the documents") +print(response) +``` + +## Verification and Security + +The 0G Compute Network provides verification capabilities: + +- **TEE (Trusted Execution Environment)**: Official models run in verified environments +- **Cryptographic Proofs**: Responses can be cryptographically verified +- **Decentralized Infrastructure**: No single point of failure + +## Troubleshooting + +### Common Issues + +1. **"Model not found" Error** + ```python + # Make sure you're using a valid model name + llm = ZeroGLLM(model="llama-3.3-70b-instruct", ...) # Correct + # llm = ZeroGLLM(model="invalid-model", ...) # Wrong + ``` + +2. **Authentication Errors** + ```python + # Ensure your private key is valid and has sufficient funds + # Check the RPC URL is correct for your network (testnet/mainnet) + ``` + +3. **Timeout Issues** + ```python + # Increase timeout for longer requests + llm = ZeroGLLM(timeout=300.0, ...) 
# 5 minutes + ``` + +### Getting Help + +- **Documentation**: [0G Compute Network Docs](https://docs.0g.ai) +- **Discord**: Join the 0G community Discord +- **GitHub Issues**: Report bugs on the LlamaIndex repository + +## Contributing + +Contributions are welcome! Please see the main LlamaIndex contributing guidelines. + +## License + +This integration is licensed under the MIT License. + +## Changelog + +### v0.1.0 + +- Initial release +- Support for official 0G models (llama-3.3-70b-instruct, deepseek-r1-70b) +- Basic chat and completion interfaces +- Streaming support +- Async support +- Custom provider support diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py new file mode 100644 index 0000000000..6addcbcba3 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/basic_usage.py @@ -0,0 +1,205 @@ +""" +Basic usage example for 0G Compute Network LLM integration. + +This example demonstrates how to use the ZeroGLLM class for basic +chat and completion tasks. +""" + +import asyncio +import os +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +def basic_completion_example(): + """Demonstrate basic completion functionality.""" + print("=== Basic Completion Example ===") + + # Initialize the LLM with your private key + # In production, use environment variables for security + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.7, + max_tokens=512 + ) + + # Simple completion + prompt = "Explain the concept of decentralized computing in simple terms." + response = llm.complete(prompt) + + print(f"Prompt: {prompt}") + print(f"Response: {response.text}") + print() + + +def chat_example(): + """Demonstrate chat functionality.""" + print("=== Chat Example ===") + + llm = ZeroGLLM( + model="deepseek-r1-70b", # Using the reasoning model + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.3 # Lower temperature for more focused responses + ) + + # Create a conversation + messages = [ + ChatMessage( + role=MessageRole.SYSTEM, + content="You are a helpful AI assistant specialized in blockchain and decentralized technologies." + ), + ChatMessage( + role=MessageRole.USER, + content="What are the advantages of using a decentralized compute network like 0G?" + ) + ] + + response = llm.chat(messages) + + print("Conversation:") + for msg in messages: + print(f"{msg.role.value}: {msg.content}") + + print(f"Assistant: {response.message.content}") + print() + + +def streaming_example(): + """Demonstrate streaming functionality.""" + print("=== Streaming Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + prompt = "Write a short story about AI and blockchain technology working together." 
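+
+    # Each chunk yielded by stream_complete() is a CompletionResponse:
+    # chunk.delta carries only the newly generated text, while chunk.text
+    # holds the accumulated response so far, so printing .delta reproduces
+    # the stream incrementally.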
+ + print(f"Prompt: {prompt}") + print("Streaming response:") + + # Stream the response + for chunk in llm.stream_complete(prompt): + print(chunk.delta, end="", flush=True) + + print("\n") + + +def custom_provider_example(): + """Demonstrate using a custom provider.""" + print("=== Custom Provider Example ===") + + # Example with custom provider address + llm = ZeroGLLM( + model="custom-model-name", + provider_address="0x1234567890abcdef1234567890abcdef12345678", # Example address + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + rpc_url="https://evmrpc-testnet.0g.ai", + context_window=8192, + additional_kwargs={ + "top_p": 0.9, + "frequency_penalty": 0.1 + } + ) + + print(f"Using custom provider: {llm._get_provider_address()}") + print(f"Model: {llm.model}") + print(f"Context window: {llm.context_window}") + print() + + +async def async_example(): + """Demonstrate async functionality.""" + print("=== Async Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + # Async completion + prompt = "What is the future of decentralized AI?" + response = await llm.acomplete(prompt) + + print(f"Async completion result: {response.text}") + + # Async chat + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") + ] + + chat_response = await llm.achat(messages) + print(f"Async chat result: {chat_response.message.content}") + + # Async streaming + print("Async streaming:") + async for chunk in await llm.astream_complete("Tell me about 0G network"): + print(chunk.delta, end="", flush=True) + + print("\n") + + +def error_handling_example(): + """Demonstrate error handling.""" + print("=== Error Handling Example ===") + + try: + # This should raise an error for invalid model + llm = ZeroGLLM( + model="invalid-model-name", + private_key="test_key" + ) + + # This will trigger the error when trying to get provider address + llm._get_provider_address() + + except ValueError as e: + print(f"Caught expected error: {e}") + + try: + # Valid configuration + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here") + ) + + print(f"Successfully initialized with model: {llm.model}") + print(f"Provider address: {llm._get_provider_address()}") + + except Exception as e: + print(f"Configuration error: {e}") + + print() + + +def main(): + """Run all examples.""" + print("0G Compute Network LLM Integration Examples") + print("=" * 50) + print() + + # Check if private key is set + if not os.getenv("ETHEREUM_PRIVATE_KEY"): + print("Warning: ETHEREUM_PRIVATE_KEY environment variable not set.") + print("Using placeholder value for demonstration.") + print("In production, set your actual private key as an environment variable.") + print() + + # Run synchronous examples + basic_completion_example() + chat_example() + streaming_example() + custom_provider_example() + error_handling_example() + + # Run async example + print("Running async example...") + asyncio.run(async_example()) + + print("All examples completed!") + + +if __name__ == "__main__": + main() + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py new file mode 100644 index 0000000000..c4eafa1d5d --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/examples/llamaindex_integration.py @@ -0,0 +1,283 @@ +""" 
+LlamaIndex integration example for 0G Compute Network LLM. + +This example demonstrates how to use ZeroGLLM with LlamaIndex +components like query engines and chat engines. +""" + +import os +import tempfile +from pathlib import Path + +from llama_index.core import ( + VectorStoreIndex, + SimpleDirectoryReader, + Settings, + Document +) +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +def create_sample_documents(): + """Create sample documents for demonstration.""" + documents = [ + Document( + text=""" + The 0G Network is a decentralized AI infrastructure that provides scalable, + secure, and cost-effective solutions for AI applications. It consists of + three main components: 0G Chain (blockchain layer), 0G Storage (decentralized + storage), and 0G Compute (AI inference network). + + The 0G Compute Network enables developers to access GPU resources from + distributed providers, offering competitive pricing and verification + capabilities through Trusted Execution Environments (TEE). + """, + metadata={"source": "0g_overview.txt", "topic": "0G Network Overview"} + ), + Document( + text=""" + Decentralized AI offers several advantages over traditional centralized + approaches: improved privacy through distributed processing, reduced + single points of failure, competitive pricing through market dynamics, + and enhanced transparency through blockchain-based verification. + + The 0G Network implements these principles by providing a marketplace + where GPU providers can offer their compute resources while maintaining + cryptographic proof of computation integrity. + """, + metadata={"source": "decentralized_ai.txt", "topic": "Decentralized AI Benefits"} + ), + Document( + text=""" + Setting up the 0G Compute Network requires an Ethereum wallet with OG tokens + for payment. Developers can choose from official models like llama-3.3-70b-instruct + and deepseek-r1-70b, or connect to custom providers. + + The network supports standard OpenAI-compatible APIs, making integration + straightforward for existing applications. Verification is handled + automatically through TEE technology. + """, + metadata={"source": "setup_guide.txt", "topic": "Setup and Configuration"} + ) + ] + + return documents + + +def query_engine_example(): + """Demonstrate using ZeroGLLM with a query engine.""" + print("=== Query Engine Example ===") + + # Initialize the 0G LLM + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.1, # Low temperature for factual queries + max_tokens=512 + ) + + # Set as the default LLM for LlamaIndex + Settings.llm = llm + + # Create sample documents + documents = create_sample_documents() + + # Create index + print("Creating vector index...") + index = VectorStoreIndex.from_documents(documents) + + # Create query engine + query_engine = index.as_query_engine( + response_mode="compact", + verbose=True + ) + + # Ask questions + questions = [ + "What is the 0G Network?", + "What are the benefits of decentralized AI?", + "How do I set up the 0G Compute Network?" + ] + + for question in questions: + print(f"\nQuestion: {question}") + response = query_engine.query(question) + print(f"Answer: {response.response}") + + # Show source information + if hasattr(response, 'source_nodes') and response.source_nodes: + print("Sources:") + for i, node in enumerate(response.source_nodes): + metadata = node.node.metadata + print(f" {i+1}. 
{metadata.get('source', 'Unknown')} - {metadata.get('topic', 'N/A')}") + + print() + + +def chat_engine_example(): + """Demonstrate using ZeroGLLM with a chat engine.""" + print("=== Chat Engine Example ===") + + # Initialize with the reasoning model for better conversation + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.3, + max_tokens=1024 + ) + + Settings.llm = llm + + # Create documents and index + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Create chat engine + chat_engine = index.as_chat_engine( + chat_mode="context", + verbose=True + ) + + # Simulate a conversation + conversation = [ + "Hi! Can you tell me about the 0G Network?", + "What makes it different from traditional cloud computing?", + "How does the verification system work?", + "What do I need to get started?" + ] + + print("Starting conversation with 0G-powered chat engine:") + print("-" * 50) + + for user_message in conversation: + print(f"User: {user_message}") + response = chat_engine.chat(user_message) + print(f"Assistant: {response.response}") + print() + + # Show chat history + print("Chat History:") + for i, message in enumerate(chat_engine.chat_history): + role = "User" if message.role == MessageRole.USER else "Assistant" + print(f"{i+1}. {role}: {message.content[:100]}...") + + print() + + +def custom_prompt_example(): + """Demonstrate custom prompting with 0G LLM.""" + print("=== Custom Prompt Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.5 + ) + + # Create documents + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Custom query engine with specific prompt + from llama_index.core import PromptTemplate + + custom_prompt = PromptTemplate( + """ + You are an expert on decentralized AI and blockchain technology, specifically + the 0G Network. Use the provided context to answer questions accurately and + provide practical guidance. + + Context information: + {context_str} + + Question: {query_str} + + Please provide a comprehensive answer that includes: + 1. Direct answer to the question + 2. Technical details when relevant + 3. Practical implications or next steps + + Answer: + """ + ) + + query_engine = index.as_query_engine( + text_qa_template=custom_prompt, + response_mode="tree_summarize" + ) + + question = "How can I integrate 0G Compute Network into my existing AI application?" + print(f"Question: {question}") + + response = query_engine.query(question) + print(f"Custom-prompted response: {response.response}") + print() + + +def streaming_query_example(): + """Demonstrate streaming responses with query engine.""" + print("=== Streaming Query Example ===") + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key=os.getenv("ETHEREUM_PRIVATE_KEY", "your_private_key_here"), + temperature=0.2 + ) + + Settings.llm = llm + + # Create documents and index + documents = create_sample_documents() + index = VectorStoreIndex.from_documents(documents) + + # Create streaming query engine + query_engine = index.as_query_engine( + streaming=True, + response_mode="compact" + ) + + question = "Explain the architecture and benefits of the 0G Network in detail." 
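+
+    # With streaming=True, query() returns a StreamingResponse: iterate
+    # response_gen for text chunks as they arrive, or call
+    # print_response_stream() to write them to stdout; the full response
+    # text is only assembled once the generator has been consumed.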
+ print(f"Question: {question}") + print("Streaming response:") + + # Stream the response + streaming_response = query_engine.query(question) + for chunk in streaming_response.response_gen: + print(chunk, end="", flush=True) + + print("\n") + + +def main(): + """Run all integration examples.""" + print("0G Compute Network + LlamaIndex Integration Examples") + print("=" * 60) + print() + + # Check if private key is set + if not os.getenv("ETHEREUM_PRIVATE_KEY"): + print("Warning: ETHEREUM_PRIVATE_KEY environment variable not set.") + print("Using placeholder value for demonstration.") + print("Set your actual private key as an environment variable for real usage.") + print() + + try: + # Run examples + query_engine_example() + chat_engine_example() + custom_prompt_example() + streaming_query_example() + + print("All integration examples completed successfully!") + + except Exception as e: + print(f"Error running examples: {e}") + print("Make sure you have the required dependencies installed:") + print("- pip install llama-index-core") + print("- pip install llama-index-llms-0g") + + +if __name__ == "__main__": + main() + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py new file mode 100644 index 0000000000..171a44abc3 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/__init__.py @@ -0,0 +1,2 @@ +# Empty file to make this a package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py new file mode 100644 index 0000000000..171a44abc3 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/__init__.py @@ -0,0 +1,2 @@ +# Empty file to make this a package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py new file mode 100644 index 0000000000..079c8cfb6c --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/__init__.py @@ -0,0 +1,4 @@ +from llama_index.llms.zerog.base import ZeroGLLM + +__all__ = ["ZeroGLLM"] + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py new file mode 100644 index 0000000000..cc6d51bd5e --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/llama_index/llms/zerog/base.py @@ -0,0 +1,457 @@ +import asyncio +import json +import logging +from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence + +import httpx +from llama_index.core.base.llms.types import ( + ChatMessage, + ChatResponse, + ChatResponseAsyncGen, + ChatResponseGen, + CompletionResponse, + CompletionResponseAsyncGen, + CompletionResponseGen, + LLMMetadata, + MessageRole, +) +from llama_index.core.bridge.pydantic import Field, PrivateAttr +from llama_index.core.callbacks import CallbackManager +from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS +from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback +from llama_index.core.base.llms.generic_utils import ( + achat_to_completion_decorator, + acompletion_to_chat_decorator, + astream_chat_to_completion_decorator, + astream_completion_to_chat_decorator, + 
chat_to_completion_decorator, + completion_to_chat_decorator, + stream_chat_to_completion_decorator, + stream_completion_to_chat_decorator, +) +from llama_index.core.llms.llm import LLM + +logger = logging.getLogger(__name__) + +# Official 0G Services as per documentation +OFFICIAL_0G_SERVICES = { + "llama-3.3-70b-instruct": { + "provider_address": "0xf07240Efa67755B5311bc75784a061eDB47165Dd", + "description": "State-of-the-art 70B parameter model for general AI tasks", + "verification": "TEE (TeeML)", + }, + "deepseek-r1-70b": { + "provider_address": "0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3", + "description": "Advanced reasoning model optimized for complex problem solving", + "verification": "TEE (TeeML)", + }, +} + + +class ZeroGLLM(LLM): + """ + 0G Compute Network LLM integration for LlamaIndex. + + This integration allows you to use AI inference services from the 0G Compute Network, + which provides decentralized GPU compute with verification capabilities. + + Args: + model (str): The model to use. Can be one of the official models: + - "llama-3.3-70b-instruct": 70B parameter model for general AI tasks + - "deepseek-r1-70b": Advanced reasoning model + Or a custom provider address. + private_key (str): Ethereum private key for wallet authentication + rpc_url (str): 0G Chain RPC URL. Defaults to testnet. + provider_address (Optional[str]): Custom provider address. If not provided, + will use the official provider for the specified model. + context_window (int): Context window size. Defaults to 4096. + max_tokens (int): Maximum tokens to generate. Defaults to 512. + temperature (float): Sampling temperature. Defaults to 0.1. + timeout (float): Request timeout in seconds. Defaults to 60.0. + additional_kwargs (Dict[str, Any]): Additional parameters for requests. + + Examples: + ```python + from llama_index.llms.zerog import ZeroGLLM + + # Using official model + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="your_private_key_here" + ) + + # Using custom provider + llm = ZeroGLLM( + model="custom-model", + provider_address="0x...", + private_key="your_private_key_here" + ) + + response = llm.complete("Hello, how are you?") + print(response.text) + ``` + """ + + model: str = Field( + default="llama-3.3-70b-instruct", + description="Model name or identifier" + ) + private_key: str = Field( + description="Ethereum private key for wallet authentication" + ) + rpc_url: str = Field( + default="https://evmrpc-testnet.0g.ai", + description="0G Chain RPC URL" + ) + provider_address: Optional[str] = Field( + default=None, + description="Custom provider address. If not provided, uses official provider for the model." 
+ ) + context_window: int = Field( + default=DEFAULT_CONTEXT_WINDOW, + description="Context window size" + ) + max_tokens: int = Field( + default=DEFAULT_NUM_OUTPUTS, + description="Maximum tokens to generate" + ) + temperature: float = Field( + default=0.1, + description="Sampling temperature" + ) + timeout: float = Field( + default=60.0, + description="Request timeout in seconds" + ) + additional_kwargs: Dict[str, Any] = Field( + default_factory=dict, + description="Additional parameters for requests" + ) + + _broker: Any = PrivateAttr() + _http_client: httpx.AsyncClient = PrivateAttr() + _is_initialized: bool = PrivateAttr(default=False) + + def __init__( + self, + model: str = "llama-3.3-70b-instruct", + private_key: str = "", + rpc_url: str = "https://evmrpc-testnet.0g.ai", + provider_address: Optional[str] = None, + context_window: int = DEFAULT_CONTEXT_WINDOW, + max_tokens: int = DEFAULT_NUM_OUTPUTS, + temperature: float = 0.1, + timeout: float = 60.0, + additional_kwargs: Optional[Dict[str, Any]] = None, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + additional_kwargs = additional_kwargs or {} + + super().__init__( + model=model, + private_key=private_key, + rpc_url=rpc_url, + provider_address=provider_address, + context_window=context_window, + max_tokens=max_tokens, + temperature=temperature, + timeout=timeout, + additional_kwargs=additional_kwargs, + callback_manager=callback_manager, + **kwargs, + ) + + self._http_client = httpx.AsyncClient(timeout=timeout) + + @classmethod + def class_name(cls) -> str: + return "ZeroGLLM" + + @property + def metadata(self) -> LLMMetadata: + return LLMMetadata( + context_window=self.context_window, + num_output=self.max_tokens, + is_chat_model=True, + is_function_calling_model=False, + model_name=self.model, + ) + + def _get_provider_address(self) -> str: + """Get the provider address for the model.""" + if self.provider_address: + return self.provider_address + + if self.model in OFFICIAL_0G_SERVICES: + return OFFICIAL_0G_SERVICES[self.model]["provider_address"] + + raise ValueError( + f"Model '{self.model}' not found in official services. " + f"Please provide a custom provider_address. 
" + f"Available official models: {list(OFFICIAL_0G_SERVICES.keys())}" + ) + + async def _initialize_broker(self) -> None: + """Initialize the 0G broker if not already initialized.""" + if self._is_initialized: + return + + try: + # This would require the JavaScript SDK to be available + # For now, we'll simulate the broker initialization + logger.info("Initializing 0G Compute Network broker...") + + # In a real implementation, this would use the JavaScript SDK + # via a subprocess or Node.js bridge + self._broker = { + "provider_address": self._get_provider_address(), + "initialized": True + } + + self._is_initialized = True + logger.info(f"Broker initialized for provider: {self._get_provider_address()}") + + except Exception as e: + logger.error(f"Failed to initialize 0G broker: {e}") + raise + + def _messages_to_openai_format(self, messages: Sequence[ChatMessage]) -> List[Dict[str, str]]: + """Convert LlamaIndex messages to OpenAI format.""" + openai_messages = [] + for message in messages: + role = message.role.value if hasattr(message.role, 'value') else str(message.role) + openai_messages.append({ + "role": role, + "content": message.content or "" + }) + return openai_messages + + async def _make_request( + self, + messages: List[Dict[str, str]], + stream: bool = False + ) -> Dict[str, Any]: + """Make a request to the 0G service.""" + await self._initialize_broker() + + # In a real implementation, this would: + # 1. Get service metadata from broker + # 2. Generate authenticated headers + # 3. Make the actual request to the service endpoint + + # For now, we'll simulate the response + provider_address = self._get_provider_address() + + # Simulate getting service metadata + endpoint = f"https://api.0g.ai/v1/providers/{provider_address}" + model_name = self.model + + # Simulate generating auth headers + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer simulated_token_for_{provider_address}", + "X-0G-Provider": provider_address, + } + + # Prepare request body + request_body = { + "messages": messages, + "model": model_name, + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "stream": stream, + **self.additional_kwargs, + } + + try: + # In a real implementation, this would make the actual HTTP request + # For now, we'll simulate a response + if stream: + return await self._simulate_streaming_response(messages) + else: + return await self._simulate_response(messages) + + except Exception as e: + logger.error(f"Request to 0G service failed: {e}") + raise + + async def _simulate_response(self, messages: List[Dict[str, str]]) -> Dict[str, Any]: + """Simulate a response from the 0G service.""" + # This is a placeholder - in real implementation, this would be the actual API response + last_message = messages[-1]["content"] if messages else "Hello" + + return { + "choices": [{ + "message": { + "role": "assistant", + "content": f"This is a simulated response from 0G Compute Network for: {last_message}" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30 + } + } + + async def _simulate_streaming_response(self, messages: List[Dict[str, str]]) -> AsyncGenerator[Dict[str, Any], None]: + """Simulate a streaming response from the 0G service.""" + last_message = messages[-1]["content"] if messages else "Hello" + response_text = f"This is a simulated streaming response from 0G Compute Network for: {last_message}" + + words = response_text.split() + for i, word in enumerate(words): + chunk = 
{ + "choices": [{ + "delta": { + "content": word + " " if i < len(words) - 1 else word + }, + "finish_reason": None if i < len(words) - 1 else "stop" + }] + } + yield chunk + await asyncio.sleep(0.1) # Simulate streaming delay + 
+ @llm_completion_callback() + def complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + # Delegate to the chat path via the generic chat-to-completion converter. + return chat_to_completion_decorator(self.chat)(prompt, **kwargs) + 
+ @llm_completion_callback() + def stream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseGen: + return stream_chat_to_completion_decorator(self.stream_chat)(prompt, **kwargs) + 
+ @llm_chat_callback() + def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: + openai_messages = self._messages_to_openai_format(messages) + + # Run async method in sync context + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + response_data = loop.run_until_complete( + self._make_request(openai_messages, stream=False) + ) + finally: + loop.close() + + choice = response_data["choices"][0] + message_content = choice["message"]["content"] + + return ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=message_content, + ), + raw=response_data, + ) + 
+ @llm_chat_callback() + def stream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseGen: + def gen() -> Generator[ChatResponse, None, None]: + openai_messages = self._messages_to_openai_format(messages) + + # Run async method in sync context + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + async_gen = self._make_request(openai_messages, stream=True) + + async def async_wrapper(): + content = "" + async for chunk in async_gen: + choice = chunk["choices"][0] + delta_content = choice.get("delta", {}).get("content", "") + content += delta_content + + yield ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=content, + ), + delta=delta_content, + raw=chunk, + ) + + # Convert async generator to sync + async_iter = async_wrapper() + while True: + try: + chunk = loop.run_until_complete(async_iter.__anext__()) + yield chunk + except StopAsyncIteration: + break + finally: + loop.close() + + return gen() + 
+ @llm_completion_callback() + async def acomplete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponse: + return await achat_to_completion_decorator(self.achat)(prompt, **kwargs) + 
+ @llm_completion_callback() + async def astream_complete( + self, prompt: str, formatted: bool = False, **kwargs: Any + ) -> CompletionResponseAsyncGen: + return await astream_chat_to_completion_decorator(self.astream_chat)(prompt, **kwargs) + 
+ @llm_chat_callback() + async def achat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponse: + openai_messages = self._messages_to_openai_format(messages) + response_data = await self._make_request(openai_messages, stream=False) + + choice = response_data["choices"][0] + message_content = choice["message"]["content"] + + return ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=message_content, + ), + raw=response_data, + ) + 
+ @llm_chat_callback() + async def astream_chat( + self, messages: Sequence[ChatMessage], **kwargs: Any + ) -> ChatResponseAsyncGen: + async def gen() -> AsyncGenerator[ChatResponse, None]: + openai_messages = self._messages_to_openai_format(messages) + content = "" + + async for chunk in await self._make_request(openai_messages,
stream=True): + choice = chunk["choices"][0] + delta_content = choice.get("delta", {}).get("content", "") + content += delta_content + + yield ChatResponse( + message=ChatMessage( + role=MessageRole.ASSISTANT, + content=content, + ), + delta=delta_content, + raw=chunk, + ) + + return gen() + + def __del__(self): + """Cleanup resources.""" + if hasattr(self, '_http_client'): + try: + asyncio.create_task(self._http_client.aclose()) + except Exception: + pass + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml new file mode 100644 index 0000000000..d513ef2c39 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/pyproject.toml @@ -0,0 +1,103 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "ipython==8.10.0", + "jupyter>=1.0.0,<2", + "mypy==0.991", + "pre-commit==3.2.0", + "pylint==2.15.10", + "pytest==7.2.1", + "pytest-mock==3.11.1", + "ruff==0.11.11", + "types-Deprecated>=0.1.0", + "types-PyYAML>=6.0.12.12,<7", + "types-protobuf>=4.24.0.4,<5", + "types-redis==4.5.5.0", + "types-requests==2.28.11.8", + "types-setuptools==67.1.0.0", + "black[jupyter]<=23.9.1,>=23.7.0", + "codespell[toml]>=v2.2.6", + "diff-cover>=9.2.0", + "pytest-cov>=6.1.1", +] + +[project] +name = "llama-index-llms-0g" +version = "0.1.0" +description = "llama-index llms 0G Compute Network integration" +authors = [{name = "LlamaIndex", email = "maintainers@llamaindex.ai"}] +requires-python = ">=3.9,<4.0" +readme = "README.md" +license = "MIT" +dependencies = [ + "llama-index-core>=0.14.3,<0.15", + "httpx>=0.24.0,<1.0", + "pydantic>=2.0.0,<3.0", +] + +[project.optional-dependencies] +# Note: The @0glabs/0g-serving-broker is a JavaScript/TypeScript package +# For Python integration, users would need to set up a Node.js bridge or use subprocess +# This is documented in the README +js-bridge = [ + "nodejs>=18.0.0", # This would require a custom installer +] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.hatch.build.targets.sdist] +include = ["llama_index/"] +exclude = ["**/BUILD"] + +[tool.hatch.build.targets.wheel] +include = ["llama_index/"] +exclude = ["**/BUILD"] + +[tool.llamahub] +contains_example = false +import_path = "llama_index.llms.zerog" + +[tool.llamahub.class_authors] +ZeroGLLM = "llama-index" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.ruff] +line-length = 88 +target-version = "py38" + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] +ignore = ["E501", "E203"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] + +[tool.black] +line-length = 88 +target-version = ['py38'] +include = '\.pyi?$' +extend-exclude = ''' +/( + # directories + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | build + | dist +)/ +''' + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py new file mode 100644 index 0000000000..712b56f942 --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/__init__.py @@ -0,0 +1,2 @@ +# Test package + diff --git a/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py new file mode 
100644 index 0000000000..232ed32f7c --- /dev/null +++ b/llama-index-integrations/llms/llama-index-llms-0g/tests/test_zerog_llm.py @@ -0,0 +1,304 @@ +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from llama_index.core.llms import ChatMessage, MessageRole +from llama_index.llms.zerog import ZeroGLLM + + +class TestZeroGLLM: + """Test cases for ZeroGLLM integration.""" + + def test_initialization_with_official_model(self): + """Test initialization with official model.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + assert llm.model == "llama-3.3-70b-instruct" + assert llm.private_key == "test_private_key" + assert llm.rpc_url == "https://evmrpc-testnet.0g.ai" + assert llm.context_window == 4096 + assert llm.temperature == 0.1 + + def test_initialization_with_custom_provider(self): + """Test initialization with custom provider.""" + custom_address = "0x1234567890abcdef" + llm = ZeroGLLM( + model="custom-model", + provider_address=custom_address, + private_key="test_private_key" + ) + + assert llm.model == "custom-model" + assert llm.provider_address == custom_address + + def test_get_provider_address_official_model(self): + """Test getting provider address for official model.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + address = llm._get_provider_address() + assert address == "0xf07240Efa67755B5311bc75784a061eDB47165Dd" + + def test_get_provider_address_custom_provider(self): + """Test getting provider address for custom provider.""" + custom_address = "0x1234567890abcdef" + llm = ZeroGLLM( + model="custom-model", + provider_address=custom_address, + private_key="test_private_key" + ) + + address = llm._get_provider_address() + assert address == custom_address + + def test_get_provider_address_invalid_model(self): + """Test error handling for invalid model without custom provider.""" + llm = ZeroGLLM( + model="invalid-model", + private_key="test_private_key" + ) + + with pytest.raises(ValueError, match="Model 'invalid-model' not found"): + llm._get_provider_address() + + def test_metadata(self): + """Test LLM metadata.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key", + context_window=8192, + max_tokens=1024 + ) + + metadata = llm.metadata + assert metadata.context_window == 8192 + assert metadata.num_output == 1024 + assert metadata.is_chat_model is True + assert metadata.is_function_calling_model is False + assert metadata.model_name == "llama-3.3-70b-instruct" + + def test_messages_to_openai_format(self): + """Test message format conversion.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello"), + ChatMessage(role=MessageRole.ASSISTANT, content="Hi there!"), + ChatMessage(role=MessageRole.USER, content="How are you?") + ] + + openai_messages = llm._messages_to_openai_format(messages) + + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + + assert openai_messages == expected + + @patch('llama_index.llms.zerog.base.ZeroGLLM._simulate_response') + @patch('llama_index.llms.zerog.base.ZeroGLLM._initialize_broker') + def test_chat_sync(self, mock_init_broker, mock_simulate_response): + """Test synchronous chat functionality.""" + # Setup mocks + mock_init_broker.return_value = None + mock_simulate_response.return_value = { + 
"choices": [{ + "message": { + "role": "assistant", + "content": "Hello! I'm doing well, thank you." + }, + "finish_reason": "stop" + }], + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30} + } + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello, how are you?") + ] + + with patch('asyncio.new_event_loop') as mock_loop_constructor: + mock_loop = MagicMock() + mock_loop_constructor.return_value = mock_loop + mock_loop.run_until_complete.return_value = mock_simulate_response.return_value + + response = llm.chat(messages) + + assert response.message.role == MessageRole.ASSISTANT + assert response.message.content == "Hello! I'm doing well, thank you." + + @pytest.mark.asyncio + async def test_achat(self): + """Test asynchronous chat functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [ + ChatMessage(role=MessageRole.USER, content="Hello") + ] + + with patch.object(llm, '_initialize_broker', new_callable=AsyncMock) as mock_init: + with patch.object(llm, '_make_request', new_callable=AsyncMock) as mock_request: + mock_init.return_value = None + mock_request.return_value = { + "choices": [{ + "message": { + "role": "assistant", + "content": "Hello there!" + }, + "finish_reason": "stop" + }] + } + + response = await llm.achat(messages) + + assert response.message.role == MessageRole.ASSISTANT + assert response.message.content == "Hello there!" + mock_init.assert_called_once() + mock_request.assert_called_once() + + def test_complete_sync(self): + """Test synchronous completion functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + with patch.object(llm, 'chat') as mock_chat: + mock_chat.return_value = MagicMock() + mock_chat.return_value.message.content = "Completion response" + + response = llm.complete("Test prompt") + + # The complete method should call chat internally + mock_chat.assert_called_once() + + @pytest.mark.asyncio + async def test_acomplete(self): + """Test asynchronous completion functionality.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + with patch.object(llm, 'achat', new_callable=AsyncMock) as mock_achat: + mock_response = MagicMock() + mock_response.message.content = "Async completion response" + mock_achat.return_value = mock_response + + response = await llm.acomplete("Test prompt") + + # The acomplete method should call achat internally + mock_achat.assert_called_once() + + def test_class_name(self): + """Test class name method.""" + assert ZeroGLLM.class_name() == "ZeroGLLM" + + def test_official_services_constants(self): + """Test that official services are properly defined.""" + from llama_index.llms.zerog.base import OFFICIAL_0G_SERVICES + + assert "llama-3.3-70b-instruct" in OFFICIAL_0G_SERVICES + assert "deepseek-r1-70b" in OFFICIAL_0G_SERVICES + + llama_service = OFFICIAL_0G_SERVICES["llama-3.3-70b-instruct"] + assert llama_service["provider_address"] == "0xf07240Efa67755B5311bc75784a061eDB47165Dd" + assert "TEE (TeeML)" in llama_service["verification"] + + deepseek_service = OFFICIAL_0G_SERVICES["deepseek-r1-70b"] + assert deepseek_service["provider_address"] == "0x3feE5a4dd5FDb8a32dDA97Bed899830605dBD9D3" + assert "TEE (TeeML)" in deepseek_service["verification"] + + @pytest.mark.asyncio + async def test_simulate_response(self): + """Test the simulate response 
method.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [{"role": "user", "content": "Hello"}] + response = await llm._simulate_response(messages) + + assert "choices" in response + assert len(response["choices"]) == 1 + assert "message" in response["choices"][0] + assert "content" in response["choices"][0]["message"] + assert "Hello" in response["choices"][0]["message"]["content"] + + @pytest.mark.asyncio + async def test_simulate_streaming_response(self): + """Test the simulate streaming response method.""" + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key" + ) + + messages = [{"role": "user", "content": "Hello"}] + chunks = [] + + async for chunk in llm._simulate_streaming_response(messages): + chunks.append(chunk) + + assert len(chunks) > 0 + + # Check first chunk + first_chunk = chunks[0] + assert "choices" in first_chunk + assert "delta" in first_chunk["choices"][0] + assert "content" in first_chunk["choices"][0]["delta"] + + # Check last chunk has finish_reason + last_chunk = chunks[-1] + assert last_chunk["choices"][0]["finish_reason"] == "stop" + + def test_additional_kwargs(self): + """Test additional kwargs are properly stored.""" + additional_kwargs = { + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.2 + } + + llm = ZeroGLLM( + model="llama-3.3-70b-instruct", + private_key="test_private_key", + additional_kwargs=additional_kwargs + ) + + assert llm.additional_kwargs == additional_kwargs + + def test_custom_parameters(self): + """Test custom parameters are properly set.""" + llm = ZeroGLLM( + model="deepseek-r1-70b", + private_key="test_private_key", + rpc_url="https://custom-rpc.example.com", + context_window=8192, + max_tokens=2048, + temperature=0.7, + timeout=120.0 + ) + + assert llm.model == "deepseek-r1-70b" + assert llm.rpc_url == "https://custom-rpc.example.com" + assert llm.context_window == 8192 + assert llm.max_tokens == 2048 + assert llm.temperature == 0.7 + assert llm.timeout == 120.0 +
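+
+    def test_messages_to_openai_format_empty_content(self):
+        """Messages without content should fall back to an empty string."""
+        # Covers the `message.content or ""` fallback in _messages_to_openai_format.
+        llm = ZeroGLLM(
+            model="llama-3.3-70b-instruct",
+            private_key="test_private_key"
+        )
+
+        messages = [ChatMessage(role=MessageRole.USER)]
+        openai_messages = llm._messages_to_openai_format(messages)
+
+        assert openai_messages == [{"role": "user", "content": ""}]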