diff --git a/.envrc b/.envrc new file mode 100644 index 000000000..8ee86bf24 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +source_env .env diff --git a/agentops/instrumentation/agentic/agno/instrumentor.py b/agentops/instrumentation/agentic/agno/instrumentor.py index 0707f5e54..0913c2554 100644 --- a/agentops/instrumentation/agentic/agno/instrumentor.py +++ b/agentops/instrumentation/agentic/agno/instrumentor.py @@ -115,6 +115,42 @@ def __getattr__(self, name): return getattr(self.original_result, name) +class AsyncStreamingResultWrapper: + """Wrapper for async streaming results that maintains agent span as active throughout iteration.""" + + def __init__(self, original_result, span, agent_id, agent_context, streaming_context_manager): + self.original_result = original_result + self.span = span + self.agent_id = agent_id + self.agent_context = agent_context + self.streaming_context_manager = streaming_context_manager + self._consumed = False + + def __aiter__(self): + """Return async iterator that keeps agent span active during iteration.""" + return self + + async def __anext__(self): + """Async iteration that keeps agent span active.""" + context_token = otel_context.attach(self.agent_context) + try: + item = await self.original_result.__anext__() + return item + except StopAsyncIteration: + # Clean up when iteration is complete + if not self._consumed: + self._consumed = True + self.span.end() + self.streaming_context_manager.remove_context(self.agent_id) + raise + finally: + otel_context.detach(context_token) + + def __getattr__(self, name): + """Delegate attribute access to the original result.""" + return getattr(self.original_result, name) + + def create_streaming_workflow_wrapper(tracer, streaming_context_manager): """Create a streaming-aware wrapper for workflow run methods.""" @@ -442,7 +478,11 @@ async def wrapper(wrapped, instance, args, kwargs): span.set_status(Status(StatusCode.OK)) # Wrap the result to maintain context and end span when complete - if hasattr(result, "__iter__"): + if hasattr(result, "__aiter__"): + return AsyncStreamingResultWrapper( + result, span, agent_id, current_context, streaming_context_manager + ) + elif hasattr(result, "__iter__"): return StreamingResultWrapper(result, span, agent_id, current_context, streaming_context_manager) else: # Not actually streaming, clean up immediately @@ -835,7 +875,9 @@ def wrapper(wrapped, instance, args, kwargs): def create_team_async_wrapper(tracer, streaming_context_manager): """Create an async wrapper for Team methods that establishes the team context.""" - async def wrapper(wrapped, instance, args, kwargs): + def wrapper(wrapped, instance, args, kwargs): + import inspect + # Get team ID for context storage team_id = getattr(instance, "team_id", None) or getattr(instance, "id", None) or id(instance) team_id = str(team_id) @@ -863,17 +905,20 @@ async def wrapper(wrapped, instance, args, kwargs): # Execute the original function within team context context_token = otel_context.attach(current_context) try: - result = await wrapped(*args, **kwargs) - - # For non-streaming, close the span - if not is_streaming: - span.end() - streaming_context_manager.remove_context(team_id) - - return result + result = wrapped(*args, **kwargs) finally: otel_context.detach(context_token) + # For streaming, wrap the result to maintain context + if is_streaming and inspect.isasyncgen(result): + return AsyncStreamingResultWrapper(result, span, team_id, current_context, streaming_context_manager) + elif hasattr(result, "__iter__"): + return 
StreamingResultWrapper(result, span, team_id, current_context, streaming_context_manager) + else: + span.end() + streaming_context_manager.remove_context(team_id) + return result + except Exception as e: span.set_status(Status(StatusCode.ERROR, str(e))) span.record_exception(e) diff --git a/app/.env.example b/app/.env.example index ead146b32..48316a945 100644 --- a/app/.env.example +++ b/app/.env.example @@ -49,12 +49,19 @@ SUPABASE_S3_ACCESS_KEY_ID= SUPABASE_S3_SECRET_ACCESS_KEY= # ClickHouse -CLICKHOUSE_HOST=YOUR_CLICKHOUSE_HOST -CLICKHOUSE_PORT=8443 +# ClickHouse Configuration +# For LOCAL development, use: +# CLICKHOUSE_HOST=127.0.0.1 +# CLICKHOUSE_PORT=8123 +# CLICKHOUSE_SECURE=false +# CLICKHOUSE_PASSWORD=password +# For CLOUD deployment, use your ClickHouse Cloud credentials: +CLICKHOUSE_HOST=127.0.0.1 # Change to YOUR_CLICKHOUSE_HOST for cloud +CLICKHOUSE_PORT=8123 # Change to 8443 for cloud CLICKHOUSE_USER=default -CLICKHOUSE_PASSWORD= +CLICKHOUSE_PASSWORD=password # Set your password CLICKHOUSE_DATABASE=otel_2 -CLICKHOUSE_SECURE=true +CLICKHOUSE_SECURE=false # Change to true for cloud CLICKHOUSE_ENDPOINT= CLICKHOUSE_USERNAME= diff --git a/app/README.md b/app/README.md index 517c78154..4670d0405 100644 --- a/app/README.md +++ b/app/README.md @@ -1,3 +1,61 @@ +Restart local stack and verify + +- Restart services: + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml down --remove-orphans + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml up -d + +- Check logs: + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s api + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s dashboard + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s otelcollector + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s clickhouse + +- Open dashboard: + http://localhost:3000/signin + +- Generate a trace (example): + AGENTOPS_API_KEY=<key> \ + AGENTOPS_API_ENDPOINT=http://localhost:8000 \ + AGENTOPS_APP_URL=http://localhost:3000 \ + AGENTOPS_EXPORTER_ENDPOINT=http://localhost:4318/v1/traces \ + OPENAI_API_KEY=<openai_key> \ + python examples/openai/openai_example_sync.py + +- Verify ClickHouse and dashboard: + curl -s -u default:password "http://localhost:8123/?query=SELECT%20count()%20FROM%20otel_2.otel_traces%20WHERE%20TraceId%20=%20'<TRACE_ID>'" + + http://localhost:3000/traces?trace_id=<TRACE_ID> + + +Local ClickHouse (self-hosted) +- Set in .env: + CLICKHOUSE_HOST=127.0.0.1 + CLICKHOUSE_PORT=8123 + CLICKHOUSE_USER=default + CLICKHOUSE_PASSWORD=password + CLICKHOUSE_DATABASE=otel_2 + CLICKHOUSE_SECURE=false + CLICKHOUSE_ENDPOINT=http://clickhouse:8123 + CLICKHOUSE_USERNAME=default + +- Start services (includes otelcollector + local ClickHouse): + docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml up -d + +- Initialize ClickHouse schema: + curl -u default:password 'http://localhost:8123/?query=CREATE%20DATABASE%20IF%20NOT%20EXISTS%20otel_2' + curl --data-binary @app/clickhouse/migrations/0000_init.sql -u default:password 'http://localhost:8123/?query=' + +- Run example with local OTLP exporter: + AGENTOPS_API_KEY= \ + AGENTOPS_API_ENDPOINT=http://localhost:8000 \ + AGENTOPS_APP_URL=http://localhost:3000 \ + AGENTOPS_EXPORTER_ENDPOINT=http://localhost:4318/v1/traces \ + OPENAI_API_KEY= \ + python examples/openai/openai_example_sync.py + +- Verify: + - Dashboard: http://localhost:3000/traces?trace_id= 
+  - CH rows: curl -s -u default:password "http://localhost:8123/?query=SELECT%20count()%20FROM%20otel_2.otel_traces%20WHERE%20TraceId%20=%20''"
 # AgentOps
 
 [![License: ELv2](https://img.shields.io/badge/License-ELv2-blue.svg)](https://www.elastic.co/licensing/elastic-license)
@@ -312,6 +370,7 @@ AgentOps requires several external services. Here's how to set them up:
    ```
 4. Run migrations: `supabase db push`
+For Linux environments with CLI install issues, see docs/local_supabase_linux.md for manual binary installation and environment-variable mapping steps.
 
 **Option B: Cloud Supabase**
 1. Create a new project at [supabase.com](https://supabase.com)
diff --git a/app/SETUP_GUIDE.md b/app/SETUP_GUIDE.md
new file mode 100644
index 000000000..cfa49556e
--- /dev/null
+++ b/app/SETUP_GUIDE.md
@@ -0,0 +1,559 @@
+# AgentOps Self-Hosted Setup Guide
+
+Complete step-by-step instructions for setting up AgentOps locally from scratch, with authentication and an end-to-end trace demonstration.
+
+## Prerequisites
+
+Before starting, ensure you have the following installed on your system:
+
+- **Docker & Docker Compose** ([Download](https://www.docker.com/get-started))
+- **Python 3.12+** ([Download](https://www.python.org/downloads/))
+- **Node.js 18+** ([Download](https://nodejs.org/))
+- **Git** ([Download](https://git-scm.com/downloads))
+- **curl** (usually pre-installed on Linux/macOS)
+
+### Optional but Recommended:
+- **Bun** for faster Node.js package management ([Install Bun](https://bun.sh/))
+- **uv** for faster Python package management ([Install uv](https://github.com/astral-sh/uv))
+
+## Step 1: Clone and Navigate to Repository
+
+```bash
+# Clone the repository
+git clone https://github.com/AgentOps-AI/agentops.git
+
+# Navigate to the app directory (this is your working directory for all subsequent steps)
+cd agentops/app
+```
+
+### Directory Structure Overview
+
+After cloning, your directory structure should look like this:
+```
+agentops/
+├── app/                          # Main application directory (your working directory)
+│   ├── .env                      # Main environment configuration (you'll edit this)
+│   ├── compose.yaml              # Docker Compose for core services
+│   ├── api/                      # FastAPI backend
+│   │   ├── run.py                # API server startup script
+│   │   └── pyproject.toml        # Python dependencies
+│   ├── dashboard/                # Next.js frontend
+│   │   ├── .env.local            # Dashboard environment (you'll create this)
+│   │   └── package.json          # Node.js dependencies
+│   ├── clickhouse/
+│   │   └── migrations/
+│   │       └── 0000_init.sql     # ClickHouse schema
+│   ├── opentelemetry-collector/
+│   │   └── compose.yaml          # OpenTelemetry collector service
+│   └── supabase/
+│       ├── config.toml           # Supabase configuration
+│       └── seed.sql              # Database seed data (contains test user)
+├── examples/                     # Example scripts for testing
+│   └── openai/
+│       └── openai_example_sync.py
+└── docs/                         # Documentation
+```
+
+**Important**: All subsequent commands should be run from the `agentops/app/` directory unless otherwise specified.
+
+## Step 2: Install Supabase CLI
+
+```bash
+# Install Supabase CLI (the npm package blocks global installs, so use
+# Homebrew, or add it as a dev dependency and invoke it via `npx supabase`)
+brew install supabase/tap/supabase
+# or: npm install supabase --save-dev
+
+# Verify installation
+supabase --version
+```
+
+## Step 3: Start Local Supabase
+
+```bash
+# Initialize and start Supabase locally
+supabase start
+
+# This will output connection details - save these for later:
+# - API URL: http://127.0.0.1:54321
+# - anon key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...
+# - service_role key: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...
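+
+# Note: `supabase status` re-prints these connection details at any time if
+# you lose them. The anon/service_role keys shown locally are the standard
+# supabase-demo development keys, signed with the local JWT secret.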
+``` + +## Step 4: Configure Environment Variables + +Start from the example file and update for local development: + +```bash +# Copy the example file +cp .env.example .env + +# The example file now includes LOCAL settings by default +# Just verify ClickHouse is set for local (not cloud): +grep CLICKHOUSE .env +``` + +Update `app/.env` with the following configuration. + +**IMPORTANT**: If an `app/api/.env` file exists, you must also ensure it contains the `PROTOCOL=http` variable for local development, otherwise authentication cookies will not work properly: + +```bash +# Dashboard (Next.js) +NEXT_PUBLIC_SUPABASE_URL=http://127.0.0.1:54321 +NEXT_PUBLIC_SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0 +SUPABASE_SERVICE_ROLE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU +SUPABASE_PROJECT_ID=YOUR_PROJECT_ID + +NEXT_PUBLIC_API_URL=http://localhost:8000 +NEXT_PUBLIC_OTEL_ENDPOINT=http://localhost:4318/v1/traces +NEXT_PUBLIC_SITE_URL=http://localhost:3000 + +NEXT_PUBLIC_ENVIRONMENT_TYPE=development +NEXT_PUBLIC_PLAYGROUND=false + +# Optional analytics/monitoring +NEXT_PUBLIC_POSTHOG_KEY= +NEXT_PUBLIC_POSTHOG_HOST= +NEXT_PUBLIC_SENTRY_DSN= +NEXT_PUBLIC_SENTRY_ORG= +NEXT_PUBLIC_SENTRY_PROJECT= + +# API Server +API_HOST=0.0.0.0 +API_PORT=8000 +API_WORKERS=1 +API_RELOAD=true +API_LOG_LEVEL=info + +# Logging +LOG_LEVEL=INFO +SQLALCHEMY_LOG_LEVEL=WARNING + +# Auth - CRITICAL: Must match Supabase JWT secret +JWT_SECRET_KEY=super-secret-jwt-token-with-at-least-32-characters-long + +# Supabase DB + Storage +SUPABASE_URL=http://127.0.0.1:54321 +SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU +SUPABASE_MAX_POOL_SIZE=10 +SUPABASE_HOST=127.0.0.1 +SUPABASE_PORT=54322 +SUPABASE_DATABASE=postgres +SUPABASE_USER=postgres +SUPABASE_PASSWORD=postgres +SUPABASE_S3_BUCKET=user-uploads +SUPABASE_S3_LOGS_BUCKET=agentops-logs +SUPABASE_S3_ACCESS_KEY_ID= +SUPABASE_S3_SECRET_ACCESS_KEY= + +# ClickHouse +CLICKHOUSE_HOST=127.0.0.1 +CLICKHOUSE_PORT=8123 +CLICKHOUSE_USER=default +CLICKHOUSE_PASSWORD=password +CLICKHOUSE_DATABASE=otel_2 +CLICKHOUSE_SECURE=false +CLICKHOUSE_ENDPOINT=http://clickhouse:8123 +CLICKHOUSE_USERNAME=default + +# Stripe (server-side, optional) +STRIPE_SECRET_KEY= +STRIPE_WEBHOOK_SECRET= + +# Redis +REDIS_URL=redis://localhost:6379 + +# OpenTelemetry +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +OTEL_SERVICE_NAME=agentops-api +OTEL_RESOURCE_ATTRIBUTES=service.name=agentops-api,service.version=1.0.0 +``` + +## Step 5: Configure Dashboard Environment + +Create `app/dashboard/.env.local`: + +```bash +NEXT_PUBLIC_SUPABASE_URL=http://127.0.0.1:54321 +NEXT_PUBLIC_SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0 +NEXT_PUBLIC_API_URL=http://localhost:8000 +NEXT_PUBLIC_PLAYGROUND=false +``` + +## Step 6: Start Services with Docker Compose + +```bash +# Start all services (ClickHouse, Redis, OpenTelemetry Collector) +# Use both compose files to include OpenTelemetry collector +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml up -d + +# Verify services 
are running +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml ps + +# Expected services: +# - app-clickhouse-1 (running) +# - app-redis-1 (running) +# - app-otelcollector-1 (running) +``` + +## Step 7: Set Up ClickHouse Schema + +```bash +# Wait for ClickHouse to be ready +sleep 15 + +# Method 1: Using curl (recommended) +curl -u default:password 'http://localhost:8123/?query=CREATE%20DATABASE%20IF%20NOT%20EXISTS%20otel_2' +curl --data-binary @clickhouse/migrations/0000_init.sql -u default:password 'http://localhost:8123/?query=' + +# Method 2: Using docker exec (alternative) +# docker exec -it app-clickhouse-1 clickhouse-client --query "CREATE DATABASE IF NOT EXISTS otel_2" +# docker exec -i app-clickhouse-1 clickhouse-client --database=otel_2 < clickhouse/migrations/0000_init.sql + +# Verify ClickHouse setup +curl -s -u default:password "http://localhost:8123/?query=SHOW%20TABLES%20FROM%20otel_2" + +# Expected output should include tables like: +# - otel_traces +# - otel_spans +# - otel_logs +``` + +## Step 8: Apply Supabase Migrations and Seed Data + +```bash +# Apply database migrations +supabase db reset + +# Verify seed data was applied +supabase db dump --data-only --table=users +``` + +## Step 9: Start API Server + +**CRITICAL**: The API requires its own `.env` file at `api/.env` with the CORRECT ClickHouse settings. If this file exists with wrong settings (e.g., pointing to cloud ClickHouse), traces won't appear even if they're stored locally. + +**⚠️ Common Issue**: If you pulled this repo and `api/.env` already exists, it may have cloud ClickHouse settings. You MUST update it for local development: + +```bash +# Check if api/.env exists +ls api/.env + +# Option 1: Copy from the example file (RECOMMENDED for new setup) +cp api/.env.example api/.env + +# Option 2: Create a symlink to the main .env: +ln -s ../.env api/.env + +# Option 3: Create api/.env manually with these minimum required variables: +cat > api/.env << 'EOF' +PROTOCOL=http +APP_DOMAIN=localhost:3000 +API_DOMAIN=localhost:8000 +SUPABASE_URL=http://127.0.0.1:54321 +SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU +JWT_SECRET_KEY=super-secret-jwt-token-with-at-least-32-characters-long +AUTH_COOKIE_SECRET=super-secret-cookie-token-with-at-least-32-characters-long +SUPABASE_HOST=127.0.0.1 +SUPABASE_PORT=54322 +SUPABASE_DATABASE=postgres +SUPABASE_USER=postgres +SUPABASE_PASSWORD=postgres +CLICKHOUSE_HOST=127.0.0.1 +CLICKHOUSE_PORT=8123 +CLICKHOUSE_USER=default +CLICKHOUSE_PASSWORD=password +CLICKHOUSE_DATABASE=otel_2 +CLICKHOUSE_SECURE=false +EOF +``` + +```bash +# Navigate to API directory +cd api + +# Install Python dependencies +pip install -e . + +# Start the API server +python run.py +``` + +The API server should start on `http://localhost:8000`. Look for these log messages: +- `INFO: Started server process` +- `INFO: Uvicorn running on http://0.0.0.0:8000` +- `INFO: Application startup complete` + +**Important**: Keep this terminal open - the API server must remain running. 
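+
+If traces later fail to appear, a quick way to confirm that `api/.env` really points at
+the local ClickHouse is to load it and issue a `SELECT 1` before debugging further. The
+following is a minimal sketch (it assumes the `api/.env` path and the local credentials
+above; the script name and the simple KEY=VALUE parser are illustrative, not part of
+AgentOps):
+
+```python
+#!/usr/bin/env python3
+"""Sanity-check the ClickHouse settings in api/.env against the local instance."""
+
+import base64
+import urllib.request
+
+# Parse api/.env into a dict (handles simple KEY=VALUE lines only)
+env = {}
+with open("api/.env") as f:
+    for line in f:
+        line = line.strip()
+        if line and not line.startswith("#") and "=" in line:
+            key, _, value = line.partition("=")
+            env[key] = value
+
+host = env.get("CLICKHOUSE_HOST", "")
+port = env.get("CLICKHOUSE_PORT", "")
+user = env.get("CLICKHOUSE_USER", "default")
+password = env.get("CLICKHOUSE_PASSWORD", "")
+
+# Local development should target 127.0.0.1:8123, not a cloud host
+assert (host, port) == ("127.0.0.1", "8123"), f"api/.env points at {host}:{port}"
+
+# Issue SELECT 1 over ClickHouse's HTTP interface using basic auth
+req = urllib.request.Request(f"http://{host}:{port}/?query=SELECT%201")
+token = base64.b64encode(f"{user}:{password}".encode()).decode()
+req.add_header("Authorization", f"Basic {token}")
+print(urllib.request.urlopen(req).read().decode().strip())  # expected output: 1
+```
+
+If this prints `1`, the API is reading ClickHouse settings consistent with the local stack.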
+ +### Verify API Server +Test the API server is working: +```bash +# In a new terminal +curl http://localhost:8000/health +# Expected: {"status": "healthy"} +``` + +## Step 10: Start Dashboard + +```bash +# In a new terminal, navigate to dashboard directory +cd dashboard + +# Install Node.js dependencies +npm install +# OR if using bun: bun install + +# Start the dashboard in development mode +npm run dev +# OR if using bun: bun run dev +``` + +The dashboard should start on `http://localhost:3000`. Look for these log messages: +- `Ready - started server on 0.0.0.0:3000` +- `Local: http://localhost:3000` + +**Important**: Keep this terminal open - the dashboard must remain running. + +### Verify Dashboard +Open your browser and navigate to `http://localhost:3000` - you should see the AgentOps login page. + +## Step 11: Verify Authentication + +1. Navigate to `http://localhost:3000/signin` +2. **CRITICAL**: Use the correct seed data credentials: + - **Email**: `test@agentops.ai` + - **Password**: `password` +3. You should be redirected to the dashboard after successful login + +**Note**: Do NOT use `demo@agentops.ai` - this user doesn't exist in the seed data and will cause authorization failures. + +## Step 12: Test Trace Generation + +Create a test script `test_trace.py`: + +```python +#!/usr/bin/env python3 + +import agentops +import os +import time + +# Configure AgentOps for local setup +os.environ["AGENTOPS_API_KEY"] = "6b7a1469-bdcb-4d47-85ba-c4824bc8486e" # From seed data +os.environ["AGENTOPS_API_ENDPOINT"] = "http://localhost:8000" +os.environ["AGENTOPS_APP_URL"] = "http://localhost:3000" +os.environ["AGENTOPS_EXPORTER_ENDPOINT"] = "http://localhost:4318/v1/traces" + +def test_agentops_trace(): + """Test AgentOps trace generation""" + try: + print("🚀 Starting AgentOps trace test...") + + agentops.init(auto_start_session=True, trace_name="Test Trace", tags=["local-test"]) + print("✓ AgentOps initialized successfully") + + tracer = agentops.start_trace(trace_name="Test Trace", tags=["test"]) + print("✓ Trace started successfully") + + print("📝 Simulating work...") + time.sleep(2) + + agentops.end_trace(tracer, end_state="Success") + print("✓ Trace ended successfully") + + print("\n🎉 AgentOps trace test completed successfully!") + print("🖇 Check the AgentOps output above for the session replay URL") + + return True + + except Exception as e: + print(f"❌ Error during trace test: {e}") + return False + +if __name__ == "__main__": + success = test_agentops_trace() + if success: + print("\n✅ Trace test passed!") + else: + print("\n❌ Trace test failed!") +``` + +Run the test: + +```bash +python test_trace.py +``` + +## Step 13: Verify End-to-End Trace Flow + +1. Run the test script above +2. Note the session URL in the output (e.g., `http://localhost:3000/sessions?trace_id=...`) +3. Navigate to that URL in your browser while logged in as `test@agentops.ai` +4. 
You should see the trace details, including duration, costs, and timeline
+
+## Step 14: Test with OpenAI Example (Optional)
+
+If you have an OpenAI API key, test with a real LLM call:
+
+```python
+#!/usr/bin/env python3
+
+import os
+from openai import OpenAI
+import agentops
+
+# Configure environment
+os.environ['AGENTOPS_API_KEY'] = '6b7a1469-bdcb-4d47-85ba-c4824bc8486e'
+os.environ['AGENTOPS_API_ENDPOINT'] = 'http://localhost:8000'
+os.environ['AGENTOPS_APP_URL'] = 'http://localhost:3000'
+os.environ['AGENTOPS_EXPORTER_ENDPOINT'] = 'http://localhost:4318/v1/traces'
+os.environ['OPENAI_API_KEY'] = 'your-openai-api-key-here'
+
+agentops.init(auto_start_session=True, trace_name="OpenAI Test")
+tracer = agentops.start_trace(trace_name="OpenAI Test", tags=["openai", "test"])
+
+client = OpenAI()
+
+try:
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Write a short story about AI."}],
+    )
+
+    print("Generated story:")
+    print(response.choices[0].message.content)
+
+    agentops.end_trace(tracer, end_state="Success")
+    print("✅ OpenAI test completed successfully!")
+
+except Exception as e:
+    print(f"❌ Error: {e}")
+    agentops.end_trace(tracer, end_state="Error")
+```
+
+## Troubleshooting
+
+### Traces Not Showing in Dashboard Despite Successful Ingestion
+
+If your test script says traces were sent successfully but they don't appear in the dashboard:
+
+1. **Check the API's ClickHouse configuration**:
+   ```bash
+   grep CLICKHOUSE api/.env
+   ```
+   Should show LOCAL settings (127.0.0.1, port 8123, not cloud URLs)
+
+2. **Verify traces are in ClickHouse**:
+   ```bash
+   docker exec app-clickhouse-1 clickhouse-client --user default --password password \
+     -q "SELECT TraceId, project_id FROM otel_2.otel_traces"
+   ```
+
+3. **Ensure you're viewing the correct project**: The test data goes to "test_project", not "Default Project"
+
+4. **Fix api/.env if needed** (see Step 9 above)
+
+### Common Issues
+
+1. **Authentication not working / Can't log in**
+   - **CRITICAL**: Ensure the `api/.env` file exists with `PROTOCOL=http` for local development
+   - Check cookies are being set without the `Secure` flag: `curl -v -X POST http://localhost:8000/auth/login ...`
+   - Clear browser cookies for localhost:3000 and try again
+   - Verify Redis is running for session storage: `docker exec -it app-redis-1 redis-cli ping` (expected: `PONG`); if it is not running, start one with `docker run -d --name redis -p 6379:6379 redis:alpine`
+   - Ensure both `JWT_SECRET_KEY` and `AUTH_COOKIE_SECRET` are set in `api/.env`
+
+2. **"Project not found" errors in dashboard**
+   - Ensure you're logged in as `test@agentops.ai` (not `demo@agentops.ai`)
+   - Verify the API key `6b7a1469-bdcb-4d47-85ba-c4824bc8486e` is being used
+
+3. **JWT signature verification errors**
+   - Ensure `JWT_SECRET_KEY=super-secret-jwt-token-with-at-least-32-characters-long` in both `.env` files
+   - Restart the API server after changing the JWT secret
+
+4. **ClickHouse connection errors**
+   - Verify ClickHouse is running: `docker compose ps`
+   - Check ClickHouse logs: `docker compose logs clickhouse`
+
+5. 
**Traces not appearing in dashboard** + - Verify you're using the correct user credentials + - Check API server logs for authorization errors + - Ensure OpenTelemetry collector is running + +### Service Status Commands + +```bash +# Check all services +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml ps + +# View logs for specific services +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s clickhouse +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s redis +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml logs --since=90s otelcollector + +# Check Supabase status +supabase status + +# Test ClickHouse connection +curl -s -u default:password "http://localhost:8123/?query=SELECT%201" +# Expected output: 1 + +# Test Redis connection +docker exec -it app-redis-1 redis-cli ping +# Expected output: PONG + +# Restart all services if needed +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml down --remove-orphans +docker compose -f compose.yaml -f opentelemetry-collector/compose.yaml up -d +``` + +### Key Configuration Points + +1. **JWT Secret**: Must match between Supabase and API server +2. **API Key**: Use `6b7a1469-bdcb-4d47-85ba-c4824bc8486e` from seed data +3. **User Credentials**: Use `test@agentops.ai` / `password` from seed data +4. **Playground Mode**: Must be disabled (`NEXT_PUBLIC_PLAYGROUND=false`) + +## Success Criteria + +When everything is working correctly, you should be able to: + +1. ✅ **Authentication**: Log into dashboard at `http://localhost:3000/signin` with `test@agentops.ai` / `password` +2. ✅ **Trace Generation**: Run AgentOps SDK scripts that generate traces and session URLs +3. ✅ **Dashboard Access**: View traces in dashboard with full details (duration, costs, timeline) +4. ✅ **Data Visualization**: See LLM calls, tool usage, and other telemetry data in waterfall view +5. ✅ **Navigation**: Switch between different trace views (waterfall, terminal logs) +6. ✅ **End-to-End Flow**: Complete workflow from trace generation to dashboard visualization + +## Final Verification Checklist + +Before considering the setup complete, verify each component: + +- [ ] **Supabase**: `supabase status` shows all services running +- [ ] **Docker Services**: All containers (ClickHouse, Redis, OpenTelemetry) are running +- [ ] **ClickHouse**: Database `otel_2` exists with required tables +- [ ] **API Server**: Responds to health checks at `http://localhost:8000/health` +- [ ] **Dashboard**: Loads at `http://localhost:3000` without errors +- [ ] **Authentication**: Can log in with `test@agentops.ai` / `password` +- [ ] **Trace Generation**: Test script runs without errors and outputs session URL +- [ ] **Trace Viewing**: Can navigate to trace URL and see detailed trace data +- [ ] **No Authorization Errors**: No "Project not found" or JWT signature errors in logs + +The setup is complete when you can perform the complete end-to-end trace generation and visualization workflow without any authorization, persistence, or configuration errors. + +## Next Steps + +Once your self-hosted AgentOps is working: + +1. **Integrate with your applications**: Use the API key `6b7a1469-bdcb-4d47-85ba-c4824bc8486e` in your AI applications +2. **Customize configuration**: Modify environment variables for your specific needs +3. **Set up production deployment**: Consider using production-grade databases and security configurations +4. 
**Monitor performance**: Use the dashboard to track your AI application performance and costs + +## Support + +If you encounter issues not covered in the troubleshooting section: + +1. Check all service logs for error messages +2. Verify all environment variables are set correctly +3. Ensure all services are running and accessible +4. Review the AgentOps documentation for additional configuration options diff --git a/app/api/agentops/api/db/clickhouse_client.py b/app/api/agentops/api/db/clickhouse_client.py index 359a76e38..9064ec118 100644 --- a/app/api/agentops/api/db/clickhouse_client.py +++ b/app/api/agentops/api/db/clickhouse_client.py @@ -13,6 +13,7 @@ CLICKHOUSE_PASSWORD, CLICKHOUSE_PORT, CLICKHOUSE_USER, + CLICKHOUSE_SECURE, ) # Global variables to store client instances @@ -37,7 +38,7 @@ class ConnectionConfig: database: str = CLICKHOUSE_DATABASE username: str = CLICKHOUSE_USER password: str = CLICKHOUSE_PASSWORD - secure: bool = True + secure: bool = CLICKHOUSE_SECURE def __init__(self) -> None: """Non-instantiable class has a lower chance of being printed.""" diff --git a/app/api/agentops/api/environment.py b/app/api/agentops/api/environment.py index 0831a4c6c..9a8c6cedc 100644 --- a/app/api/agentops/api/environment.py +++ b/app/api/agentops/api/environment.py @@ -18,6 +18,7 @@ CLICKHOUSE_USER: str = os.getenv("CLICKHOUSE_USER", "") CLICKHOUSE_PASSWORD: str = os.getenv("CLICKHOUSE_PASSWORD", "") CLICKHOUSE_DATABASE: str = os.getenv("CLICKHOUSE_DATABASE", "") +CLICKHOUSE_SECURE: bool = os.getenv("CLICKHOUSE_SECURE", "false").lower() in ("1", "true", "yes") PROFILING_ENABLED: bool = os.environ.get("PROFILING_ENABLED", "false").lower() == "true" diff --git a/app/api/agentops/api/routes/v2.py b/app/api/agentops/api/routes/v2.py index ad506ee48..50d6f446a 100644 --- a/app/api/agentops/api/routes/v2.py +++ b/app/api/agentops/api/routes/v2.py @@ -83,7 +83,7 @@ async def create_session(request: Request, supabase: AsyncSupabaseClient): except ExpiredJWTError: logger.warning("Expired JWT") return JSONResponse({"path": request.url.path, "message": "Expired Token"}, status_code=401) - except InvalidAPIKeyError as e: + except InvalidAPIKeyError: # This is a user error (invalid API key format), not a system error # Log as warning to avoid Sentry alerts try: @@ -93,7 +93,7 @@ async def create_session(request: Request, supabase: AsyncSupabaseClient): ) except Exception: logger.warning(f"{request.url.path}: Invalid API key format") - + return JSONResponse( { "path": request.url.path, @@ -151,8 +151,8 @@ async def v2_reauthorize_jwt(request: Request, supabase: AsyncSupabaseClient): token = generate_jwt(data["session_id"], jwt_secret) logger.info(colored(f"Completed request for session: {data['session_id']}", "yellow")) return JSONResponse({"status": "Success", "jwt": token}) - - except InvalidAPIKeyError as e: + + except InvalidAPIKeyError: # This is a user error (invalid API key format), not a system error # Log as warning to avoid Sentry alerts try: @@ -162,7 +162,7 @@ async def v2_reauthorize_jwt(request: Request, supabase: AsyncSupabaseClient): ) except Exception: logger.warning(f"{request.url.path}: Invalid API key format") - + return JSONResponse( { "path": request.url.path, @@ -252,7 +252,7 @@ async def v2_create_session(request: Request, supabase: AsyncSupabaseClient): except ExpiredJWTError: logger.warning("Expired JWT") return JSONResponse({"path": str(request.url.path), "message": "Expired Token"}, status_code=401) - except InvalidAPIKeyError as e: + except InvalidAPIKeyError: # This 
is a user error (invalid API key format), not a system error # Log as warning to avoid Sentry alerts try: @@ -262,7 +262,7 @@ async def v2_create_session(request: Request, supabase: AsyncSupabaseClient): ) except Exception: logger.warning(f"{request.url.path}: Invalid API key format") - + return JSONResponse( { "path": request.url.path, @@ -784,7 +784,7 @@ async def v2_get_session_stats(session_id: str, request: Request, supabase: Asyn return JSONResponse(stats) - except InvalidAPIKeyError as e: + except InvalidAPIKeyError: # This is a user error (invalid API key format), not a system error # Log as warning to avoid Sentry alerts logger.warning(f"{request.url.path}: Invalid API key format for session {session_id}") @@ -837,7 +837,7 @@ async def v2_export_session(session_id: str, request: Request, supabase: AsyncSu return JSONResponse(export_data) - except InvalidAPIKeyError as e: + except InvalidAPIKeyError: # This is a user error (invalid API key format), not a system error # Log as warning to avoid Sentry alerts logger.warning(f"{request.url.path}: Invalid API key format for session {session_id}") diff --git a/app/api/agentops/auth/app.py b/app/api/agentops/auth/app.py index 2a70a039e..2d881e162 100644 --- a/app/api/agentops/auth/app.py +++ b/app/api/agentops/auth/app.py @@ -91,4 +91,4 @@ router = APIRouter(route_class=AuthenticatedRoute) register_routes(router, route_config, prefix="/auth") -app.include_router(router) \ No newline at end of file +app.include_router(router) diff --git a/app/api/agentops/deploy/views/setup.py b/app/api/agentops/deploy/views/setup.py index 4498492ba..19aef9805 100644 --- a/app/api/agentops/deploy/views/setup.py +++ b/app/api/agentops/deploy/views/setup.py @@ -80,11 +80,11 @@ async def __call__( "Authorization": f"Bearer {project.github_oath_access_token}", "Accept": "application/vnd.github+json", } - + all_repos = [] page = 1 per_page = 100 # Maximum allowed by GitHub API - + try: while True: params = { @@ -93,15 +93,15 @@ async def __call__( "sort": "created", # Sort by creation date "direction": "desc" # Most recent first } - + resp = requests.get("https://api.github.com/user/repos", headers=headers, params=params) resp.raise_for_status() repos = resp.json() - + # If no repos returned, we've reached the end if not repos: break - + # Add repos from this page all_repos.extend([ { @@ -114,14 +114,14 @@ async def __call__( } for repo in repos ]) - + # If we got fewer repos than per_page, we've reached the end if len(repos) < per_page: break - + page += 1 - + return all_repos - + except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to fetch repos from GitHub: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Failed to fetch repos from GitHub: {str(e)}") diff --git a/app/api/agentops/public/agent/__init__.py b/app/api/agentops/public/agent/__init__.py index d8380b0bc..fb7bd1dfd 100644 --- a/app/api/agentops/public/agent/__init__.py +++ b/app/api/agentops/public/agent/__init__.py @@ -1 +1 @@ -# these are the public views for interacting with hosted agents \ No newline at end of file +# these are the public views for interacting with hosted agents diff --git a/app/api/agentops/public/agent/base.py b/app/api/agentops/public/agent/base.py index cefffc388..c6a9a7eee 100644 --- a/app/api/agentops/public/agent/base.py +++ b/app/api/agentops/public/agent/base.py @@ -5,8 +5,7 @@ from fastapi import Request, HTTPException, Depends from agentops.common.route_config import BaseView from agentops.common.orm 
import Session, get_orm_session -from agentops.api.auth import JWTPayload, verify_jwt -from agentops.opsboard.models import BaseProjectModel, ProjectModel, SparseProjectModel +from agentops.opsboard.models import ProjectModel from agentops.deploy.models import HostingProjectModel @@ -35,7 +34,7 @@ async def create(cls, request: Request) -> "BaseAgentAPIView": # we use a constructor to allow us to execute async methods on creation instance = await super().create(request=request) return instance - + class AuthenticatedByKeyAgentAPIView(BaseAgentAPIView, ABC): """ @@ -59,7 +58,7 @@ async def get_project(self, orm: Session = Depends(get_orm_session)) -> ProjectM self._validate_api_key(api_key) project = ProjectModel.get_by_api_key(orm, api_key) return project - + async def get_hosted_project(self, orm: Session = Depends(get_orm_session)) -> HostingProjectModel: """Get hosted project for authenticated use cases via API key.""" # For API key auth, hosted project is the same as regular project @@ -67,4 +66,4 @@ async def get_hosted_project(self, orm: Session = Depends(get_orm_session)) -> H self._validate_api_key(api_key) project = ProjectModel.get_by_api_key(orm, api_key) hosted_project = HostingProjectModel.get_by_id(orm, project.id) - return hosted_project \ No newline at end of file + return hosted_project diff --git a/app/api/agentops/public/agent/job.py b/app/api/agentops/public/agent/job.py index ab12e6c53..09c2444f5 100644 --- a/app/api/agentops/public/agent/job.py +++ b/app/api/agentops/public/agent/job.py @@ -1,5 +1,5 @@ import pydantic -from fastapi import Depends, HTTPException +from fastapi import Depends from agentops.common.orm import Session, get_orm_session from .base import AuthenticatedByKeyAgentAPIView, BaseResponse from jockey.backend.models.job import Job @@ -21,7 +21,7 @@ class JobResponse(BaseResponse): @classmethod def validate_uuid(cls, v): return str(v) - + @pydantic.field_validator("job", mode="before") @classmethod def to_string(cls, v) -> str: @@ -37,11 +37,11 @@ class KickoffRunView(AuthenticatedByKeyAgentAPIView): async def __call__(self, body: JobRequest, orm: Session = Depends(get_orm_session)) -> JobResponse: job = await self.start_run(body=body, orm=orm) return JobResponse.model_validate(job) - + async def start_run(self, body: JobRequest, orm: Session) -> Job: project = await self.get_project(orm=orm) run_request = RunJobRequest(inputs=body.inputs, callback_url=body.callback_url) - + initiate_run_view = InitiateRunView() initiate_run_view.request = self.request deployment_response = await initiate_run_view.__call__( @@ -49,13 +49,13 @@ async def start_run(self, body: JobRequest, orm: Session) -> Job: body=run_request, orm=orm ) - + job = Job( name=f"agent-job-{deployment_response.job_id}", image_url="", namespace="", ) - + return JobResponse( id=deployment_response.job_id, agent_id=project.id, @@ -73,13 +73,13 @@ class JobStatusView(AuthenticatedByKeyAgentAPIView): async def __call__(self, orm: Session = Depends(get_orm_session)) -> JobResponse: job = await self.get_job(orm=orm) return JobResponse.model_validate(job) - + async def get_job(self, orm: Session) -> Job: """Get job details - implement based on your requirements.""" # This is a placeholder implementation # You'll need to implement this based on how you want to retrieve job information raise NotImplementedError("get_job method not implemented") - + class JobHistoryView(AuthenticatedByKeyAgentAPIView): __name__ = "Get Project" @@ -92,9 +92,9 @@ class JobHistoryView(AuthenticatedByKeyAgentAPIView): 
async def __call__(self, orm: Session = Depends(get_orm_session)) -> JobResponse: project = await self.get_project(orm=orm) return JobResponse.model_validate(project) - + async def get_project(self, orm: Session) -> Job: """Get project details - implement based on your requirements.""" # This is a placeholder implementation # You'll need to implement this based on how you want to retrieve project information - raise NotImplementedError("get_project method not implemented") \ No newline at end of file + raise NotImplementedError("get_project method not implemented") diff --git a/app/api/tests/auth/test_public_routes.py b/app/api/tests/auth/test_public_routes.py index 1583ecfe4..14e2d4f9f 100644 --- a/app/api/tests/auth/test_public_routes.py +++ b/app/api/tests/auth/test_public_routes.py @@ -14,7 +14,7 @@ # Mark to skip rate limit tests when Redis is not available redis_required = pytest.mark.skipif( - not (REDIS_HOST and REDIS_PORT), + not (REDIS_HOST and REDIS_PORT), reason="Rate limit tests require Redis (REDIS_HOST and REDIS_PORT env vars)" ) @@ -211,7 +211,7 @@ async def __call__(self): # Create an instance and test validation view_instance = TestView(mock_request) - + # The wrapped __call__ method should validate the request with patch('agentops.auth.views._validate_request') as mock_validate: import asyncio @@ -220,10 +220,10 @@ async def __call__(self): mock_validate.assert_called_once_with(mock_request) -@patch("agentops.auth.views.API_URL", "https://api.agentops.ai") +@patch("agentops.auth.views.API_URL", "https://api.agentops.ai") def test_public_route_decorator_invalid_class(): """Test that the public_route decorator raises TypeError for non-BaseView classes.""" - + # Should raise TypeError when decorating a non-BaseView class with pytest.raises(TypeError, match="must inherit from BaseView"): @public_route diff --git a/app/api/tests/common/test_freeplan.py b/app/api/tests/common/test_freeplan.py index 6d6e106c7..58897e7b2 100644 --- a/app/api/tests/common/test_freeplan.py +++ b/app/api/tests/common/test_freeplan.py @@ -156,8 +156,8 @@ def test_combined_exclude_maxlines_and_maxitems(): multiline_text = "line1\nline2\nline3\nline4" test_list = ["item1", "item2", "item3", "item4"] response = CombinedResponse( - excluded_field="excluded", - multiline_field=multiline_text, + excluded_field="excluded", + multiline_field=multiline_text, list_field=test_list, regular_field="regular" ) @@ -245,11 +245,11 @@ def test_start_time_is_none(self): """Test that None start_time returns cutoff and modified=True.""" days_cutoff = 30 result, modified = freeplan_clamp_start_time(None, days_cutoff) - + # Should return cutoff datetime expected_cutoff = datetime.now(timezone.utc) - timedelta(days=days_cutoff) assert abs((result - expected_cutoff).total_seconds()) < 1 - + # Should be marked as modified since None was converted to cutoff assert modified is True @@ -259,13 +259,13 @@ def test_start_time_before_cutoff(self): days_cutoff = 30 # Date older than cutoff (further in the past) old_start_time = now - timedelta(days=days_cutoff + 10) - + result, modified = freeplan_clamp_start_time(old_start_time, days_cutoff) - + # Should return cutoff, not the original old date expected_cutoff = now - timedelta(days=days_cutoff) assert abs((result - expected_cutoff).total_seconds()) < 1 - + # Should be marked as modified since it was clamped assert modified is True @@ -275,12 +275,12 @@ def test_start_time_after_cutoff(self): days_cutoff = 30 # Date newer than cutoff (more recent) recent_start_time = now - 
timedelta(days=days_cutoff - 5) - + result, modified = freeplan_clamp_start_time(recent_start_time, days_cutoff) - + # Should return the original date unchanged assert result == recent_start_time - + # Should not be marked as modified since no clamping occurred assert modified is False @@ -289,12 +289,12 @@ def test_start_time_at_cutoff(self): now = datetime.now(timezone.utc) days_cutoff = 30 cutoff_start_time = now - timedelta(days=days_cutoff) - + result, modified = freeplan_clamp_start_time(cutoff_start_time, days_cutoff) - + # Should return a date very close to the cutoff date (allowing for timing differences) assert abs((result - cutoff_start_time).total_seconds()) < 1 - + # The timing precision issue means this might be marked as modified due to microsecond differences # So we'll accept either outcome for this edge case assert modified in [True, False] @@ -305,21 +305,21 @@ def test_start_time_with_mocked_time(self, mock_datetime): # Fix the current time fixed_now = datetime(2023, 1, 15, tzinfo=timezone.utc) mock_datetime.now.return_value = fixed_now - + cutoff_days = 30 cutoff = fixed_now - timedelta(days=cutoff_days) - + # Test None case result, modified = freeplan_clamp_start_time(None, cutoff_days) assert result == cutoff assert modified is True - + # Test old date case old_date = fixed_now - timedelta(days=cutoff_days + 5) result, modified = freeplan_clamp_start_time(old_date, cutoff_days) assert result == cutoff assert modified is True - + # Test recent date case recent_date = fixed_now - timedelta(days=cutoff_days - 5) result, modified = freeplan_clamp_start_time(recent_date, cutoff_days) @@ -333,15 +333,15 @@ class TestFreePlanClampEndTime: def test_end_time_is_none(self): """Test that None end_time returns current time and modified=True.""" days_cutoff = 30 - + # Capture current time before the call before_call = datetime.now(timezone.utc) result, modified = freeplan_clamp_end_time(None, days_cutoff) after_call = datetime.now(timezone.utc) - + # Should return current time (within reasonable bounds) assert before_call <= result <= after_call - + # Should be marked as modified since None was converted to current time assert modified is True @@ -351,13 +351,13 @@ def test_end_time_before_cutoff(self): days_cutoff = 30 # Date older than cutoff (further in the past) old_end_time = now - timedelta(days=days_cutoff + 10) - + result, modified = freeplan_clamp_end_time(old_end_time, days_cutoff) - + # Should return cutoff, not the original old date expected_cutoff = now - timedelta(days=days_cutoff) assert abs((result - expected_cutoff).total_seconds()) < 1 - + # Should be marked as modified since it was clamped assert modified is True @@ -367,12 +367,12 @@ def test_end_time_after_cutoff(self): days_cutoff = 30 # Date newer than cutoff (more recent) recent_end_time = now - timedelta(days=days_cutoff - 5) - + result, modified = freeplan_clamp_end_time(recent_end_time, days_cutoff) - + # Should return the original date unchanged assert result == recent_end_time - + # Should not be marked as modified since no clamping occurred assert modified is False @@ -381,12 +381,12 @@ def test_end_time_at_cutoff(self): now = datetime.now(timezone.utc) days_cutoff = 30 cutoff_end_time = now - timedelta(days=days_cutoff) - + result, modified = freeplan_clamp_end_time(cutoff_end_time, days_cutoff) - + # Should return a date very close to the cutoff date (allowing for timing differences) assert abs((result - cutoff_end_time).total_seconds()) < 1 - + # The timing precision issue means this might be 
marked as modified due to microsecond differences # So we'll accept either outcome for this edge case assert modified in [True, False] @@ -397,21 +397,21 @@ def test_end_time_with_mocked_time(self, mock_datetime): # Fix the current time fixed_now = datetime(2023, 1, 15, tzinfo=timezone.utc) mock_datetime.now.return_value = fixed_now - + cutoff_days = 30 cutoff = fixed_now - timedelta(days=cutoff_days) - + # Test None case - should return current time result, modified = freeplan_clamp_end_time(None, cutoff_days) assert result == fixed_now assert modified is True - + # Test old date case - should be clamped to cutoff old_date = fixed_now - timedelta(days=cutoff_days + 5) result, modified = freeplan_clamp_end_time(old_date, cutoff_days) assert result == cutoff assert modified is True - + # Test recent date case - should not be clamped recent_date = fixed_now - timedelta(days=cutoff_days - 5) result, modified = freeplan_clamp_end_time(recent_date, cutoff_days) diff --git a/app/api/tests/common/test_orm_require_loaded.py b/app/api/tests/common/test_orm_require_loaded.py index 4769acd6d..c8b36c8f0 100644 --- a/app/api/tests/common/test_orm_require_loaded.py +++ b/app/api/tests/common/test_orm_require_loaded.py @@ -14,18 +14,18 @@ class ModelBase(DeclarativeBase): class AuthorModel(ModelBase): """Test model representing an author for integration testing.""" __tablename__ = "test_authors" - + id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) - + # Relationship to books books = relationship("BookModel", back_populates="author", lazy="raise") - + @require_loaded("books") def get_book_count(self): """Method that requires books to be loaded.""" return len(self.books) - + @require_loaded("books") def get_book_titles(self): """Method that requires books to be loaded to access titles.""" @@ -35,15 +35,15 @@ def get_book_titles(self): class BookModel(ModelBase): """Test model representing a book for integration testing.""" __tablename__ = "test_books" - + id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) title = Column(String, nullable=False) page_count = Column(Integer, default=100) author_id = Column(String, ForeignKey("test_authors.id")) - + # Relationship to author author = relationship("AuthorModel", back_populates="books", lazy="raise") - + @require_loaded("author") def get_author_name(self): """Method that requires author to be loaded.""" @@ -59,12 +59,12 @@ def session(self): engine = create_engine("sqlite:///:memory:", echo=False) Session = sessionmaker(bind=engine) session = Session() - + # Create tables ModelBase.metadata.create_all(bind=engine) - + yield session - + session.close() @pytest.fixture(autouse=True) @@ -73,29 +73,29 @@ def setup_test_data(self, session): # Create test author self.author = AuthorModel(id=str(uuid.uuid4()), name="Test Author") session.add(self.author) - + # Create test books self.book1 = BookModel( id=str(uuid.uuid4()), - title="Book One", + title="Book One", page_count=200, author_id=self.author.id ) self.book2 = BookModel( id=str(uuid.uuid4()), - title="Book Two", + title="Book Two", page_count=150, author_id=self.author.id ) session.add(self.book1) session.add(self.book2) session.commit() - + # Store IDs before expunging self.author_id = self.author.id self.book1_id = self.book1.id self.book2_id = self.book2.id - + # Clear session to ensure fresh loads session.expunge_all() @@ -108,11 +108,11 @@ def test_require_loaded_succeeds_with_preloaded_relationship(self, session): 
.filter_by(id=self.author_id) .first() ) - + # Should work because books are preloaded book_count = author.get_book_count() assert book_count == 2 - + book_titles = author.get_book_titles() assert "Book One" in book_titles assert "Book Two" in book_titles @@ -121,7 +121,7 @@ def test_require_loaded_fails_without_preloaded_relationship(self, session): """Test that decorator fails when relationship is not preloaded.""" # Load author WITHOUT preloading books author = session.query(AuthorModel).filter_by(id=self.author_id).first() - + # Should fail because books are not preloaded (lazy="raise" by default) with pytest.raises(RuntimeError, match="relationship 'books' not loaded for AuthorModel"): author.get_book_count() @@ -135,7 +135,7 @@ def test_require_loaded_with_joinedload_succeeds(self, session): .filter_by(id=self.author_id) .first() ) - + # Should work because books are loaded book_count = author.get_book_count() assert book_count == 2 @@ -149,7 +149,7 @@ def test_require_loaded_reverse_relationship(self, session): .filter_by(id=self.book1_id) .first() ) - + # Should work because author is preloaded author_name = book.get_author_name() assert author_name == "Test Author" @@ -158,7 +158,7 @@ def test_require_loaded_reverse_relationship_fails_without_preload(self, session """Test decorator fails on reverse relationship without preload.""" # Load book WITHOUT preloading author book = session.query(BookModel).filter_by(id=self.book1_id).first() - + # Should fail because author is not preloaded with pytest.raises(RuntimeError, match="relationship 'author' not loaded for BookModel"): book.get_author_name() @@ -170,10 +170,10 @@ def test_require_loaded_with_multiple_fields(self, session): def get_detailed_info(self): # This method requires books to be loaded return f"Author {self.name} has {len(self.books)} books" - + # Monkey patch the method onto AuthorModel for this test AuthorModel.get_detailed_info = get_detailed_info - + # Load author with books preloaded author = ( session.query(AuthorModel) @@ -181,11 +181,11 @@ def get_detailed_info(self): .filter_by(id=self.author_id) .first() ) - + # Should work because books relationship is loaded info = author.get_detailed_info() assert "Author Test Author has 2 books" == info - + # Clean up the monkey patch delattr(AuthorModel, 'get_detailed_info') @@ -195,11 +195,11 @@ def test_require_loaded_handles_empty_relationships(self, session): author_no_books = AuthorModel(id=str(uuid.uuid4()), name="Author No Books") session.add(author_no_books) session.commit() - + # Store ID before expunging author_no_books_id = author_no_books.id session.expunge_all() - + # Load with books preloaded (but empty) author = ( session.query(AuthorModel) @@ -207,11 +207,11 @@ def test_require_loaded_handles_empty_relationships(self, session): .filter_by(id=author_no_books_id) .first() ) - + # Should work even with empty relationship book_count = author.get_book_count() assert book_count == 0 - + book_titles = author.get_book_titles() assert book_titles == [] @@ -224,12 +224,12 @@ def test_require_loaded_preserves_method_signature(self, session): .filter_by(id=self.author_id) .first() ) - + # Verify the decorated method works correctly assert callable(author.get_book_count) assert author.get_book_count.__name__ == "get_book_count" - + # Verify it returns the expected type and value count = author.get_book_count() assert isinstance(count, int) - assert count == 2 \ No newline at end of file + assert count == 2 diff --git a/app/api/tests/deploy/__init__.py 
b/app/api/tests/deploy/__init__.py index e9bf83ace..572cd81cc 100644 --- a/app/api/tests/deploy/__init__.py +++ b/app/api/tests/deploy/__init__.py @@ -1 +1 @@ -# Deploy tests package \ No newline at end of file +# Deploy tests package diff --git a/app/api/tests/deploy/test_deployment_integration.py b/app/api/tests/deploy/test_deployment_integration.py index 4686b44da..55af07cf3 100644 --- a/app/api/tests/deploy/test_deployment_integration.py +++ b/app/api/tests/deploy/test_deployment_integration.py @@ -79,7 +79,7 @@ async def test_create_secret_success( # Create the view and call it directly view = CreateUpdateSecretView(mock_request) body = CreateSecretRequest(name="DATABASE_URL", value="postgresql://localhost:5432/test") - + response = await view( project_id="project-123", body=body, @@ -103,7 +103,7 @@ async def test_list_secrets_success( ): # Create the view and call it directly view = ListSecretsView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -119,13 +119,13 @@ async def test_deployment_not_found(self, mock_request, orm_session): with patch.object(ProjectModel, 'get_by_id', return_value=None): # Create the view and call it directly view = ListSecretsView(mock_request) - + with pytest.raises(HTTPException) as exc_info: await view( project_id="00000000-0000-0000-0000-000000000000", orm=orm_session, ) - + assert exc_info.value.status_code == 404 assert "Project not found" in str(exc_info.value.detail) @@ -155,7 +155,7 @@ async def test_initiate_deployment_success( ): # Create the view and call it directly view = InitiateDeploymentView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -172,10 +172,10 @@ async def test_deployment_status_success( """Test getting deployment status via view function.""" from datetime import datetime from enum import Enum - + class MockStatus(Enum): SUCCESS = "success" - + # Setup mocks mock_events = [ Mock( @@ -193,7 +193,7 @@ class MockStatus(Enum): ): # Create the view and call it directly view = DeploymentStatusView(mock_request) - + response = await view( project_id="project-123", job_id="test-job-123", @@ -211,7 +211,7 @@ async def test_deployment_status_with_start_date( ): """Test getting deployment status with start date filter via view function.""" from datetime import datetime - + # Setup mocks mock_get_events.return_value = [] @@ -222,7 +222,7 @@ async def test_deployment_status_with_start_date( # Create the view and call it directly view = DeploymentStatusView(mock_request) start_date = datetime.fromisoformat("2024-01-01T12:00:00") - + response = await view( project_id="project-123", job_id="test-job-456", @@ -259,7 +259,7 @@ async def test_create_secret_kubernetes_error( # Create the view and call it directly view = CreateUpdateSecretView(mock_request) body = CreateSecretRequest(name="DATABASE_URL", value="postgresql://localhost:5432/test") - + # The view should let the exception bubble up with pytest.raises(Exception) as exc_info: await view( @@ -267,7 +267,7 @@ async def test_create_secret_kubernetes_error( body=body, orm=orm_session, ) - + assert "Kubernetes API error" in str(exc_info.value) @@ -287,11 +287,11 @@ async def test_deployment_history_success( ): """Test getting deployment history via view function.""" from enum import Enum - + class MockStatus(Enum): SUCCESS = "success" RUNNING = "running" - + # Setup mock jobs mock_jobs = [ { @@ -303,14 +303,14 @@ class MockStatus(Enum): }, { "job_id": "job-456", - "project_id": "project-123", + "project_id": 
"project-123", "namespace": "test-namespace", "queued_at": "2024-01-01T11:00:00", "config": {}, }, ] mock_get_tasks.return_value = mock_jobs - + # Setup mock status for each job (get_task_status returns BaseEvent) def mock_status_side_effect(job_id): if job_id == "job-123": @@ -324,7 +324,7 @@ def mock_status_side_effect(job_id): message="Deployment in progress", ) return None - + mock_get_status.side_effect = mock_status_side_effect with ( @@ -333,7 +333,7 @@ def mock_status_side_effect(job_id): ): # Create the view and call it directly view = DeploymentHistoryView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -341,14 +341,14 @@ def mock_status_side_effect(job_id): # Verify response assert len(response.jobs) == 2 - + # Check first job job1 = response.jobs[0] assert job1.id == "job-123" assert job1.queued_at == "2024-01-01T10:00:00" assert job1.status == "success" assert job1.message == "Deployment completed successfully" - + # Check second job job2 = response.jobs[1] assert job2.id == "job-456" @@ -373,7 +373,7 @@ async def test_deployment_history_no_events( { "job_id": "job-789", "project_id": "project-123", - "namespace": "test-namespace", + "namespace": "test-namespace", "queued_at": "2024-01-01T12:00:00", "config": {}, }, @@ -387,7 +387,7 @@ async def test_deployment_history_no_events( ): # Create the view and call it directly view = DeploymentHistoryView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -419,7 +419,7 @@ async def test_deployment_history_empty_jobs( ): # Create the view and call it directly view = DeploymentHistoryView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -445,9 +445,8 @@ def mock_project_with_api_key(self): def test_hosting_project_deployment_config_with_fastapi_pack(self, mock_project_with_api_key): """Test that FASTAPI pack creates correct deployment config.""" - from jockey import DeploymentConfig from agentops.deploy.models import HostingProjectModel - + # Create hosting project with FASTAPI pack hosting_project = HostingProjectModel() hosting_project.id = "project-123" @@ -459,11 +458,11 @@ def test_hosting_project_deployment_config_with_fastapi_pack(self, mock_project_ hosting_project.watch_path = None hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Mock list_secrets to return empty list with patch('jockey.list_secrets', return_value=[]): config = hosting_project.deployment_config - + # Verify FASTAPI pack defaults are applied assert config.dockerfile_template == "fastapi-agent" assert config.ports == [8000] @@ -473,9 +472,8 @@ def test_hosting_project_deployment_config_with_fastapi_pack(self, mock_project_ def test_hosting_project_deployment_config_with_crewai_pack(self, mock_project_with_api_key): """Test that CREWAI pack creates correct deployment config.""" - from jockey import DeploymentConfig from agentops.deploy.models import HostingProjectModel - + # Create hosting project with CREWAI pack hosting_project = HostingProjectModel() hosting_project.id = "project-123" @@ -487,11 +485,11 @@ def test_hosting_project_deployment_config_with_crewai_pack(self, mock_project_w hosting_project.watch_path = "src/" hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Mock list_secrets to return empty list with patch('jockey.list_secrets', return_value=[]): config = hosting_project.deployment_config - + # Verify CREWAI pack defaults are applied 
assert config.dockerfile_template == "crewai-agent" assert config.ports == [8080] @@ -502,9 +500,8 @@ def test_hosting_project_deployment_config_with_crewai_pack(self, mock_project_w def test_hosting_project_deployment_config_with_crewai_job_pack(self, mock_project_with_api_key): """Test that CREWAI_JOB pack creates correct deployment config.""" - from jockey import DeploymentConfig from agentops.deploy.models import HostingProjectModel - + # Create hosting project with CREWAI_JOB pack hosting_project = HostingProjectModel() hosting_project.id = "project-123" @@ -516,11 +513,11 @@ def test_hosting_project_deployment_config_with_crewai_job_pack(self, mock_proje hosting_project.watch_path = "src/" hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Mock list_secrets to return empty list with patch('jockey.list_secrets', return_value=[]): config = hosting_project.deployment_config - + # Verify CREWAI_JOB pack defaults are applied assert config.dockerfile_template == "crewai-job" assert config.ports == [] # No ports for job execution @@ -530,9 +527,8 @@ def test_hosting_project_deployment_config_with_crewai_job_pack(self, mock_proje def test_hosting_project_deployment_config_with_none_pack_fallback(self, mock_project_with_api_key): """Test that None pack_name falls back to FASTAPI.""" - from jockey import DeploymentConfig from agentops.deploy.models import HostingProjectModel - + # Create hosting project with None pack_name hosting_project = HostingProjectModel() hosting_project.id = "project-123" @@ -544,11 +540,11 @@ def test_hosting_project_deployment_config_with_none_pack_fallback(self, mock_pr hosting_project.watch_path = None hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Mock list_secrets to return empty list with patch('jockey.list_secrets', return_value=[]): config = hosting_project.deployment_config - + # Should fall back to FASTAPI defaults assert config.dockerfile_template == "fastapi-agent" assert config.ports == [8000] @@ -556,9 +552,8 @@ def test_hosting_project_deployment_config_with_none_pack_fallback(self, mock_pr def test_hosting_project_deployment_config_with_invalid_pack_raises_error(self, mock_project_with_api_key): """Test that invalid pack_name raises ValueError.""" - from jockey import DeploymentConfig from agentops.deploy.models import HostingProjectModel - + # Create hosting project with invalid pack_name hosting_project = HostingProjectModel() hosting_project.id = "project-123" @@ -570,7 +565,7 @@ def test_hosting_project_deployment_config_with_invalid_pack_raises_error(self, hosting_project.watch_path = None hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Mock list_secrets to return empty list with patch('jockey.list_secrets', return_value=[]): with pytest.raises(ValueError, match="Invalid deployment pack name: INVALID_PACK"): @@ -596,10 +591,10 @@ async def test_initiate_deployment_with_crewai_pack( hosting_project.watch_path = "src/" hosting_project.user_callback_url = None hosting_project.project = mock_project_with_api_key - + # Setup mocks mock_queue.return_value = "job-456" - + with ( patch.object(HostingProjectModel, 'get_by_id', return_value=hosting_project), patch.object(ProjectModel, 'get_by_id', return_value=mock_project_with_api_key), @@ -607,7 +602,7 @@ async def test_initiate_deployment_with_crewai_pack( ): # Create the view and call it directly view = InitiateDeploymentView(mock_request) - + response = 
await view( project_id="project-123", orm=orm_session, @@ -617,12 +612,12 @@ async def test_initiate_deployment_with_crewai_pack( assert response.success is True assert response.message == "Deployment initiated successfully" assert response.job_id == "job-456" - + # Verify queue_task was called with correct config mock_queue.assert_called_once() call_args = mock_queue.call_args config = call_args[1]["config"] # Get config from kwargs - + # Should have CREWAI pack defaults assert config.dockerfile_template == "crewai-agent" assert config.ports == [8080] @@ -649,10 +644,10 @@ async def test_initiate_deployment_preserves_user_overrides( hosting_project.watch_path = "custom/path/" hosting_project.user_callback_url = "https://custom-callback.com" hosting_project.project = mock_project_with_api_key - + # Setup mocks mock_queue.return_value = "job-789" - + with ( patch.object(HostingProjectModel, 'get_by_id', return_value=hosting_project), patch.object(ProjectModel, 'get_by_id', return_value=mock_project_with_api_key), @@ -660,7 +655,7 @@ async def test_initiate_deployment_preserves_user_overrides( ): # Create the view and call it directly view = InitiateDeploymentView(mock_request) - + response = await view( project_id="project-123", orm=orm_session, @@ -668,17 +663,17 @@ async def test_initiate_deployment_preserves_user_overrides( # Verify response assert response.success is True - + # Verify config has pack defaults but user overrides mock_queue.assert_called_once() call_args = mock_queue.call_args config = call_args[1]["config"] # Get config from kwargs - + # Pack defaults assert config.dockerfile_template == "fastapi-agent" assert config.ports == [8000] assert config.build_files == {} - + # User overrides assert config.repository_url == "https://github.com/test/custom-repo" assert config.branch == "feature-branch" diff --git a/app/api/tests/opsboard/test_auth_user_model.py b/app/api/tests/opsboard/test_auth_user_model.py index 94e718b79..2df3000fc 100644 --- a/app/api/tests/opsboard/test_auth_user_model.py +++ b/app/api/tests/opsboard/test_auth_user_model.py @@ -9,11 +9,11 @@ async def auth_user(orm_session: Session, test_user: UserModel) -> AuthUserModel """Get the auth user corresponding to the test user.""" # Get the auth user that corresponds to our test user auth_user = orm_session.get(AuthUserModel, test_user.id) - + if not auth_user: # This should not happen with proper test setup since auth users should be seeded raise RuntimeError(f"No auth user found for test user ID {test_user.id}. 
Check seed data.") - + return auth_user @@ -35,7 +35,7 @@ async def test_billing_email_property_with_auth_user(self, orm_session: Session, """Test that billing_email property returns email from auth.users table.""" # Refresh the user to ensure the relationship is loaded orm_session.refresh(test_user) - + # The billing_email should come from auth.users, not public.users # Note: The actual auth email will depend on what's seeded for this test user ID assert test_user.billing_email is not None # Should have an auth email @@ -49,16 +49,16 @@ async def test_billing_email_property_without_auth_user(self, orm_session: Sessi # Get the auth user and temporarily modify its email to None for testing auth_user = test_user.auth_user original_email = auth_user.email - + try: # Temporarily modify the auth user's email in memory only (not persisted) # This simulates the case where auth.users.email is NULL object.__setattr__(auth_user, 'email', None) - + # billing_email should return None when auth email is null assert test_user.billing_email is None assert test_user.email == "test@example.com" # public email remains from fixture - + finally: # Restore original email object.__setattr__(auth_user, 'email', original_email) @@ -67,7 +67,7 @@ async def test_auth_user_relationship_lazy_loading(self, orm_session: Session, t """Test that the auth_user relationship works with lazy loading.""" # Get user without explicitly loading auth_user relationship user = orm_session.get(UserModel, test_user.id) - + # Accessing auth_user should trigger lazy load assert user.auth_user is not None # The auth email should exist (specific value depends on seed data) @@ -79,7 +79,7 @@ async def test_auth_user_model_columns(self, auth_user: AuthUserModel): assert hasattr(auth_user, 'id') assert hasattr(auth_user, 'email') assert hasattr(auth_user, 'created_at') - + # Verify column types are as expected assert isinstance(auth_user.id, uuid.UUID) assert isinstance(auth_user.email, str) @@ -88,7 +88,7 @@ async def test_auth_user_model_read_only(self, auth_user: AuthUserModel): """Test that AuthUserModel prevents modifications.""" # Test that we can read the auth user assert auth_user.email is not None - + # Test that attempting to modify a persistent auth user raises an error with pytest.raises(RuntimeError, match="AuthUserModel is read-only"): - auth_user.email = "modified@example.com" \ No newline at end of file + auth_user.email = "modified@example.com" diff --git a/app/api/tests/opsboard/views/test_projects_missing.py b/app/api/tests/opsboard/views/test_projects_missing.py index b875a2637..9a7524490 100644 --- a/app/api/tests/opsboard/views/test_projects_missing.py +++ b/app/api/tests/opsboard/views/test_projects_missing.py @@ -25,8 +25,8 @@ async def test_regenerate_api_key_not_admin(mock_request, orm_session, test_user # Create a user-org relationship with developer role (not admin or owner) user_org = UserOrgModel( - user_id=test_user.id, - org_id=org.id, + user_id=test_user.id, + org_id=org.id, role=OrgRoles.developer, # Developer role, not admin or owner user_email=test_user.email ) @@ -74,4 +74,4 @@ async def test_regenerate_api_key_project_not_found(mock_request, orm_session): regenerate_api_key(request=mock_request, project_id=non_existent_id, orm=orm_session) assert excinfo.value.status_code == 404 - assert excinfo.value.detail == "Project not found" \ No newline at end of file + assert excinfo.value.detail == "Project not found" diff --git a/app/api/tests/v3/test_jwt_generation.py b/app/api/tests/v3/test_jwt_generation.py 
index c61dcc1bf..ab88293e2 100644 --- a/app/api/tests/v3/test_jwt_generation.py +++ b/app/api/tests/v3/test_jwt_generation.py @@ -14,16 +14,16 @@ async def test_get_token_valid_api_key(async_app_client): mock_project.id = "test-project-id" mock_project.api_key = UUID("11111111-1111-1111-1111-111111111111") mock_project.org.prem_status.value = "premium" - + # Mock the ProjectModel.get_by_api_key method with patch.object(ProjectModel, 'get_by_api_key', return_value=mock_project): # Mock the generate_jwt function to return a predictable token with patch('agentops.api.routes.v3.generate_jwt', return_value="test.jwt.token"): response = await async_app_client.post( - "/v3/auth/token", + "/v3/auth/token", json={"api_key": "11111111-1111-1111-1111-111111111111"} ) - + assert response.status_code == 200 data = response.json() assert "token" in data @@ -36,10 +36,10 @@ async def test_get_token_valid_api_key(async_app_client): async def test_get_token_invalid_api_key_format(async_app_client): """Test getting a JWT token with an invalid API key format""" response = await async_app_client.post( - "/v3/auth/token", + "/v3/auth/token", json={"api_key": "not-a-uuid"} ) - + assert response.status_code == 400 assert "Invalid API key format" in response.json()["error"] @@ -50,10 +50,10 @@ async def test_get_token_nonexistent_api_key(async_app_client): # Mock the ProjectModel.get_by_api_key method to return None with patch.object(ProjectModel, 'get_by_api_key', return_value=None): response = await async_app_client.post( - "/v3/auth/token", + "/v3/auth/token", json={"api_key": "11111111-1111-1111-1111-111111111111"} ) - + assert response.status_code == 403 assert "Invalid API key" in response.json()["error"] @@ -64,9 +64,9 @@ async def test_get_token_server_error(async_app_client): # Mock the ProjectModel.get_by_api_key method to raise an exception with patch.object(ProjectModel, 'get_by_api_key', side_effect=Exception("Database error")): response = await async_app_client.post( - "/v3/auth/token", + "/v3/auth/token", json={"api_key": "11111111-1111-1111-1111-111111111111"} ) - + assert response.status_code == 500 - assert "Authentication failed" in response.json()["error"] \ No newline at end of file + assert "Authentication failed" in response.json()["error"] diff --git a/app/api/tests/v4/test_logs.py b/app/api/tests/v4/test_logs.py index 4b9ecc335..159b013b5 100644 --- a/app/api/tests/v4/test_logs.py +++ b/app/api/tests/v4/test_logs.py @@ -338,4 +338,4 @@ async def test_get_trace_logs_s3_file_not_found(self, mock_s3_client): await get_trace_logs(request=mock_request, orm=mock_orm, trace_id=trace_id) assert exc_info.value.status_code == status.HTTP_404_NOT_FOUND - assert f"No logs found for trace ID: {trace_id}" in exc_info.value.detail \ No newline at end of file + assert f"No logs found for trace ID: {trace_id}" in exc_info.value.detail diff --git a/app/api/tests/v4/test_objects.py b/app/api/tests/v4/test_objects.py index a4a57d123..3691f0cfe 100644 --- a/app/api/tests/v4/test_objects.py +++ b/app/api/tests/v4/test_objects.py @@ -150,4 +150,4 @@ async def async_stream_generator(): # Verify the complete content was uploaded uploaded_content = mock_s3_client.upload_fileobj.call_args[0][0] uploaded_content.seek(0) - assert uploaded_content.read() == b''.join(chunks) \ No newline at end of file + assert uploaded_content.read() == b''.join(chunks) diff --git a/app/clickhouse/migrations/0001_udfs_and_pricing.sql b/app/clickhouse/migrations/0001_udfs_and_pricing.sql new file mode 100644 index 000000000..34fd489a8 
--- /dev/null +++ b/app/clickhouse/migrations/0001_udfs_and_pricing.sql @@ -0,0 +1,40 @@ +CREATE TABLE IF NOT EXISTS otel_2.model_costs_source +( + `model` String, + `prompt_cost_per_1k` Float64, + `completion_cost_per_1k` Float64 +) +ENGINE = MergeTree +ORDER BY model; + +DROP DICTIONARY IF EXISTS otel_2.model_costs_dict; +CREATE DICTIONARY otel_2.model_costs_dict +( + `model` String, + `prompt_cost_per_1k` Float64, + `completion_cost_per_1k` Float64 +) +PRIMARY KEY model +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' DB 'otel_2' TABLE 'model_costs_source')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(COMPLEX_KEY_HASHED()); + +DROP FUNCTION IF EXISTS normalize_model_name; +CREATE FUNCTION normalize_model_name AS model -> + multiIf( + lower(model) = 'sonar-pro', 'perplexity/sonar-pro', + lower(model) = 'sonar', 'perplexity/sonar', + lower(model) + ); + +DROP FUNCTION IF EXISTS calculate_prompt_cost; +CREATE FUNCTION calculate_prompt_cost AS (tokens, model) -> + if((tokens > 0) AND (model != ''), + round((toFloat64(tokens) / 1000) * dictGetOrDefault('model_costs_dict', 'prompt_cost_per_1k', normalize_model_name(model), 0.), 7), + 0.); + +DROP FUNCTION IF EXISTS calculate_completion_cost; +CREATE FUNCTION calculate_completion_cost AS (tokens, model) -> + if((tokens > 0) AND (model != ''), + round((toFloat64(tokens) / 1000) * dictGetOrDefault('model_costs_dict', 'completion_cost_per_1k', normalize_model_name(model), 0.), 7), + 0.); diff --git a/app/clickhouse/migrations/0002_span_counts_mv.sql b/app/clickhouse/migrations/0002_span_counts_mv.sql new file mode 100644 index 000000000..8be895050 --- /dev/null +++ b/app/clickhouse/migrations/0002_span_counts_mv.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS otel_2.trace_span_counts +( + `project_id` String, + `TraceId` String, + `span_count_state` AggregateFunction(count) +) +ENGINE = AggregatingMergeTree +ORDER BY (project_id, TraceId); + +DROP VIEW IF EXISTS otel_2.mv_trace_span_counts; +CREATE MATERIALIZED VIEW otel_2.mv_trace_span_counts +TO otel_2.trace_span_counts +AS +SELECT + ResourceAttributes['agentops.project.id'] AS project_id, + TraceId, + countState() AS span_count_state +FROM otel_2.otel_traces +GROUP BY project_id, TraceId; diff --git a/app/clickhouse/migrations/0003_seed_model_costs.sql b/app/clickhouse/migrations/0003_seed_model_costs.sql new file mode 100644 index 000000000..47b8c1708 --- /dev/null +++ b/app/clickhouse/migrations/0003_seed_model_costs.sql @@ -0,0 +1,8 @@ +INSERT INTO otel_2.model_costs_source (model, prompt_cost_per_1k, completion_cost_per_1k) VALUES +('gpt-4o-mini', 0.00015, 0.00060), +('gpt-4o', 0.00500, 0.01500), +('claude-3-5-sonnet', 0.00300, 0.01500), +('perplexity/sonar', 0.00010, 0.00040), +('perplexity/sonar-pro', 0.00050, 0.00150); + +SYSTEM RELOAD DICTIONARY otel_2.model_costs_dict; diff --git a/app/clickhouse/migrations/0004_seed_model_costs_full.sql b/app/clickhouse/migrations/0004_seed_model_costs_full.sql new file mode 100644 index 000000000..b8f2fb85b --- /dev/null +++ b/app/clickhouse/migrations/0004_seed_model_costs_full.sql @@ -0,0 +1,24 @@ + +INSERT INTO otel_2.model_costs_source (model, prompt_cost_per_1k, completion_cost_per_1k) VALUES +('ai21/jamba-instruct', 0.00100, 0.00400), +('ai21/jamba-1-5-large', 0.00250, 0.01000), +('anthropic/claude-3-5-haiku', 0.00080, 0.00400), +('anthropic/claude-3-5-sonnet', 0.00300, 0.01500), +('anthropic/claude-3-opus', 0.01500, 0.07500), +('anthropic/claude-3-sonnet', 0.00300, 0.01500), +('anthropic/claude-3-haiku', 0.00025, 0.00125), +('openai/gpt-4o', 
0.00500, 0.01500), +('openai/gpt-4o-mini', 0.00015, 0.00060), +('openai/gpt-4.1', 0.01000, 0.03000), +('openai/gpt-4o-realtime', 0.01500, 0.06000), +('openai/gpt-3.5-turbo', 0.00050, 0.00150), +('perplexity/sonar', 0.00010, 0.00040), +('perplexity/sonar-pro', 0.00050, 0.00150), +('mistral/mistral-large', 0.00300, 0.01200), +('mistral/mistral-small', 0.00020, 0.00060), +('groq/llama-3.1-8b-instant', 0.00005, 0.00010), +('groq/llama-3.1-70b-versatile', 0.00059, 0.00079), +('google/gemini-1.5-pro', 0.00125, 0.00500), +('google/gemini-1.5-flash', 0.000075, 0.00030); + +SYSTEM RELOAD DICTIONARY otel_2.model_costs_dict; diff --git a/app/compose.yaml b/app/compose.yaml index a9dd8609a..ef5a130c6 100644 --- a/app/compose.yaml +++ b/app/compose.yaml @@ -37,6 +37,10 @@ services: CLICKHOUSE_SECURE: ${CLICKHOUSE_SECURE} CLICKHOUSE_ENDPOINT: ${CLICKHOUSE_ENDPOINT} CLICKHOUSE_USERNAME: ${CLICKHOUSE_USERNAME} + SUPABASE_S3_BUCKET: ${SUPABASE_S3_BUCKET} + SUPABASE_S3_LOGS_BUCKET: ${SUPABASE_S3_LOGS_BUCKET} + SUPABASE_S3_ACCESS_KEY_ID: ${SUPABASE_S3_ACCESS_KEY_ID} + SUPABASE_S3_SECRET_ACCESS_KEY: ${SUPABASE_S3_SECRET_ACCESS_KEY} network_mode: 'host' volumes: - ./api:/app/api diff --git a/app/dashboard/app/providers/posthog-provider.tsx b/app/dashboard/app/providers/posthog-provider.tsx index cff2eec39..6a3f52119 100644 --- a/app/dashboard/app/providers/posthog-provider.tsx +++ b/app/dashboard/app/providers/posthog-provider.tsx @@ -9,7 +9,12 @@ import posthog from 'posthog-js'; import { PostHogProvider as PHProvider } from 'posthog-js/react'; export function PostHogProvider({ children }: { children: React.ReactNode }) { + const isProd = process.env.NODE_ENV === 'production'; + + // Initialise PostHog only in production builds to avoid noisy 401/404 errors useEffect(() => { + if (!isProd) return; + const posthogKey = process.env.NEXT_PUBLIC_POSTHOG_KEY; // Only initialize PostHog if we have a valid key @@ -20,7 +25,12 @@ export function PostHogProvider({ children }: { children: React.ReactNode }) { capture_pageview: false, // Disable automatic pageview capture, as we capture manually }); } - }, []); + }, [isProd]); + + // When not in production, skip rendering the PostHogProvider entirely + if (!isProd) { + return <>{children}</>; + } return ( diff --git a/app/deploy/jockey/__main__.py b/app/deploy/jockey/__main__.py index 0b0783bb4..4ef87a099 100644 --- a/app/deploy/jockey/__main__.py +++ b/app/deploy/jockey/__main__.py @@ -158,13 +158,13 @@ def setup_builder(): """Setup builder namespace and create AWS credentials secret from local AWS CLI.""" try: from .environment import KUBECONFIG_PATH, AWS_PROFILE - + click.echo("🔧 Setting up builder namespace and AWS credentials...") - + # Create builder namespace cmd = ['kubectl', '--kubeconfig', KUBECONFIG_PATH, 'create', 'namespace', BUILDER_NAMESPACE] result = subprocess.run(cmd, capture_output=True, text=True) - + if result.returncode == 0: click.echo("✅ Builder namespace created") elif "already exists" in result.stderr: @@ -172,40 +172,40 @@ def setup_builder(): else: click.echo(f"❌ Failed to create builder namespace: {result.stderr}") raise click.Abort() - + # Get AWS credentials from local AWS CLI aws_cmd = ['aws', 'configure', 'get', 'aws_access_key_id'] if AWS_PROFILE: aws_cmd.extend(['--profile', AWS_PROFILE]) - + access_key_result = subprocess.run(aws_cmd, capture_output=True, text=True) if access_key_result.returncode != 0: click.echo("❌ Failed to get AWS access key from local AWS CLI") click.echo(" Please ensure AWS CLI is configured with 'aws configure'")
raise click.Abort() - + aws_cmd = ['aws', 'configure', 'get', 'aws_secret_access_key'] if AWS_PROFILE: aws_cmd.extend(['--profile', AWS_PROFILE]) - + secret_key_result = subprocess.run(aws_cmd, capture_output=True, text=True) if secret_key_result.returncode != 0: click.echo("❌ Failed to get AWS secret key from local AWS CLI") click.echo(" Please ensure AWS CLI is configured with 'aws configure'") raise click.Abort() - + access_key = access_key_result.stdout.strip() secret_key = secret_key_result.stdout.strip() - + if not access_key or not secret_key: click.echo("❌ AWS credentials are empty") click.echo(" Please ensure AWS CLI is configured with 'aws configure'") raise click.Abort() - + # Delete existing secret if it exists delete_cmd = ['kubectl', '--kubeconfig', KUBECONFIG_PATH, 'delete', 'secret', 'aws-credentials', '-n', BUILDER_NAMESPACE] subprocess.run(delete_cmd, capture_output=True, text=True) # Ignore errors - + # Create aws-credentials secret in builder namespace secret_cmd = [ 'kubectl', '--kubeconfig', KUBECONFIG_PATH, 'create', 'secret', 'generic', 'aws-credentials', @@ -213,18 +213,18 @@ def setup_builder(): f'--from-literal=secret-key={secret_key}', '--namespace', BUILDER_NAMESPACE ] - + result = subprocess.run(secret_cmd, capture_output=True, text=True) if result.returncode == 0: click.echo("✅ AWS credentials secret created in builder namespace") else: click.echo(f"❌ Failed to create AWS credentials secret: {result.stderr}") raise click.Abort() - + click.echo("🎉 Builder namespace setup complete!") click.echo(" • Builder namespace created") click.echo(" • AWS credentials secret created from local AWS CLI") - + except FileNotFoundError: click.echo("❌ kubectl or aws CLI not found. Please install them first.") raise click.Abort() @@ -420,40 +420,40 @@ def list_alb_ingresses(namespace): """List all ingresses using the shared ALB.""" try: from .backend.models.service import Ingress - + click.echo(f"🔍 Searching for ingresses using shared ALB: {ALB_SHARED_GROUP_NAME}") click.echo(f" Namespace: {namespace}") click.echo() - + ingresses = Ingress.filter(namespace=namespace) shared_alb_ingresses = [] - + for ingress in ingresses: annotations = ingress.data.metadata.annotations or {} group_name = annotations.get('alb.ingress.kubernetes.io/group.name', '') if group_name == ALB_SHARED_GROUP_NAME: shared_alb_ingresses.append(ingress) - + if not shared_alb_ingresses: click.echo(f"No ingresses found using shared ALB group: {ALB_SHARED_GROUP_NAME}") return - + click.echo(f"Found {len(shared_alb_ingresses)} ingress(es) using shared ALB:") click.echo() - + for ingress in shared_alb_ingresses: annotations = ingress.data.metadata.annotations or {} hostname = ingress.hostname or "N/A" alb_hostname = ingress.load_balancer_hostname or "Pending" tags = annotations.get('alb.ingress.kubernetes.io/tags', 'N/A') - + click.echo(f"📋 {ingress.name}") click.echo(f" Hostname: {hostname}") click.echo(f" ALB Hostname: {alb_hostname}") click.echo(f" Tags: {tags}") click.echo(f" Namespace: {ingress.namespace}") click.echo() - + except Exception as e: click.echo(f"❌ Error listing ALB ingresses: {e}", err=True) raise click.Abort() @@ -464,64 +464,64 @@ def alb_status(): """Check the status of the shared ALB configuration.""" try: from .backend.models.service import Ingress - + click.echo(f"🔍 Checking shared ALB status: {ALB_SHARED_GROUP_NAME}") click.echo() - + # Get all ingresses across all namespaces all_ingresses = [] namespaces = ['default', 'kube-system', 'agentops-deploy'] # Common namespaces - + for namespace 
in namespaces: try: ingresses = Ingress.filter(namespace=namespace) all_ingresses.extend(ingresses) except: continue # Skip if namespace doesn't exist or no access - + shared_alb_ingresses = [] for ingress in all_ingresses: annotations = ingress.data.metadata.annotations or {} group_name = annotations.get('alb.ingress.kubernetes.io/group.name', '') if group_name == ALB_SHARED_GROUP_NAME: shared_alb_ingresses.append(ingress) - + if not shared_alb_ingresses: click.echo(f"⚠️ No ingresses found using shared ALB group: {ALB_SHARED_GROUP_NAME}") click.echo(" This means the shared ALB is not yet provisioned.") return - + # Check ALB status alb_hostnames = set() ready_count = 0 - + for ingress in shared_alb_ingresses: alb_hostname = ingress.load_balancer_hostname if alb_hostname: alb_hostnames.add(alb_hostname) ready_count += 1 - - click.echo(f"✅ Shared ALB Status:") + + click.echo("✅ Shared ALB Status:") click.echo(f" Group Name: {ALB_SHARED_GROUP_NAME}") click.echo(f" Total Ingresses: {len(shared_alb_ingresses)}") click.echo(f" Ready Ingresses: {ready_count}") click.echo(f" ALB Hostnames: {len(alb_hostnames)}") - + if alb_hostnames: click.echo(" ALB Endpoints:") for hostname in alb_hostnames: click.echo(f" - {hostname}") - + # Show ingresses by namespace namespace_counts = {} for ingress in shared_alb_ingresses: ns = ingress.namespace namespace_counts[ns] = namespace_counts.get(ns, 0) + 1 - + click.echo(" Ingresses by namespace:") for ns, count in namespace_counts.items(): click.echo(f" - {ns}: {count}") - + except Exception as e: click.echo(f"❌ Error checking ALB status: {e}", err=True) raise click.Abort() @@ -534,33 +534,33 @@ def validate_alb_routing(namespace, hostname): """Validate that ALB routing is configured correctly for a hostname.""" try: from .backend.models.service import Ingress - + click.echo(f"🔍 Validating ALB routing for hostname: {hostname}") click.echo(f" Namespace: {namespace}") click.echo(f" Shared ALB Group: {ALB_SHARED_GROUP_NAME}") click.echo() - + ingresses = Ingress.filter(namespace=namespace) matching_ingresses = [] - + for ingress in ingresses: if ingress.hostname == hostname: matching_ingresses.append(ingress) - + if not matching_ingresses: click.echo(f"❌ No ingresses found for hostname: {hostname}") return - + ingress = matching_ingresses[0] annotations = ingress.data.metadata.annotations or {} - + # Check ALB configuration group_name = annotations.get('alb.ingress.kubernetes.io/group.name', '') scheme = annotations.get('alb.ingress.kubernetes.io/scheme', '') target_type = annotations.get('alb.ingress.kubernetes.io/target-type', '') tags = annotations.get('alb.ingress.kubernetes.io/tags', '') - - click.echo(f"✅ Ingress Configuration:") + + click.echo("✅ Ingress Configuration:") click.echo(f" Name: {ingress.name}") click.echo(f" Hostname: {ingress.hostname}") click.echo(f" ALB Group: {group_name}") @@ -568,7 +568,7 @@ def validate_alb_routing(namespace, hostname): click.echo(f" Target Type: {target_type}") click.echo(f" Tags: {tags}") click.echo(f" ALB Hostname: {ingress.load_balancer_hostname or 'Pending'}") - + # Validation checks issues = [] if group_name != ALB_SHARED_GROUP_NAME: @@ -579,7 +579,7 @@ def validate_alb_routing(namespace, hostname): issues.append(f"Target type should be 'ip', got '{target_type}'") if not tags: issues.append("No ALB tags configured") - + if issues: click.echo() click.echo("⚠️ Configuration Issues:") @@ -588,7 +588,7 @@ def validate_alb_routing(namespace, hostname): else: click.echo() click.echo("✅ All validations passed!") - + except 
Exception as e: click.echo(f"❌ Error validating ALB routing: {e}", err=True) raise click.Abort() diff --git a/app/deploy/jockey/backend/models/job.py b/app/deploy/jockey/backend/models/job.py index e8bfeae6a..cb970b0f8 100644 --- a/app/deploy/jockey/backend/models/job.py +++ b/app/deploy/jockey/backend/models/job.py @@ -428,7 +428,7 @@ def get_logs(self) -> str: except Exception as e: logger.error(f"Failed to get logs for job {self.name}: {e}") return f"Failed to get logs: {e}" - + def to_string(self) -> str: """Convert to string representation.""" return f"Job(name={self.name}, image_url={self.image_url}, namespace={self.namespace})" diff --git a/app/deploy/jockey/backend/models/secret.py b/app/deploy/jockey/backend/models/secret.py index 65b41b915..957a90c1d 100644 --- a/app/deploy/jockey/backend/models/secret.py +++ b/app/deploy/jockey/backend/models/secret.py @@ -41,7 +41,7 @@ class SecretRef: def safe_name(self) -> str: """Transform key to lowercase with dashes for Kubernetes secret name.""" return _safe_name(self.key) - + @property def env_name(self) -> str: """Get the environment variable name (uppercase with underscores).""" diff --git a/app/deploy/jockey/tests/conftest.py b/app/deploy/jockey/tests/conftest.py index c47e452ea..fa5934a13 100644 --- a/app/deploy/jockey/tests/conftest.py +++ b/app/deploy/jockey/tests/conftest.py @@ -127,7 +127,7 @@ def redis_client_with_container(redis_container, monkeypatch): def clean_redis(redis_client_with_container): """Provide a clean Redis client and track keys for cleanup.""" from jockey.worker import queue - + tracked_keys = [] # Helper function to track keys @@ -136,7 +136,7 @@ def track_key(key): return key redis_client_with_container.track_key = track_key - + # Add queue functions as methods redis_client_with_container.store_event = queue.store_event redis_client_with_container.get_task_status = queue.get_task_status diff --git a/app/deploy/jockey/tests/test_config.py b/app/deploy/jockey/tests/test_config.py index 45b37441c..258adc5b4 100644 --- a/app/deploy/jockey/tests/test_config.py +++ b/app/deploy/jockey/tests/test_config.py @@ -3,7 +3,6 @@ import pytest from jockey.config import ( DeploymentConfig, - DeploymentPack, DEPLOYMENT_PACKS, _get_instance_build_files, ) @@ -16,7 +15,7 @@ def test_deployment_packs_dictionary_contains_all_packs(self): """Test that DEPLOYMENT_PACKS contains all expected pack types.""" expected_packs = {"FASTAPI", "CREWAI", "CREWAI_JOB"} assert set(DEPLOYMENT_PACKS.keys()) == expected_packs - + # Verify they're the correct DeploymentPack instances assert DEPLOYMENT_PACKS["FASTAPI"] == DEPLOYMENT_PACKS["FASTAPI"] assert DEPLOYMENT_PACKS["CREWAI"] == DEPLOYMENT_PACKS["CREWAI"] @@ -54,7 +53,7 @@ def test_from_pack_with_fastapi(self): namespace="test-ns", project_id="test-project" ) - + assert config.dockerfile_template == "fastapi-agent" assert config.ports == [8000] assert config.build_files == {} @@ -68,7 +67,7 @@ def test_from_pack_with_crewai(self): namespace="test-ns", project_id="test-project" ) - + assert config.dockerfile_template == "crewai-agent" assert config.ports == [8080] assert isinstance(config.build_files, dict) @@ -82,7 +81,7 @@ def test_from_pack_with_crewai_job(self): namespace="test-ns", project_id="test-project" ) - + assert config.dockerfile_template == "crewai-job" assert config.ports == [] assert isinstance(config.build_files, dict) @@ -96,7 +95,7 @@ def test_from_pack_with_none_uses_fastapi_fallback(self): namespace="test-ns", project_id="test-project" ) - + assert 
config.dockerfile_template == "fastapi-agent" assert config.ports == [8000] assert config.build_files == {} @@ -129,7 +128,7 @@ def test_from_pack_kwargs_override_pack_defaults(self): dockerfile_template="custom-template", # Override default template replicas=3 ) - + assert config.ports == [9000, 9001] # Should use provided ports assert config.dockerfile_template == "custom-template" # Should use provided template assert config.replicas == 3 @@ -143,7 +142,7 @@ def test_from_pack_computes_derived_fields(self): namespace="test-ns", project_id="test-project-123" ) - + # Derived fields should be computed in __post_init__ assert config.tag == "test-project-123" assert config.hostname == "test-project-123.deploy.agentops.ai" @@ -165,12 +164,12 @@ def test_from_pack_with_all_optional_fields(self): create_ingress=False, force_recreate=True ) - + # Pack defaults should be applied assert config.dockerfile_template == "crewai-agent" assert config.ports == [8080] assert isinstance(config.build_files, dict) - + # Provided fields should be set assert config.repository_url == "https://github.com/test/repo.git" assert config.branch == "feature-branch" @@ -195,9 +194,9 @@ def test_serialize_config_created_from_pack(self): project_id="test-project", repository_url="https://github.com/test/repo.git" ) - + serialized = config.serialize() - + # Should contain all required fields assert serialized["namespace"] == "test-ns" assert serialized["project_id"] == "test-project" @@ -213,10 +212,10 @@ def test_deserialize_config_works(self): namespace="test-ns", project_id="test-project" ) - + serialized = original_config.serialize() deserialized_config = DeploymentConfig.from_serialized(serialized) - + # Should have same values assert deserialized_config.namespace == original_config.namespace assert deserialized_config.project_id == original_config.project_id @@ -236,11 +235,11 @@ def test_roundtrip_serialization_preserves_data(self): secret_names=["secret1"], agentops_api_key="test-key" ) - + # Serialize and deserialize serialized = original_config.serialize() deserialized_config = DeploymentConfig.from_serialized(serialized) - + # All fields should be preserved assert deserialized_config.namespace == original_config.namespace assert deserialized_config.project_id == original_config.project_id @@ -261,7 +260,7 @@ def test_get_instance_build_files_returns_dict(self): """Test that _get_instance_build_files returns a dictionary.""" build_files = _get_instance_build_files() assert isinstance(build_files, dict) - + # Should contain Python files from instance directory if they exist for key, value in build_files.items(): assert key.startswith("instance/") @@ -272,10 +271,10 @@ def test_crewai_packs_have_same_build_files(self): """Test that both CREWAI packs have the same build files.""" crewai_pack = DEPLOYMENT_PACKS["CREWAI"] crewai_job_pack = DEPLOYMENT_PACKS["CREWAI_JOB"] - + # Both should use the same build files assert crewai_pack.build_files == crewai_job_pack.build_files - + # FASTAPI should have empty build files assert DEPLOYMENT_PACKS["FASTAPI"].build_files == {} @@ -296,7 +295,7 @@ def test_all_packs_have_required_attributes(self): assert isinstance(pack.ports, list) assert all(isinstance(port, int) for port in pack.ports) assert isinstance(pack.build_files, dict) - + # All build file keys should be strings, values should be strings for key, value in pack.build_files.items(): assert isinstance(key, str) @@ -307,4 +306,4 @@ def test_pack_names_match_constants(self): # Ensure we have exactly the packs we expect 
expected_packs = {"FASTAPI", "CREWAI", "CREWAI_JOB"} actual_packs = set(DEPLOYMENT_PACKS.keys()) - assert actual_packs == expected_packs \ No newline at end of file + assert actual_packs == expected_packs diff --git a/app/deploy/jockey/tests/test_integration.py b/app/deploy/jockey/tests/test_integration.py index a5840f93f..204465c1c 100644 --- a/app/deploy/jockey/tests/test_integration.py +++ b/app/deploy/jockey/tests/test_integration.py @@ -44,7 +44,7 @@ def test_store_and_retrieve_events(self, clean_redis): """Test storing and retrieving events end-to-end.""" from jockey.worker.queue import queue_task from jockey.config import DeploymentConfig, TaskType - + # Create a job first config = DeploymentConfig( project_id="test-project-id", @@ -84,7 +84,7 @@ def test_timestamp_filtering(self, clean_redis): """Test timestamp-based event filtering.""" from jockey.worker.queue import queue_task from jockey.config import DeploymentConfig, TaskType - + # Create a job first config = DeploymentConfig( project_id="test-project-id", @@ -122,7 +122,7 @@ def test_event_cleanup(self, clean_redis): """Test that all events are kept (current implementation has no cleanup).""" from jockey.worker.queue import queue_task from jockey.config import DeploymentConfig, TaskType - + # Create a job first config = DeploymentConfig( project_id="test-project-id", @@ -147,7 +147,7 @@ def test_real_event_types(self, clean_redis): """Test with actual event types from the system.""" from jockey.worker.queue import queue_task from jockey.config import DeploymentConfig, TaskType - + # Create a job first config = DeploymentConfig( project_id="test-project-id", diff --git a/app/deploy/jockey/tests/test_queue.py b/app/deploy/jockey/tests/test_queue.py index 55edb23ec..e5f25e214 100644 --- a/app/deploy/jockey/tests/test_queue.py +++ b/app/deploy/jockey/tests/test_queue.py @@ -13,9 +13,7 @@ get_queued_tasks, _get_task_key, _get_queue_key, - _get_event_key, TASKS_HASH_NAME, - REDIS_KEY_PREFIX, ) from jockey.config import DeploymentConfig, TaskType diff --git a/app/deploy/jockey/tests/test_queue_integration.py b/app/deploy/jockey/tests/test_queue_integration.py index 452f6b1a4..9d3a8b9b5 100644 --- a/app/deploy/jockey/tests/test_queue_integration.py +++ b/app/deploy/jockey/tests/test_queue_integration.py @@ -8,7 +8,6 @@ import time import json from datetime import datetime, UTC -from unittest.mock import Mock from jockey.config import DeploymentConfig, TaskType from jockey.worker.queue import ( @@ -26,19 +25,18 @@ _get_queue_key, _get_task_key, _get_event_key, - REDIS_KEY_PREFIX, ) from jockey.backend.event import BaseEvent, EventStatus, register_event class IntegrationDeploymentEvent(BaseEvent): """Test event for integration testing.""" - + event_type = "test_deployment" - + def __init__(self, status: EventStatus, message: str = "", **kwargs): super().__init__(status, message=message, **kwargs) - + def format_message(self) -> str: return self.message @@ -56,7 +54,7 @@ def sample_config(self): """Sample deployment configuration for testing.""" return DeploymentConfig( project_id="test-project-id", - namespace="integration-test", + namespace="integration-test", ports=[8080, 9090], replicas=2, repository_url="https://github.com/test/repo.git", @@ -73,33 +71,33 @@ def test_complete_job_lifecycle(self, redis_client_with_container, sample_config # 1. 
Queue a deployment project_id = "integration-test-project" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Verify job ID is a full UUID assert len(job_id) == 36 assert job_id.count('-') == 4 - + # 2. Verify job is in queue assert get_queue_length() == 1 queued_jobs = get_queued_tasks() assert len(queued_jobs) == 1 assert queued_jobs[0] == job_id - + # 3. Claim the job claimed_job = claim_next_task() assert claimed_job is not None assert claimed_job["job_id"] == job_id assert claimed_job["project_id"] == project_id assert claimed_job["namespace"] == "integration-test" - + # Verify job config was preserved config_data = claimed_job["config"] assert config_data["project_id"] == "test-project-id" assert config_data["namespace"] == "integration-test" - + # 4. Verify queue is now empty assert get_queue_length() == 0 assert get_queued_tasks() == [] - + # 5. Test that claiming from empty queue returns None empty_claim = claim_next_task() assert empty_claim is None @@ -108,18 +106,18 @@ def test_job_data_persistence(self, redis_client_with_container, sample_config): """Test that job data persists correctly in Redis.""" project_id = "persistence-test" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Test get_job_data by ID job_data = get_task_data(job_id) assert job_data is not None assert job_data["job_id"] == job_id assert job_data["project_id"] == project_id assert job_data["namespace"] == sample_config.namespace - + # Verify timestamp format queued_at = datetime.fromisoformat(job_data["queued_at"]) assert abs((datetime.now(UTC) - queued_at).total_seconds()) < 5 - + # Test get_jobs by project jobs = get_tasks(sample_config.namespace, project_id) assert len(jobs) == 1 @@ -128,7 +126,7 @@ def test_job_data_persistence(self, redis_client_with_container, sample_config): def test_multiple_jobs_same_project(self, redis_client_with_container, sample_config): """Test queuing multiple jobs for the same project.""" project_id = "multi-job-test" - + # Queue 3 jobs job_ids = [] for i in range(3): @@ -139,25 +137,25 @@ def test_multiple_jobs_same_project(self, redis_client_with_container, sample_co job_id = queue_task(TaskType.SERVE, config, project_id) job_ids.append(job_id) time.sleep(0.01) # Small delay to ensure different timestamps - + # Verify all jobs are queued assert get_queue_length() == 3 - + # Get jobs for the project jobs = get_tasks("multi-test", project_id) assert len(jobs) == 3 - + # Jobs should be returned in order (implementation detail) returned_job_ids = [job["job_id"] for job in jobs] assert set(returned_job_ids) == set(job_ids) - + # Claim all jobs and verify FIFO order claimed_jobs = [] for _ in range(3): job = claim_next_task() assert job is not None claimed_jobs.append(job) - + # First job queued should be first claimed (FIFO) assert claimed_jobs[0]["job_id"] == job_ids[0] assert claimed_jobs[1]["job_id"] == job_ids[1] @@ -167,7 +165,7 @@ def test_event_storage_and_retrieval(self, redis_client_with_container, sample_c """Test storing and retrieving events for jobs.""" project_id = "event-test" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Store multiple events events = [ IntegrationDeploymentEvent(EventStatus.STARTED, "Deployment started"), @@ -175,21 +173,21 @@ def test_event_storage_and_retrieval(self, redis_client_with_container, sample_c IntegrationDeploymentEvent(EventStatus.PROGRESS, "Deploying to cluster"), IntegrationDeploymentEvent(EventStatus.COMPLETED, "Deployment completed"), ] - + for event in events: 
store_event(job_id, event) time.sleep(0.01) # Small delay for timestamp ordering - + # Retrieve all events retrieved_events = get_task_events(job_id) assert len(retrieved_events) == 4 - + # Events should be in reverse chronological order (newest first) assert retrieved_events[0].status == EventStatus.COMPLETED assert retrieved_events[0].message == "Deployment completed" assert retrieved_events[-1].status == EventStatus.STARTED assert retrieved_events[-1].message == "Deployment started" - + # Test get_task_status (should return latest event) latest_event = get_task_status(job_id) assert latest_event is not None @@ -200,23 +198,23 @@ def test_event_timestamp_filtering(self, redis_client_with_container, sample_con """Test timestamp-based event filtering.""" project_id = "timestamp-test" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Store initial event store_event(job_id, IntegrationDeploymentEvent(EventStatus.STARTED, "Started")) time.sleep(1) # Ensure clear time separation - + # Record filter time filter_time = datetime.now(UTC) time.sleep(1) - + # Store events after filter time store_event(job_id, IntegrationDeploymentEvent(EventStatus.PROGRESS, "Building")) store_event(job_id, IntegrationDeploymentEvent(EventStatus.COMPLETED, "Done")) - + # Get all events all_events = get_task_events(job_id) assert len(all_events) == 3 - + # Get events after filter time filtered_events = get_task_events(job_id, start_time=filter_time) assert len(filtered_events) == 2 @@ -226,32 +224,32 @@ def test_redis_key_structure(self, redis_client_with_container, sample_config): """Test that Redis keys are structured correctly.""" project_id = "key-structure-test" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Test key generation functions queue_key = _get_queue_key() job_key = _get_task_key(sample_config.namespace, project_id, job_id) event_key = _get_event_key(job_id) - + assert queue_key == _get_queue_key() assert job_key == f"{sample_config.namespace}:{project_id}:{job_id}" assert event_key == _get_event_key(job_id) - + # Verify actual Redis keys exist redis_client = redis_client_with_container - + # Queue should contain the job ID assert redis_client.lrange(queue_key, 0, -1) == [job_id] - + # Job hash should contain the job data job_data_raw = redis_client.hget(TASKS_HASH_NAME, job_key) assert job_data_raw is not None job_data = json.loads(job_data_raw) assert job_data["job_id"] == job_id - + # Store an event and verify event key test_event = IntegrationDeploymentEvent(EventStatus.STARTED, "Test event") store_event(job_id, test_event) - + # Event sorted set should exist event_count = redis_client.zcard(event_key) assert event_count == 1 @@ -259,7 +257,7 @@ def test_redis_key_structure(self, redis_client_with_container, sample_config): def test_concurrent_job_claiming(self, redis_client_with_container, sample_config): """Test that multiple workers can safely claim jobs concurrently.""" project_id = "concurrent-test" - + # Queue multiple jobs job_ids = [] for i in range(5): @@ -269,19 +267,19 @@ def test_concurrent_job_claiming(self, redis_client_with_container, sample_confi ) job_id = queue_task(TaskType.SERVE, config, project_id) job_ids.append(job_id) - + # Simulate concurrent claiming claimed_jobs = [] for _ in range(5): job = claim_next_task() if job: claimed_jobs.append(job) - + # All jobs should be claimed exactly once assert len(claimed_jobs) == 5 claimed_job_ids = [job["job_id"] for job in claimed_jobs] assert set(claimed_job_ids) == set(job_ids) - + # Queue 
should be empty assert get_queue_length() == 0 assert claim_next_task() is None @@ -299,14 +297,14 @@ def test_job_data_integrity(self, redis_client_with_container): replicas=3, secret_names=["db-secret", "api-key", "ssl-cert"], ) - + project_id = "integrity-test-project" job_id = queue_task(TaskType.SERVE, complex_config, project_id) - + # Claim and verify all data is intact job_data = claim_next_task() assert job_data is not None - + config = job_data["config"] assert config["project_id"] == "complex-project" assert config["namespace"] == "integrity-test" @@ -322,7 +320,7 @@ def test_event_data_integrity(self, redis_client_with_container, sample_config): """Test that event data with complex payloads is preserved.""" project_id = "event-integrity-test" job_id = queue_task(TaskType.SERVE, sample_config, project_id) - + # Create event with complex payload complex_event = IntegrationDeploymentEvent( EventStatus.PROGRESS, @@ -341,13 +339,13 @@ def test_event_data_integrity(self, redis_client_with_container, sample_config): ] } ) - + store_event(job_id, complex_event) - + # Retrieve and verify data integrity events = get_task_events(job_id) assert len(events) == 1 - + retrieved_event = events[0] assert retrieved_event.status == EventStatus.PROGRESS assert retrieved_event.message == "Complex deployment step" @@ -361,15 +359,15 @@ def test_error_handling(self, redis_client_with_container): # Test getting non-existent job non_existent_job = get_task_data("non-existent-job-id") assert non_existent_job is None - + # Test getting events for non-existent job non_existent_events = get_task_events("non-existent-job-id") assert non_existent_events == [] - + # Test getting status for non-existent job non_existent_status = get_task_status("non-existent-job-id") assert non_existent_status is None - + # Test getting jobs for non-existent project non_existent_project_jobs = get_tasks("non-existent-namespace", "non-existent-project") - assert non_existent_project_jobs == [] \ No newline at end of file + assert non_existent_project_jobs == [] diff --git a/app/deploy/test_secret_fix.py b/app/deploy/test_secret_fix.py index fa066cb8d..876198616 100644 --- a/app/deploy/test_secret_fix.py +++ b/app/deploy/test_secret_fix.py @@ -5,42 +5,40 @@ def test_secret_ref_transformations(): """Test that SecretRef properly transforms between formats.""" - + # Test case 1: Creating from lowercase secret name secret_ref = SecretRef(key="agentops-api-key") print(f"Input key: {secret_ref.key}") print(f"Safe name (k8s secret): {secret_ref.safe_name}") print(f"Env name (env var): {secret_ref.env_name}") - + # Generate the env var env_var = secret_ref.to_env_var() - print(f"\nGenerated env var:") + print("\nGenerated env var:") print(f" name: {env_var.name}") print(f" secret_name: {env_var.value_from.secret_key_ref.name}") print(f" secret_key: {env_var.value_from.secret_key_ref.key}") - + assert env_var.name == "AGENTOPS_API_KEY", f"Expected AGENTOPS_API_KEY, got {env_var.name}" assert env_var.value_from.secret_key_ref.name == "agentops-api-key" assert env_var.value_from.secret_key_ref.key == "AGENTOPS_API_KEY" - + print("\n✓ Test case 1 passed!") - + # Test case 2: Creating with explicit env var name secret_ref2 = SecretRef(key="my-secret", env_var_name="CUSTOM_ENV_NAME") env_var2 = secret_ref2.to_env_var() - print(f"\nTest case 2:") + print("\nTest case 2:") print(f" env var name: {env_var2.name}") assert env_var2.name == "CUSTOM_ENV_NAME" print("✓ Test case 2 passed!") - + # Test case 3: Test create_secret function - from 
jockey.deploy import create_secret print("\n\nTesting create_secret function:") print("Input: key='agentops-api-key', value='test-value'") - + # This would normally create the secret in k8s, but we'll just check the data structure - from jockey.backend.models.secret import Secret - + # Simulate what create_secret does key = "agentops-api-key" data_key = key.upper().replace('-', '_') @@ -50,4 +48,4 @@ def test_secret_ref_transformations(): if __name__ == "__main__": test_secret_ref_transformations() - print("\n✅ All tests passed!") \ No newline at end of file + print("\n✅ All tests passed!") diff --git a/app/opentelemetry-collector/builder/tests/__init__.py b/app/opentelemetry-collector/builder/tests/__init__.py index fec15f296..db01c78cf 100644 --- a/app/opentelemetry-collector/builder/tests/__init__.py +++ b/app/opentelemetry-collector/builder/tests/__init__.py @@ -1 +1 @@ -# Tests for the builder package \ No newline at end of file +# Tests for the builder package diff --git a/app/opentelemetry-collector/builder/tests/integration/test_collector_cost.py b/app/opentelemetry-collector/builder/tests/integration/test_collector_cost.py index 00e027c6c..f88121d9d 100644 --- a/app/opentelemetry-collector/builder/tests/integration/test_collector_cost.py +++ b/app/opentelemetry-collector/builder/tests/integration/test_collector_cost.py @@ -34,16 +34,16 @@ def jwt_token(jwt_secret): "project_id": "test-project-id", "api_key": "test-api-key" } - + # Generate the JWT token token = jwt.encode(payload, jwt_secret, algorithm="HS256") - + # Write to .jwt file for the collector to use project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) jwt_path = os.path.join(project_root, '.jwt') with open(jwt_path, 'w') as f: f.write(token) - + return token diff --git a/app/opentelemetry-collector/builder/tests/test_model_cost.py b/app/opentelemetry-collector/builder/tests/test_model_cost.py index 678ab2c62..c8cb25181 100644 --- a/app/opentelemetry-collector/builder/tests/test_model_cost.py +++ b/app/opentelemetry-collector/builder/tests/test_model_cost.py @@ -35,7 +35,7 @@ def test_init_with_partial_data(self): assert model_cost.model == "gpt-3.5-turbo" assert model_cost.provider is None assert model_cost.input == Decimal("0") - assert model_cost.output == Decimal("0") + assert model_cost.output == Decimal("0") assert model_cost.cached is None assert model_cost.reasoning is None @@ -72,14 +72,14 @@ def test_model_dump_formats_decimals(self): ) dumped = model_cost.model_dump() - + assert dumped["model"] == "gpt-4" assert dumped["provider"] == "openai" assert dumped["input"] == "0.00001" assert dumped["output"] == "0.00003" assert dumped["cached"] == "0.000005" assert dumped["reasoning"] == "0.000015" - + # Check that trailing zeros are removed model_cost = ModelCost( model="test", @@ -162,60 +162,60 @@ def test_validator_methods(self): # Test that zero values are converted to None assert ModelCost.convert_to_decimal(0) is None assert ModelCost.convert_to_decimal(0.0) is None - + # Test that None remains None assert ModelCost.convert_to_decimal(None) is None - + # Test that non-zero values are converted to Decimal correctly assert ModelCost.convert_to_decimal(0.00003) == Decimal("0.00003") - + # Test decimal formatting in model_dump model = ModelCost(model="test", input=Decimal("0.001000")) dumped = model.model_dump() assert dumped["input"] == "0.001" - + def test_get_tokencost_path_integration(self): """Integration test for get_tokencost_path with the real package.""" path = 
get_tokencost_path() - + # The function should return a path assert path is not None assert isinstance(path, Path) - + # The path should point to a directory that exists assert path.exists() - + # The directory should contain model_prices.json assert (path / "model_prices.json").exists() - + def test_load_model_costs_integration(self): """Integration test for load_model_costs with the real package.""" # Load real model costs from the tokencost package costs = load_model_costs() - + # We should get a non-empty list of ModelCost objects assert costs is not None assert isinstance(costs, list) assert len(costs) > 0 - + # Check that every item is a ModelCost object for cost in costs: assert isinstance(cost, ModelCost) assert cost.model is not None assert isinstance(cost.model, str) - + # Either input or output should have a valid cost has_valid_cost = ( (cost.input is not None and cost.input > 0) or (cost.output is not None and cost.output > 0) ) assert has_valid_cost, f"Model {cost.model} has no valid costs" - + # Let's also check for some common models that should be present # This might need to be adjusted if the package changes significantly model_names = [cost.model for cost in costs] common_models = ['gpt-3.5-turbo', 'gpt-4', 'claude-3-opus'] - + # At least one of these common models should be present assert any(model in model_names for model in common_models), \ - f"None of the common models {common_models} found in {model_names[:5]}..." \ No newline at end of file + f"None of the common models {common_models} found in {model_names[:5]}..." diff --git a/app/opentelemetry-collector/compose.yaml b/app/opentelemetry-collector/compose.yaml index af3e0e2aa..2f45bebad 100644 --- a/app/opentelemetry-collector/compose.yaml +++ b/app/opentelemetry-collector/compose.yaml @@ -5,7 +5,7 @@ services: otelcollector: build: - context: . 
+ context: ./opentelemetry-collector dockerfile: Dockerfile container_name: otel ports: @@ -25,7 +25,7 @@ services: - TRACES_TABLE_NAME=${TRACES_TABLE_NAME:-otel_traces} - CLICKHOUSE_TTL=${CLICKHOUSE_TTL:-12h} - CLICKHOUSE_TIMEOUT=${CLICKHOUSE_TIMEOUT:-10s} - - JWT_SECRET=${JWT_SECRET:-} + - JWT_SECRET=${JWT_SECRET_KEY:-super-secret-jwt-token-with-at-least-32-characters-long} clickhouse: image: clickhouse/clickhouse-server:24.12 diff --git a/docs/images/external/gibsonai/gibsonai.png b/docs/images/external/gibsonai/gibsonai.png new file mode 100644 index 000000000..9e1f85d40 Binary files /dev/null and b/docs/images/external/gibsonai/gibsonai.png differ diff --git a/docs/local_clickhouse_setup.md b/docs/local_clickhouse_setup.md new file mode 100644 index 000000000..63d816e0b --- /dev/null +++ b/docs/local_clickhouse_setup.md @@ -0,0 +1,73 @@ +Restart and verify + +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml down --remove-orphans +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml up -d +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml logs --since=90s api +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml logs --since=90s otelcollector +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml logs --since=90s clickhouse + +Open http://localhost:3000/signin + + +Local ClickHouse Setup + +1) Environment (.env) +CLICKHOUSE_HOST=127.0.0.1 +CLICKHOUSE_PORT=8123 +CLICKHOUSE_USER=default +CLICKHOUSE_PASSWORD=password +CLICKHOUSE_DATABASE=otel_2 +CLICKHOUSE_SECURE=false +CLICKHOUSE_ENDPOINT=http://clickhouse:8123 +CLICKHOUSE_USERNAME=default + +2) Start services (includes local ClickHouse and otelcollector) +docker compose -f app/compose.yaml -f app/opentelemetry-collector/compose.yaml up -d + +3) Initialize ClickHouse schema +curl -u default:password 'http://localhost:8123/?query=CREATE%20DATABASE%20IF%20NOT%20EXISTS%20otel_2' +curl --data-binary @app/clickhouse/migrations/0000_init.sql -u default:password 'http://localhost:8123/?query=' + +4) Run OpenAI example with local OTLP +AGENTOPS_API_KEY= \ +AGENTOPS_API_ENDPOINT=http://localhost:8000 \ +AGENTOPS_APP_URL=http://localhost:3000 \ +AGENTOPS_EXPORTER_ENDPOINT=http://localhost:4318/v1/traces \ +OPENAI_API_KEY= \ +python examples/openai/openai_example_sync.py + +5) Verify +- Dashboard: http://localhost:3000/traces?trace_id= +- ClickHouse: + curl -s -u default:password "http://localhost:8123/?query=SHOW%20TABLES%20FROM%20otel_2" + curl -s -u default:password "http://localhost:8123/?query=SELECT%20count()%20FROM%20otel_2.otel_traces%20WHERE%20TraceId%20=%20''" + +Quick Start (Local ClickHouse) + +Run these to get OSS working fast: +1) Ensure ClickHouse is running (HTTP 8123, native 9000) +2) Create DB and base schema +curl -u default:password "http://localhost:8123/?query=CREATE%20DATABASE%20IF%20NOT%20EXISTS%20otel_2" +curl -u default:password --data-binary @app/clickhouse/migrations/0000_init.sql "http://localhost:8123/?query=" +3) Enable costs + span counts +curl -u default:password --data-binary @app/clickhouse/migrations/0001_udfs_and_pricing.sql "http://localhost:8123/?query=" +curl -u default:password --data-binary @app/clickhouse/migrations/0002_span_counts_mv.sql "http://localhost:8123/?query=" +4) Seed pricing +# Option A (basic): small starter set +curl -u default:password --data-binary @app/clickhouse/migrations/0003_seed_model_costs.sql "http://localhost:8123/?query=" +# Option B (full): full 
+
+Advanced (Optional): Deeper Verification
+
+Use these queries to confirm everything loaded:
+- UDFs
+  curl -s -u default:password "http://localhost:8123/?query=SHOW%20FUNCTIONS%20LIKE%20'normalize_model_name'"
+- Dictionary
+  curl -s -u default:password "http://localhost:8123/?query=SELECT%20name,%20status,%20type%20FROM%20system.dictionaries%20WHERE%20name%3D'model_costs_dict'"
+  curl -s -u default:password "http://localhost:8123/?query=SELECT%20dictGetOrDefault('model_costs_dict','prompt_cost_per_1k','gpt-4o-mini',0.)"
+- MV/table
+  curl -s -u default:password "http://localhost:8123/?query=EXISTS%20TABLE%20otel_2.mv_trace_span_counts"
diff --git a/docs/local_supabase_linux.md b/docs/local_supabase_linux.md
new file mode 100644
index 000000000..c23c838cc
--- /dev/null
+++ b/docs/local_supabase_linux.md
@@ -0,0 +1,56 @@
+Local Supabase on Linux (CLI install fallback)
+
+1) Install CLI
+- Download the latest Linux x86_64 binary from https://github.com/supabase/cli/releases/latest
+- Extract to ~/.supabase/bin and chmod +x the binary
+- Add to PATH:
+  export PATH="$HOME/.supabase/bin:$PATH"
+- Verify:
+  supabase --version
+
+2) Initialize and start in app/
+- cd ~/repos/agentops/app
+- supabase init
+- supabase start
+
+3) Capture credentials from the output
+- URL: http://127.0.0.1:54321
+- anon key
+- service_role key
+- Postgres: host 127.0.0.1, port 54322, user postgres, password postgres, database postgres
+
+4) Fill envs
+- app/.env
+  NEXT_PUBLIC_SUPABASE_URL=http://127.0.0.1:54321
+  NEXT_PUBLIC_SUPABASE_ANON_KEY=
+  SUPABASE_SERVICE_ROLE_KEY=
+  SUPABASE_PROJECT_ID=local
+  SUPABASE_HOST=127.0.0.1
+  SUPABASE_PORT=54322
+  SUPABASE_DATABASE=postgres
+  SUPABASE_USER=postgres
+  SUPABASE_PASSWORD=postgres
+- app/api/.env
+  SUPABASE_URL=http://127.0.0.1:54321
+  SUPABASE_KEY=
+  SUPABASE_HOST=127.0.0.1
+  SUPABASE_PORT=54322
+  SUPABASE_DATABASE=postgres
+  SUPABASE_USER=postgres
+  SUPABASE_PASSWORD=postgres
+- app/dashboard/.env.local
+  NEXT_PUBLIC_SUPABASE_URL=http://127.0.0.1:54321
+  NEXT_PUBLIC_SUPABASE_ANON_KEY=
+  SUPABASE_SERVICE_ROLE_KEY=
+  SUPABASE_PROJECT_ID=local
+
+5) Run stack
+- From app/: docker compose up -d
+- API: http://localhost:8000/redoc
+- Dashboard: http://localhost:3000
+
+6) Notes
+- The Playground must be disabled:
+  app/.env -> NEXT_PUBLIC_PLAYGROUND=false
+  app/dashboard/.env.local -> NEXT_PUBLIC_PLAYGROUND="false"
+- ClickHouse Cloud requires an IP allowlist.
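+
+Optional scripted connectivity check: a minimal sketch against the local Postgres started by `supabase start`, using the credentials from step 3. It assumes `psycopg2-binary` is installed; that package is not part of the stack:
+
+```python
+import psycopg2  # pip install psycopg2-binary (assumed, not part of the stack)
+
+# Credentials from step 3
+conn = psycopg2.connect(
+    host="127.0.0.1", port=54322,
+    user="postgres", password="postgres", dbname="postgres",
+)
+with conn, conn.cursor() as cur:
+    cur.execute("SELECT version()")
+    print(cur.fetchone()[0])  # should print the local Postgres version
+conn.close()
+```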
diff --git a/docs/mint.json b/docs/mint.json
index 10300ff44..bab8c9134 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -152,6 +152,7 @@
         "v2/integrations/llamaindex",
         "v2/integrations/litellm",
         "v2/integrations/mem0",
+        "v2/integrations/memori",
         "v2/integrations/openai",
         "v2/integrations/openai_agents_python",
         "v2/integrations/openai_agents_js",
diff --git a/docs/v2/integrations/memori.mdx b/docs/v2/integrations/memori.mdx
new file mode 100644
index 000000000..496286159
--- /dev/null
+++ b/docs/v2/integrations/memori.mdx
@@ -0,0 +1,125 @@
+---
+title: 'Memori'
+description: 'Track and monitor Memori memory operations with AgentOps'
+---
+
+[Memori](https://github.com/GibsonAI/memori) provides automatic short-term and long-term memory for AI applications and agents, seamlessly recording conversations and adding context to LLM interactions without requiring explicit memory management.
+
+## Why Track Memori with AgentOps?
+
+- **Memory Recording**: Track when conversations are automatically captured and stored
+- **Context Injection**: Monitor how memory is automatically added to LLM context
+- **Conversation Flow**: Understand the complete dialogue history across sessions
+- **Memory Effectiveness**: Analyze how historical context improves response quality
+- **Performance Impact**: Track latency and token usage from memory operations
+- **Error Tracking**: Identify issues with memory recording or context retrieval
+
+AgentOps automatically instruments Memori to provide complete observability of your memory operations.
+
+## Installation
+
+<CodeGroup>
+```bash pip
+pip install agentops memorisdk openai python-dotenv
+```
+```bash poetry
+poetry add agentops memorisdk openai python-dotenv
+```
+```bash uv
+uv pip install agentops memorisdk openai python-dotenv
+```
+</CodeGroup>
+
+## Environment Configuration
+
+Load environment variables and set up API keys.
+
+<CodeGroup>
+```bash Export to CLI
+export AGENTOPS_API_KEY="your_agentops_api_key_here"
+export OPENAI_API_KEY="your_openai_api_key_here"
+```
+```txt Set in .env file
+AGENTOPS_API_KEY="your_agentops_api_key_here"
+OPENAI_API_KEY="your_openai_api_key_here"
+```
+</CodeGroup>
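+
+If you use the `.env` option, a minimal sketch for loading it in Python with `python-dotenv` (included in the install command above) before any AgentOps or OpenAI calls:
+
+```python
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # pulls AGENTOPS_API_KEY / OPENAI_API_KEY from .env into the environment
+assert os.getenv("AGENTOPS_API_KEY"), "AGENTOPS_API_KEY is not set"
+assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY is not set"
+```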
+
+## Tracking Automatic Memory Operations
+
+<CodeGroup>
+```python Basic Memory Tracking
+import agentops
+from memori import Memori
+from openai import OpenAI
+
+# Start a trace to group related operations
+agentops.start_trace("memori_conversation_flow", tags=["memori_memory_example"])
+
+try:
+    # Initialize OpenAI client
+    openai_client = OpenAI()
+
+    # Initialize Memori with conscious ingestion enabled
+    # AgentOps tracks the memory configuration
+    memori = Memori(
+        database_connect="sqlite:///agentops_example.db",
+        conscious_ingest=True,
+        auto_ingest=True,
+    )
+
+    memori.enable()
+
+    # First conversation - AgentOps tracks the LLM call and memory recording
+    response1 = openai_client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "user", "content": "I'm working on a Python FastAPI project"}
+        ],
+    )
+
+    print("Assistant:", response1.choices[0].message.content)
+
+    # Second conversation - AgentOps tracks memory retrieval and context injection
+    response2 = openai_client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Help me add user authentication"}],
+    )
+
+    print("Assistant:", response2.choices[0].message.content)
+    print("💡 Notice: Memori automatically provided FastAPI project context!")
+
+    # End trace - AgentOps aggregates all operations
+    agentops.end_trace(end_state="success")
+
+except Exception:
+    agentops.end_trace(end_state="error")
+    raise
+```
+</CodeGroup>
+
+## What You'll See in AgentOps
+
+When using Memori with AgentOps, your dashboard will show:
+
+1. **Conversation Timeline**: Complete flow of all conversations with memory context
+2. **Memory Injection Analytics**: Track when and how much context is automatically added
+3. **Context Relevance**: Monitor the effectiveness of automatic memory retrieval
+4. **Performance Metrics**: Latency impact of memory operations on LLM calls
+5. **Token Usage**: Track additional tokens consumed by memory context
+6. **Memory Growth**: Visualize how conversation history accumulates over time
+7. **Error Tracking**: Failed memory operations with full error context
+
+## Key Benefits of Memori + AgentOps
+
+- **Zero-Effort Memory**: Memori automatically handles conversation recording
+- **Intelligent Context**: Only relevant memory is injected into LLM context
+- **Complete Visibility**: AgentOps tracks all automatic memory operations
+- **Performance Monitoring**: Understand the cost/benefit of automatic memory
+- **Debugging Support**: Full traceability of memory decisions and context injection
diff --git a/docs/v2/introduction.mdx b/docs/v2/introduction.mdx
index 0625d838c..d603828f3 100644
--- a/docs/v2/introduction.mdx
+++ b/docs/v2/introduction.mdx
@@ -49,6 +49,7 @@ The AgentOps app is open source. Browse the code or contribute in our
 } iconType="image" href="/v2/integrations/ibm_watsonx_ai" />
 } iconType="image" href="/v2/integrations/xai" />
 } iconType="image" href="/v2/integrations/mem0" />
+} iconType="image" href="/v2/integrations/memori" />
 
 Observability and monitoring for your AI agents and LLM apps. And we do it all in just two lines of code...
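+
+Those two lines, as a minimal sketch (`agentops.init()` reads `AGENTOPS_API_KEY` from the environment when no key is passed):
+
+```python
+import agentops
+agentops.init()  # picks up AGENTOPS_API_KEY from the environment
+```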