diff --git a/.github/ISSUE_TEMPLATE/1.bug_report.yml b/.github/ISSUE_TEMPLATE/1.bug_report.yml new file mode 100644 index 0000000..5d9c141 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1.bug_report.yml @@ -0,0 +1,27 @@ +name: Bug report +description: Report a bug for Langbase. +labels: [] +body: + - type: markdown + attributes: + value: | + Use this template to report bugs in Langbase. If you need help with your own project, feel free to [start a new thread in our Discord forum](https://langbase.com/discord). + - type: textarea + attributes: + label: Description + description: A detailed bug description for Langbase and steps to reproduce it. Include the API, framework, and AI provider you're using. + placeholder: | + Steps to reproduce... + validations: + required: true + - type: textarea + attributes: + label: Code example + description: Provide a code snippet that reproduces the problem. + placeholder: | + ... + - type: textarea + attributes: + label: Additional context + description: | + Any additional information that might help us investigate. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/2.feature_request.yml b/.github/ISSUE_TEMPLATE/2.feature_request.yml new file mode 100644 index 0000000..aa5ead0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2.feature_request.yml @@ -0,0 +1,25 @@ +name: Feature Request +description: Propose a new feature for Langbase. +labels: [] +body: + - type: markdown + attributes: + value: | + Use this template to propose new features for Langbase. If you need help with your own project, feel free to [start a new thread in our Discord forum](https://langbase.com/discord). + - type: textarea + attributes: + label: Feature Description + description: Describe the feature you are proposing. Include the API, framework, and AI provider. + placeholder: Feature description... + validations: + required: true + - type: textarea + attributes: + label: Use Case + description: Explain how this feature would be beneficial. + placeholder: Use case... + - type: textarea + attributes: + label: Additional Context + description: Any additional information that might help us understand your request. + placeholder: Additional context... \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..3401e89 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Ask a Question + url: https://langbase.com/discord + about: Please ask your questions in our Discord forum.
\ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..d8ea2eb --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,7 @@ +## TLDR + + + +## Dive Deeper + + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..b89e156 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,27 @@ +name: Test + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements-dev.txt + + - name: Run tests + run: | + pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1459385 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: debug-statements + - id: mixed-line-ending + + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + language_version: python3 + args: [.] + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: [--profile=black, --line-length=88] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c32ecc4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,196 @@ +# Contributing to Langbase Python SDK + +Thank you for your interest in contributing to the Langbase Python SDK! We welcome contributions from the community. + +## Getting Started + +### Prerequisites + +- Python 3.7 or higher +- pip package manager +- git + +### Development Setup + +1. **Fork and clone the repository** + ```bash + git clone https://github.com/langbase/langbase-python-sdk + cd langbase-python-sdk + ``` + +2. **Create a virtual environment** + ```bash + python3 -m venv .venv + source .venv/bin/activate # On Windows: .venv\Scripts\activate + ``` + ### Note: + Check version of pip + ```bash + pip --version + ``` + **If it's pip 21.3 or lower, you need to upgrade it.** + ```bash + pip install --upgrade pip + ``` + +3. **Install the package in development mode** + ```bash + pip install -e . + ``` + +4. **Install development dependencies** + ```bash + pip install -r requirements-dev.txt + ``` + +5. **Install pre-commit hooks** + ```bash + pre-commit install + ``` + +## Before You Commit + +**IMPORTANT**: All code must pass quality checks before committing. Run these commands: + +### Format Your Code +```bash +# Auto-format with Black (required) +black langbase/ tests/ examples/ + +# Sort imports with isort (required) +isort langbase/ tests/ examples/ +``` + + +### 4. Run Tests +```bash +# Run all tests +pytest + +# Run with coverage +pytest --cov=langbase + +# Run specific test file +pytest tests/test_pipes.py + +# Run in verbose mode +pytest -v +``` + +### 5. 
Run All Checks at Once +```bash +# This runs all pre-commit hooks (black, isort) +pre-commit run --all-files +``` + +## Quick Checklist + +Before pushing your changes, ensure: + +- [ ] ✅ Code is formatted with `black` +- [ ] ✅ Imports are sorted with `isort` +- [ ] ✅ All tests pass with `pytest` +- [ ] ✅ New features have tests +- [ ] ✅ New features have type hints +- [ ] ✅ Documentation is updated if needed + +## Making Changes + +### 1. Create a Feature Branch +```bash +git checkout -b feature/your-feature-name +``` + +### 2. Make Your Changes +- Write clean, readable code +- Add type hints to all functions +- Follow existing code patterns +- Add docstrings to public functions + +### 3. Add Tests +- Write tests for new features +- Ensure existing tests still pass +- Aim for good test coverage + +### 4. Update Documentation +- Update README.md if adding new features +- Update docstrings +- Add examples if applicable + +### 5. Commit Your Changes +```bash +# Stage your changes +git add . + +# Commit with a descriptive message +git commit -m "📖 DOC: Improved contribution docs" +``` + +Follow the conventional commit format: +- `📦 NEW:` New feature +- `🐛 BUG:` Bug fix +- `📖 DOC:` Documentation changes +- `👌🏻 IMP:` Improvements + +### 6. Push and Create PR +```bash +git push origin feature/your-feature-name +``` + +Then create a Pull Request on GitHub. + +## Code Style Guide + +### Type Hints +All functions should have type hints: +```python +def process_data(input_text: str, max_length: int = 100) -> Dict[str, Any]: + """Process input text and return results.""" + ... +``` + +### Docstrings +Use Google-style docstrings: +```python +def my_function(param1: str, param2: int) -> bool: + """ + Brief description of function. + + Args: + param1: Description of param1 + param2: Description of param2 + + Returns: + Description of return value + """ + ... +``` + + +## Testing Guidelines + +### Writing Tests +- Use pytest for all tests +- Use descriptive test names +- Test both success and error cases +- Use fixtures for common setup + +Example: +```python +def test_pipe_run_with_invalid_name_raises_error(langbase_client): + """Test that running a pipe with an invalid name raises the appropriate error.""" + with pytest.raises(NotFoundError) as exc_info: + langbase_client.pipes.run(name="non-existent-pipe") + + assert "404" in str(exc_info.value) +``` + +## Need Help? + +- Check existing issues and PRs +- Read the [documentation](https://langbase.com/docs) +- Ask in our [Discord community](https://discord.gg/langbase) +- Open an issue for bugs or feature requests + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..e58e0ad --- /dev/null +++ b/LICENCE @@ -0,0 +1,13 @@ +Copyright 2023 Langbase, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
\ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index 30c1094..0000000 --- a/Makefile +++ /dev/null @@ -1,75 +0,0 @@ -.PHONY: clean clean-build clean-pyc help test lint format build -.DEFAULT_GOAL := help - -define PRINT_HELP_PYSCRIPT -import re, sys - -for line in sys.stdin: - match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) - if match: - target, help = match.groups() - print("%-20s %s" % (target, help)) -endef -export PRINT_HELP_PYSCRIPT - -help: - @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) - -clean: clean-build clean-pyc ## remove all build, test, coverage and Python artifacts - -clean-build: ## remove build artifacts - rm -fr build/ - rm -fr dist/ - rm -fr .eggs/ - find . -name '*.egg-info' -exec rm -fr {} + - find . -name '*.egg' -exec rm -f {} + - -clean-pyc: ## remove Python file artifacts - find . -name '*.pyc' -exec rm -f {} + - find . -name '*.pyo' -exec rm -f {} + - find . -name '*~' -exec rm -f {} + - find . -name '__pycache__' -exec rm -fr {} + - -lint: ## check style with flake8 - flake8 langbase tests examples - -format: ## format code with black and isort - black langbase tests examples - isort langbase tests examples - -test: ## run tests - pytest - -test-cov: ## run tests with coverage report - pytest --cov=langbase --cov-report=term --cov-report=html - -venv: ## create virtual environment - python -m venv venv - @echo "Run 'source venv/bin/activate' to activate the virtual environment" - -dev-install: ## install the package in development mode - pip install -e ".[dev]" - -build: clean ## build the package - python -m build - -publish-test: build ## publish package to TestPyPI - twine upload --repository-url https://test.pypi.org/legacy/ dist/* - -publish: build ## publish package to PyPI - twine upload dist/* - -install-test: ## install package from TestPyPI - pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple langbase - -examples: ## run examples - @echo "Running examples..." - @for example in $(shell find examples -name "*.py" | sort); do \ - echo "\nRunning $${example}:"; \ - python $${example}; \ - done - -docs: ## generate Sphinx documentation - sphinx-apidoc -o docs/source langbase - $(MAKE) -C docs clean - $(MAKE) -C docs html diff --git a/README.md b/README.md index 011ff49..caf30a8 100644 --- a/README.md +++ b/README.md @@ -1,229 +1,224 @@ -# Langbase Python SDK: Setup Guide +# Langbase Python SDK -This document provides instructions for setting up the development environment, testing the SDK, and publishing it to PyPI. +[![PyPI version](https://badge.fury.io/py/langbase.svg)](https://badge.fury.io/py/langbase) +[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -## Local Development Setup +The official Python SDK for [Langbase](https://langbase.com) - Build declarative and composable AI-powered LLM products with ease. -### Prerequisites +## Documentation -- Python 3.7 or higher -- pip (Python package installer) -- virtualenv (recommended) +Check the [Langbase SDK documentation](https://langbase.com/docs/sdk) for more details. -### Setting Up the Development Environment +The following examples are for reference only. Prefer docs for the latest information. -1. **Clone the repository**: - ```bash - git clone https://github.com/LangbaseInc/langbase-sdk-python - cd langbase-sdk-python - ``` +## Features -2. 
**Create and activate a virtual environment**: - ```bash - python -m venv venv +- 🚀 **Simple and intuitive API** - Get started in minutes +- 🔄 **Streaming support** - Real-time text generation with typed events +- 🛠️ **Type safety** - Full type hints for better IDE support +- 📦 **Minimal dependencies** - Only what you need +- 🐍 **Python 3.7+** - Support for modern Python versions +- 🔌 **Async ready** - Coming soon! - # On Unix/macOS - source venv/bin/activate +## Installation - # On Windows - venv\Scripts\activate - ``` +```bash +pip install langbase +``` -3. **Install development dependencies**: - ```bash - pip install -e ".[dev]" - # Or - pip install -r requirements-dev.txt - ``` +## Quick Start -4. **Create a `.env` file**: - ```bash - cp .env.example .env - ``` +### 1. Set up your API key - Then edit the `.env` file to include your API keys. +Create a `.env` file and add your [Langbase API Key](https://langbase.com/docs/api-reference/api-keys). +```bash +LANGBASE_API_KEY="your-api-key" +``` -## Running Tests +--- -The SDK uses pytest for testing. To run the tests: +### 2. Initialize the client -```bash -# Run all tests -pytest +```python +from langbase import Langbase +import os +from dotenv import load_dotenv -# Run specific tests -pytest tests/test_client.py +load_dotenv() -# Run with coverage -pytest --cov=langbase +# Get API key from environment variable +langbase_api_key = os.getenv("LANGBASE_API_KEY") + +# Initialize the client +lb = Langbase(api_key=langbase_api_key) ``` -## Building the Package +### 3. Generate text -To build the package: +```python +# Simple generation +response = lb.pipes.run( + name="ai-agent", + messages=[{"role": "user", "content": "Tell me about AI"}], +) -```bash -python -m build +print(response["completion"]) ``` -This will create both source distributions and wheel distributions in the `dist/` directory. +--- -## Testing the Package Locally +### 4. Stream text (Simple) -You can test the package locally without publishing to PyPI: +```python +# Stream text as it's generated +response = lb.pipes.run( + name="ai-agent", + messages=[{"role": "user", "content": "Tell me about AI"}], + stream=True, +) -```bash -# Install in development mode -pip install -e . +for text in stream_text(response["stream"]): + print(text, end="", flush=True) ``` -Then you can run examples: - -``` -./venv/bin/python examples/pipes/pipes.run.py -``` +### 5. Stream with typed events (Advanced) 🆕 -## Publishing to PyPI +```python +from langbase import StreamEventType, get_typed_runner -### Prerequisites +# Get streaming response +response = lb.pipes.run( + name="ai-agent", + messages=[{"role": "user", "content": "Tell me about AI"}], + stream=True, +) -- A PyPI account -- twine package (`pip install twine`) +# Create typed stream processor +runner = get_typed_runner(response) -### Steps to Publish +# Register event handlers +runner.on(StreamEventType.CONNECT, lambda e: + print(f"✓ Connected to thread: {e['threadId']}")) -1. **Make sure your package version is updated**: - - Update the version number in `langbase/__init__.py` +runner.on(StreamEventType.CONTENT, lambda e: + print(e["content"], end="", flush=True)) -2. 
**Build the package**: - ```bash - python -m build - ``` +runner.on(StreamEventType.TOOL_CALL, lambda e: + print(f"\n🔧 Tool: {e['toolCall']['function']['name']}")) -If it doesn't work, try installing the latest version of `build`: +runner.on(StreamEventType.END, lambda e: + print(f"\n⏱️ Duration: {e['duration']:.2f}s")) -```bash -pip install build +# Process the stream +runner.process() ``` -And then run: +## Core Features -```bash -./venv/bin/python -m build -``` +### 🔄 Pipes - AI Pipeline Execution -3. **Check the package**: - ```bash - twine check dist/* - ``` - -4. **Upload to TestPyPI (optional but recommended)**: - ```bash - twine upload --repository-url https://test.pypi.org/legacy/ dist/* - ``` - -5. **Test the TestPyPI package**: - ```bash - pip install --index-url https://test.pypi.org/simple/ langbase - ``` - -6. **Upload to PyPI**: - ```bash - twine upload dist/* - ``` - -## Automating Releases with GitHub Actions - -For automated releases, you can use GitHub Actions. Create a workflow file at `.github/workflows/publish.yml` with the following content: - -```yaml -name: Publish to PyPI - -on: - release: - types: [published] - -jobs: - build-and-publish: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python -m build - twine upload dist/* -``` +```python +# List all pipes +pipes = lb.pipes.list() -## Project Structure +# Run a pipe +response = lb.pipes.run( + name="ai-agent", + messages=[{"role": "user", "content": "Hello!"}], + variables={"style": "friendly"}, # Optional variables + stream=True, # Enable streaming +) +``` -The project follows this structure: +### 🧠 Memory - Persistent Context Storage + +```python +# Create a memory +memory = lb.memories.create( + name="product-docs", + description="Product documentation", +) + +# Upload documents +lb.memories.documents.upload( + memory_name="product-docs", + document_name="guide.pdf", + document=open("guide.pdf", "rb"), + content_type="application/pdf", +) + +# Retrieve relevant context +results = lb.memories.retrieve( + query="How do I get started?", + memory=[{"name": "product-docs"}], + top_k=3, +) +``` +### 🤖 Agent - LLM Agent Execution + +```python +# Run an agent with tools +response = lb.agent.run( + model="openai:gpt-4", + messages=[{"role": "user", "content": "Search for AI news"}], + tools=[{"type": "function", "function": {...}}], + tool_choice="auto", + api_key="your-llm-api-key", + stream=True, +) ``` -langbase-python/ -├── langbase/ # Main package -│ ├── __init__.py # Package initialization -│ ├── client.py # Main client implementation -│ ├── request.py # HTTP request handling -│ ├── errors.py # Error classes -│ ├── types.py # Type definitions -│ └── utils.py # Utility functions -├── tests/ # Test package -│ ├── __init__.py # Test package initialization -│ ├── test_client.py # Tests for the client -│ ├── test_request.py # Tests for request handling -│ ├── test_errors.py # Tests for error classes -│ └── test_utils.py # Tests for utility functions -├── examples/ # Example scripts -├── setup.py # Package setup script -├── pyproject.toml # Project configuration -├── requirements.txt # Package dependencies -├── requirements-dev.txt # Development dependencies -├── LICENSE # MIT 
license -└── README.md # Main documentation + +### 🔧 Tools - Built-in Utilities + +```python +# Chunk text for processing +chunks = lb.chunker( + content="Long text to split...", + chunk_max_length=1024, + chunk_overlap=256, +) + +# Generate embeddings +embeddings = lb.embed( + chunks=["Text 1", "Text 2"], + embedding_model="openai:text-embedding-3-small", +) + +# Parse documents +content = lb.parser( + document=open("document.pdf", "rb"), + document_name="document.pdf", + content_type="application/pdf", +) ``` -## Contributing +## Examples -Contributions are welcome! Please feel free to submit a Pull Request. +Explore the [examples](./examples) directory for complete working examples: -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/amazing-feature`) -3. Commit your changes (`git commit -m 'Add some amazing feature'`) -4. Push to the branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request +- [Generate text](./examples/pipes/pipes.run.py) +- [Stream text with events](./examples/pipes/pipes.run.typed-stream.py) +- [Work with memory](./examples/memory/) +- [Agent with tools](./examples/agent/) +- [Document processing](./examples/parser/) +- [Workflow automation](./examples/workflow/) -## Troubleshooting +## API Reference -### Common Issues +For detailed API documentation, visit [langbase.com/docs/sdk](https://langbase.com/docs/sdk). -1. **Package not found after installation**: - - Make sure your virtual environment is activated - - Try running `pip list` to confirm installation +## Contributing -2. **Build errors**: - - Make sure you have the latest `build` package: `pip install --upgrade build` - - Check for syntax errors in your code +We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details. -3. **Test failures**: - - Run specific failing tests to get more details - - Check for API key issues if integration tests are failing +## Support -### Getting Help +- 📚 [Documentation](https://langbase.com/docs) +- 💬 [Discord Community](https://langbase.com/discord) +- 🐛 [Issue Tracker](https://github.com/LangbaseInc/langbase-python-sdk/issues) -If you encounter issues not covered here, please open an issue on GitHub with detailed information about the problem, including: +## License -- Your Python version -- Your operating system -- Any error messages -- Steps to reproduce the issue +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. diff --git a/examples/agent/agent.run.mcp.py b/examples/agent/agent.run.mcp.py new file mode 100644 index 0000000..4c28e57 --- /dev/null +++ b/examples/agent/agent.run.mcp.py @@ -0,0 +1,53 @@ +""" +Run Agent with MCP + +This example demonstrates how to run an agent with MCP (Model Context Protocol). 
+""" + +import os + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + # Run the agent with MCP server + response = langbase.agent.run( + stream=False, + model="openai:gpt-4.1-mini", + api_key=llm_api_key, + instructions="You are a helpful assistant that help users summarize text.", + input=[ + { + "role": "user", + "content": "What transport protocols does the 2025-03-26 version of the MCP spec (modelcontextprotocol/modelcontextprotocol) support?", + } + ], + mcp_servers=[ + {"type": "url", "name": "deepwiki", "url": "https://mcp.deepwiki.com/sse"} + ], + ) + + print("response:", response.get("output")) + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.memory.py b/examples/agent/agent.run.memory.py new file mode 100644 index 0000000..ffa0333 --- /dev/null +++ b/examples/agent/agent.run.memory.py @@ -0,0 +1,92 @@ +""" +Run Agent with Memory + +This example demonstrates how to retrieve and attach memory to an agent call. +""" + +import os +from io import BytesIO + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + create_memory() + + # Step 1: Retrieve memory + memory_response = langbase.memories.retrieve( + memory=[{"name": "career-advisor-memory"}], + query="Who is an AI Engineer?", + top_k=2, + ) + + # Step 2: Run the agent with the retrieved memory + response = langbase.agent.run( + model="openai:gpt-4.1", + api_key=llm_api_key, + instructions="You are a career advisor who helps users understand AI job roles.", + input=[ + { + "role": "user", + "content": f"{memory_response}\n\nNow, based on the above, who is an AI Engineer?", + } + ], + ) + + # Step 3: Display output + print("Agent Response:", response.get("output")) + + +def create_memory(): + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + langbase = Langbase(api_key=langbase_api_key) + + memories = langbase.memories.list() + memory_names = [memory["name"] for memory in memories] + career_advisor_memory_name = "career-advisor-memory" + + if career_advisor_memory_name not in memory_names: + memory = langbase.memories.create( + name="career-advisor-memory", + description="A memory for the career advisor agent", + ) + + print("Memory created: ", memory) + + content = """ + An AI Engineer is a software engineer who specializes in building AI systems. 
+ """ + + content_buffer = BytesIO(content.encode("utf-8")) + + langbase.memories.documents.upload( + memory_name="career-advisor-memory", + document_name="career-advisor-document.txt", + document=content_buffer, + content_type="text/plain", + ) + + print("Document uploaded") + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.py b/examples/agent/agent.run.py new file mode 100644 index 0000000..53514d0 --- /dev/null +++ b/examples/agent/agent.run.py @@ -0,0 +1,47 @@ +""" +Run Agent + +This example demonstrates how to run an agent with a user message. +""" + +import os + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + print("Please set: export LANGBASE_API_KEY='your_langbase_api_key'") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + print("Please set: export LLM_API_KEY='your_llm_api_key'") + exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + # Run the agent + response = langbase.agent.run( + stream=False, + model="openai:gpt-4.1-mini", + api_key=llm_api_key, + instructions="You are a helpful assistant that help users summarize text.", + input=[{"role": "user", "content": "Who is an AI Engineer?"}], + ) + + print("response:", response.get("output")) + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.stream.py b/examples/agent/agent.run.stream.py new file mode 100644 index 0000000..1a82d41 --- /dev/null +++ b/examples/agent/agent.run.stream.py @@ -0,0 +1,65 @@ +""" +Run Agent Streaming with get_runner + +This example demonstrates how to run an agent with streaming response using get_runner. +""" + +import os + +from dotenv import load_dotenv + +from langbase import Langbase, get_runner + +load_dotenv() + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + print("Please set: export LANGBASE_API_KEY='your_langbase_api_key'") + exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + try: + # Get readable stream - equivalent to const {stream} = await langbase.agent.run(...) + response = langbase.agent.run( + stream=True, + model="openai:gpt-4.1-mini", + instructions="You are a helpful assistant that help users summarize text.", + input=[{"role": "user", "content": "Who is an AI Engineer?"}], + api_key=api_key, + ) + + # Convert the stream to a stream runner - equivalent to getRunner(stream) + runner = get_runner(response) + + # Event-like handling in Python + # Method 1: Using iterator pattern (Python equivalent of event listeners) + + # Equivalent to runner.on('connect', ...) + print("Stream started.\n") + + try: + # Equivalent to runner.on('content', content => {...}) + for content in runner.text_generator(): + print(content, end="", flush=True) + + # Equivalent to runner.on('end', ...) 
+ print("\nStream ended.") + + except Exception as error: + # Equivalent to runner.on('error', error => {...}) + print(f"Error: {error}") + + except Exception as e: + print(f"Error: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.structured.py b/examples/agent/agent.run.structured.py new file mode 100644 index 0000000..c5c472f --- /dev/null +++ b/examples/agent/agent.run.structured.py @@ -0,0 +1,76 @@ +""" +Run Agent with Structured Output + +This example demonstrates how to run an agent with structured output. +""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + # Define the structured output JSON schema + math_reasoning_schema = { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": {"type": "string"}, + "output": {"type": "string"}, + }, + "required": ["explanation", "output"], + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + } + + # Run the agent with structured output + response = langbase.agent.run( + model="openai:gpt-4.1", + api_key=llm_api_key, + instructions="You are a helpful math tutor. Guide the user through the solution step by step.", + input=[{"role": "user", "content": "How can I solve 8x + 22 = -23?"}], + response_format={ + "type": "json_schema", + "json_schema": {"name": "math_reasoning", "schema": math_reasoning_schema}, + }, + ) + + # Parse and display the structured response + try: + solution = json.loads(response.get("output", "{}")) + print("✅ Structured Output Response:") + print(json.dumps(solution, indent=2)) + except json.JSONDecodeError as e: + print(f"❌ Error parsing JSON response: {e}") + print(f"Raw response: {response.get('output')}") + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.tool.py b/examples/agent/agent.run.tool.py new file mode 100644 index 0000000..c652bbe --- /dev/null +++ b/examples/agent/agent.run.tool.py @@ -0,0 +1,198 @@ +""" +Run Agent with Tool + +This example demonstrates how to run an agent that can call a tool — +in this case, a function that sends an email using the Resend API. 
+""" + +import json +import os + +import requests +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + +# Define the tool schema for sending emails +send_email_tool_schema = { + "type": "function", + "function": { + "name": "send_email", + "description": "Send an email using Resend API", + "parameters": { + "type": "object", + "required": ["from", "to", "subject", "html", "text"], + "properties": { + "from": {"type": "string"}, + "to": {"type": "string"}, + "subject": {"type": "string"}, + "html": {"type": "string"}, + "text": {"type": "string"}, + }, + "additionalProperties": False, + }, + }, +} + + +# Actual tool function +def send_email(args): + """Send an email using the Resend API.""" + from_email = args.get("from") + to_email = args.get("to") + subject = args.get("subject") + html = args.get("html") + text = args.get("text") + + # response = requests.post( + # "https://api.resend.com/emails", + # headers={ + # "Authorization": f"Bearer {os.environ.get('RESEND_API_KEY')}", + # "Content-Type": "application/json", + # }, + # json={ + # "from": from_email, + # "to": to_email, + # "subject": subject, + # "html": html, + # "text": text, + # }, + # ) + + # if not response.ok: + # raise Exception("Failed to send email") + + return f"✅ Email sent successfully to {to_email}!" + + +def main(): + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + # resend_api_key = os.environ.get("RESEND_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + exit(1) + + # if not resend_api_key: + # print("❌ Missing RESEND_API_KEY in environment variables.") + # exit(1) + + # Initialize Langbase client + langbase = Langbase(api_key=langbase_api_key) + + recipient_info = {"email": "sam@example.com"} + + email = { + "subject": "Welcome to Langbase!", + "html_email": "Hello Sam! Welcome to Langbase.", + "full_email": "Hello Sam! Welcome to Langbase.", + } + + input_messages = [{"role": "user", "content": "Send a welcome email to Sam."}] + + # Initial run with tool + response = langbase.agent.run( + model="openai:gpt-4.1-mini", + api_key=llm_api_key, + instructions="You are an email agent. You are given a task to send an email to a recipient. 
You have the ability to send an email using the send_email tool.", + input=input_messages, + tools=[send_email_tool_schema], + stream=False, + ) + + # Check if response contains choices (for tool calls) + choices = response.get("choices", []) + + print("\n📨 Initial Response:") + print( + f"Output: {response.get('output', 'No direct output - checking for tool calls...')}" + ) + + if not choices: + print("❌ No choices found in response") + return + + # Push agent tool call to messages + input_messages.append(choices[0].get("message", {})) + + # Detect tool call + tool_calls = choices[0].get("message", {}).get("tool_calls", []) + has_tool_calls = tool_calls and len(tool_calls) > 0 + + if has_tool_calls: + print(f"\n🔧 Tool calls detected: {len(tool_calls)}") + + for i, tool_call in enumerate(tool_calls, 1): + # Process each tool call + function = tool_call.get("function", {}) + name = function.get("name") + args = function.get("arguments") + + print(f"\n Tool Call #{i}:") + print(f" - Name: {name}") + print(f" - Raw Arguments: {args}") + + try: + parsed_args = json.loads(args) + print(f" - Parsed Arguments: {json.dumps(parsed_args, indent=4)}") + except json.JSONDecodeError: + print(f" ❌ Error parsing tool call arguments: {args}") + continue + + # Set email parameters + print("\n 📧 Preparing email with full details...") + parsed_args["from"] = "onboarding@resend.dev" + parsed_args["to"] = recipient_info["email"] + parsed_args["subject"] = email["subject"] + parsed_args["html"] = email["html_email"] + parsed_args["text"] = email["full_email"] + + print(f" - From: {parsed_args['from']}") + print(f" - To: {parsed_args['to']}") + print(f" - Subject: {parsed_args['subject']}") + + # Execute the tool + try: + print(f"\n ⚡ Executing {name}...") + result = send_email(parsed_args) + print(f" ✅ Tool result: {result}") + + # Add tool result to messages + input_messages.append( + { + "role": "tool", + "tool_call_id": tool_call.get("id"), + "name": name, + "content": result, + } + ) + except Exception as e: + print(f" ❌ Error executing tool: {e}") + continue + + print("\n🤖 Sending tool results back to agent for final response...") + + # Final agent response with tool result + final_response = langbase.agent.run( + model="openai:gpt-4.1-mini", + api_key=llm_api_key, + instructions="You are an email sending assistant. Confirm the email has been sent successfully.", + input=input_messages, + stream=False, + ) + + print("\n✨ Final Response:") + print(f"Agent: {final_response.get('output')}") + print("\n" + "=" * 50) + + +if __name__ == "__main__": + main() diff --git a/examples/agent/agent.run.workflow.py b/examples/agent/agent.run.workflow.py new file mode 100644 index 0000000..b939d12 --- /dev/null +++ b/examples/agent/agent.run.workflow.py @@ -0,0 +1,426 @@ +""" +Example: Using Langbase Workflow for multi-step AI operations. + +This example demonstrates how to use the Workflow class to orchestrate +complex multi-step AI operations with retry logic, timeouts, and error handling. +""" + +import asyncio +import os + +from dotenv import load_dotenv + +from langbase import Langbase, Workflow + +load_dotenv() + + +async def main(): + """ + Demonstrate various workflow capabilities with Langbase operations. 
+ """ + print("🚀 Langbase Workflow Example") + print("=" * 50) + + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + print("Please set: export LLM_API_KEY='your_llm_api_key'") + exit(1) + + # Initialize Langbase client and Workflow + langbase = Langbase(api_key=langbase_api_key) + workflow = Workflow(debug=True) # Enable debug mode for visibility + + # Example 1: Basic step execution + print("\n📝 Example 1: Basic Step Execution") + print("-" * 30) + + async def generate_summary(): + """Generate a summary using Langbase.""" + response = langbase.agent.run( + input="Summarize the benefits of AI in healthcare.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + try: + summary = await workflow.step( + {"id": "generate_summary", "run": generate_summary} + ) + print(f"✅ Summary generated: {summary[:100]}...") + except Exception as e: + print(f"❌ Failed to generate summary: {e}") + + # Example 2: Step with timeout + print("\n⏰ Example 2: Step with Timeout") + print("-" * 30) + + async def generate_with_timeout(): + """Generate content with potential timeout.""" + response = langbase.agent.run( + input="Write a detailed story about space exploration.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + try: + story = await workflow.step( + { + "id": "generate_story", + "timeout": 10000, # 10 seconds timeout + "run": generate_with_timeout, + } + ) + print(f"✅ Story generated: {story[:100]}...") + except Exception as e: + print(f"❌ Story generation failed or timed out: {e}") + + # Example 3: Step with retry logic + print("\n🔄 Example 3: Step with Retry Logic") + print("-" * 30) + + async def flaky_operation(): + """Simulate a potentially flaky operation.""" + import random + + # Simulate 70% success rate + if random.random() < 0.7: + response = langbase.agent.run( + input="Analyze the impact of renewable energy.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + raise Exception("Temporary service unavailable") + + try: + analysis = await workflow.step( + { + "id": "generate_analysis", + "retries": { + "limit": 3, + "delay": 1000, # 1 second delay + "backoff": "exponential", + }, + "run": flaky_operation, + } + ) + print(f"✅ Analysis generated: {analysis[:100]}...") + except Exception as e: + print(f"❌ Analysis generation failed after retries: {e}") + + # Example 4: Multi-step workflow with dependencies + print("\n🔗 Example 4: Multi-step Workflow") + print("-" * 30) + + # Step 1: Generate research topics + async def generate_topics(): + """Generate research topics.""" + response = langbase.agent.run( + input="Generate 3 AI research topics.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Step 2: Expand on each topic (using context from previous step) + async def expand_topics(): + """Expand on the generated topics.""" + # Access previous step's output from workflow context + topics = workflow.context["outputs"].get("research_topics", "") + + response = langbase.agent.run( + input=f"Expand on these research topics: {topics}", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Step 3: 
Generate recommendations + async def generate_recommendations(): + """Generate recommendations based on previous steps.""" + topics = workflow.context["outputs"].get("research_topics", "") + expansion = workflow.context["outputs"].get("topic_expansion", "") + + response = langbase.agent.run( + input=f"Based on these topics: {topics}\n\nAnd expansion: {expansion}\n\nGenerate research recommendations.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["completion"] + + try: + # Execute the multi-step workflow + topics = await workflow.step( + { + "id": "research_topics", + "timeout": 15000, # 15 seconds + "retries": {"limit": 2, "delay": 2000, "backoff": "linear"}, + "run": generate_topics, + } + ) + print(f"✅ Topics: {topics[:100]}...") + + expansion = await workflow.step( + { + "id": "topic_expansion", + "timeout": 20000, # 20 seconds + "run": expand_topics, + } + ) + print(f"✅ Expansion: {expansion[:100]}...") + + recommendations = await workflow.step( + { + "id": "final_recommendations", + "timeout": 15000, + "run": generate_recommendations, + } + ) + print(f"✅ Recommendations: {recommendations[:100]}...") + + except Exception as e: + print(f"❌ Multi-step workflow failed: {e}") + + # Example 5: Parallel steps (simulated with multiple workflows) + print("\n⚡ Example 5: Parallel Step Execution") + print("-" * 30) + + async def generate_technical_content(): + """Generate technical content.""" + response = langbase.agent.run( + input="Explain quantum computing basics.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + async def generate_marketing_content(): + """Generate marketing content.""" + response = langbase.agent.run( + input="Write marketing copy for a tech product.", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Create separate workflows for parallel execution + technical_workflow = Workflow(debug=True) + marketing_workflow = Workflow(debug=True) + + try: + # Execute steps in parallel + results = await asyncio.gather( + technical_workflow.step( + { + "id": "technical_content", + "timeout": 15000, + "run": generate_technical_content, + } + ), + marketing_workflow.step( + { + "id": "marketing_content", + "timeout": 15000, + "run": generate_marketing_content, + } + ), + return_exceptions=True, + ) + + technical_result, marketing_result = results + + if isinstance(technical_result, Exception): + print(f"❌ Technical content failed: {technical_result}") + else: + print(f"✅ Technical content: {technical_result[:100]}...") + + if isinstance(marketing_result, Exception): + print(f"❌ Marketing content failed: {marketing_result}") + else: + print(f"✅ Marketing content: {marketing_result[:100]}...") + + except Exception as e: + print(f"❌ Parallel execution failed: {e}") + + # Display final workflow context + print("\n📊 Final Workflow Context") + print("-" * 30) + print(f"Total steps executed: {len(workflow.context['outputs'])}") + for step_id, result in workflow.context["outputs"].items(): + result_preview = ( + str(result)[:50] + "..." if len(str(result)) > 50 else str(result) + ) + print(f" {step_id}: {result_preview}") + + print("\n🎉 Workflow examples completed!") + + +# Example of a more complex workflow class +class AIContentWorkflow: + """ + A specialized workflow class for AI content generation tasks. + """ + + def __init__(self, langbase_client: Langbase, debug: bool = False): + """ + Initialize the AI content workflow. 
+ + Args: + langbase_client: Langbase client instance + debug: Whether to enable debug mode + """ + self.lb = langbase_client + self.workflow = Workflow(debug=debug) + + async def generate_blog_post( + self, topic: str, target_length: str = "medium", tone: str = "professional" + ) -> dict: + """ + Generate a complete blog post with multiple steps. + + Args: + topic: The blog post topic + target_length: Target length (short, medium, long) + tone: Writing tone + + Returns: + Dictionary containing all generated content + """ + + # Step 1: Generate outline + async def create_outline(): + response = self.lb.agent.run( + input=f"Create a {target_length} blog post outline about: {topic}", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Step 2: Generate introduction + async def write_introduction(): + outline = self.workflow.context["outputs"]["outline"] + response = self.lb.agent.run( + input=f"Write an engaging introduction for this outline: {outline}. Tone: {tone}", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Step 3: Generate main content + async def write_main_content(): + outline = self.workflow.context["outputs"]["outline"] + intro = self.workflow.context["outputs"]["introduction"] + response = self.lb.agent.run( + input=f"Write the main content based on outline: {outline}\nIntroduction: {intro}\nTone: {tone}", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Step 4: Generate conclusion + async def write_conclusion(): + outline = self.workflow.context["outputs"]["outline"] + content = self.workflow.context["outputs"]["main_content"] + response = self.lb.agent.run( + input=f"Write a conclusion for this content: {content[:500]}...", + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + ) + return response["output"] + + # Execute the workflow + try: + outline = await self.workflow.step( + { + "id": "outline", + "timeout": 10000, + "retries": {"limit": 2, "delay": 1000, "backoff": "fixed"}, + "run": create_outline, + } + ) + + introduction = await self.workflow.step( + {"id": "introduction", "timeout": 15000, "run": write_introduction} + ) + + main_content = await self.workflow.step( + { + "id": "main_content", + "timeout": 30000, + "retries": {"limit": 1, "delay": 2000, "backoff": "fixed"}, + "run": write_main_content, + } + ) + + conclusion = await self.workflow.step( + {"id": "conclusion", "timeout": 10000, "run": write_conclusion} + ) + + return { + "topic": topic, + "outline": outline, + "introduction": introduction, + "main_content": main_content, + "conclusion": conclusion, + "metadata": { + "tone": tone, + "target_length": target_length, + "steps_executed": len(self.workflow.context["outputs"]), + }, + } + + except Exception as e: + print(f"❌ Blog post generation failed: {e}") + return { + "error": str(e), + "partial_results": self.workflow.context["outputs"], + } + + +async def advanced_workflow_example(): + """Demonstrate the advanced workflow class.""" + print("\n🚀 Advanced Workflow Example") + print("=" * 50) + + lb = Langbase(api_key=os.environ.get("LANGBASE_API_KEY")) + blog_workflow = AIContentWorkflow(lb, debug=True) + + result = await blog_workflow.generate_blog_post( + topic="The Future of Artificial Intelligence", + target_length="medium", + tone="engaging", + ) + + if "error" in result: + print(f"❌ Workflow failed: {result['error']}") + if result.get("partial_results"): + 
print("Partial results:", result["partial_results"]) + else: + print("✅ Blog post generated successfully!") + print(f"📝 Topic: {result['topic']}") + print(f"📋 Outline: {result['outline'][:100]}...") + print(f"🎯 Introduction: {result['introduction'][:100]}...") + print(f"📄 Content: {result['main_content'][:100]}...") + print(f"🎯 Conclusion: {result['conclusion'][:100]}...") + + +if __name__ == "__main__": + # Set up environment variables if not already set + if not os.getenv("LANGBASE_API_KEY"): + print("⚠️ Please set LANGBASE_API_KEY environment variable") + print(" You can get your API key from https://langbase.com/settings") + exit(1) + + # asyncio.run(main()) + # Run the advanced example + asyncio.run(advanced_workflow_example()) diff --git a/examples/chunk/chunk.py b/examples/chunk/chunk.py deleted file mode 100644 index e73acdc..0000000 --- a/examples/chunk/chunk.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Example demonstrating how to chunk a document in Langbase. -""" -import os -from langbase import Langbase - -# Get API key from environment variable or provide directly -api_key = os.environ.get("LANGBASE_API_KEY", "your-api-key") - -# Initialize the client -lb = Langbase(api_key=api_key) - -# Path to document to chunk -document_path = "path/to/your/document.txt" # Change this to your document path -document_name = "article.txt" - -# Chunk the document -try: - # Ensure file exists - if not os.path.exists(document_path): - raise FileNotFoundError(f"Document not found at {document_path}") - - # Determine content type based on file extension - file_extension = os.path.splitext(document_path)[1].lower() - content_type_map = { - ".pdf": "application/pdf", - ".txt": "text/plain", - ".md": "text/markdown", - ".csv": "text/csv", - ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ".xls": "application/vnd.ms-excel" - } - - content_type = content_type_map.get(file_extension) - if not content_type: - raise ValueError(f"Unsupported file type: {file_extension}") - - # Read the file content - with open(document_path, "rb") as file: - document_content = file.read() - - # Chunk the document - chunks = lb.chunk( - document=document_content, - document_name=document_name, - content_type=content_type, - chunk_max_length="1000", # Optional: maximum chunk length - chunk_overlap="100", # Optional: overlap between chunks - separator="\n\n" # Optional: custom separator - ) - - print(f"Successfully chunked document into {len(chunks)} chunks") - print() - - # Display chunks - for i, chunk in enumerate(chunks, 1): - print(f"Chunk {i} ({len(chunk)} characters):") - # Print a preview if the chunk is long - preview = (chunk[:200] + "...") if len(chunk) > 200 else chunk - print(preview) - print("-" * 80) - -except Exception as e: - print(f"Error chunking document: {e}") diff --git a/examples/chunker/chunker.py b/examples/chunker/chunker.py new file mode 100644 index 0000000..8c66b99 --- /dev/null +++ b/examples/chunker/chunker.py @@ -0,0 +1,45 @@ +""" +Example demonstrating how to chunk text content using Langbase. +""" + +import json +import os +import pathlib + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + +# Get API key from environment variable +langbase_api_key = os.getenv("LANGBASE_API_KEY") + +# Initialize the client +lb = Langbase(api_key=langbase_api_key) + + +def main(): + """ + Chunks text content using Langbase. 
+ """ + try: + # Get the path to the document + document_path = pathlib.Path(__file__).parent / "composable-ai.md" + + # Read the file + with open(document_path, "r", encoding="utf-8") as file: + document_content = file.read() + # Chunk the content + chunks = lb.chunker( + content=document_content, chunk_max_length=1024, chunk_overlap=256 + ) + + print(json.dumps(chunks, indent=2)) + + except Exception as e: + print(f"Error chunking content: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/parse/composable-ai.md b/examples/chunker/composable-ai.md similarity index 100% rename from examples/parse/composable-ai.md rename to examples/chunker/composable-ai.md diff --git a/examples/embed/embed.py b/examples/embed/embed.py new file mode 100644 index 0000000..67401c4 --- /dev/null +++ b/examples/embed/embed.py @@ -0,0 +1,31 @@ +# Experimental upcoming beta AI primitive. +# Please refer to the documentation for more information: https://langbase.com/docs +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + +load_dotenv() + +# Configure the Langbase client with your API key +langbase = Langbase(api_key=os.environ.get("LANGBASE_API_KEY")) + + +def main(): + """ + Generates embeddings for the given text chunks. + """ + response = langbase.embed( + chunks=[ + "Langbase is the most powerful serverless platform for building AI agents with memory. Build, scale, and evaluate AI agents with semantic memory (RAG) and world-class developer experience.", + "We process billions of AI messages/tokens daily. Built for every developer, not just AI/ML experts.", + ], + embedding_model="openai:text-embedding-3-large", + ) + print(json.dumps(response, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/examples/memories/memories.create.py b/examples/memories/memories.create.py deleted file mode 100644 index fcb709c..0000000 --- a/examples/memories/memories.create.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Example demonstrating how to create a memory in Langbase. -""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Define memory configuration -memory_config = { - "name": "product-knowledge", - "description": "Memory store for product documentation and information", - "embedding_model": "openai:text-embedding-3-large" # Optional: Specify embedding model -} - -# Create the memory -try: - new_memory = lb.memories.create(**memory_config) - - print(f"Successfully created memory '{new_memory['name']}'") - print(f"Description: {new_memory.get('description', 'N/A')}") - print(f"Embedding model: {new_memory.get('embedding_model', 'default')}") - print(f"URL: {new_memory.get('url', 'N/A')}") - -except Exception as e: - print(f"Error creating memory: {e}") diff --git a/examples/memories/memories.docs.delete.py b/examples/memories/memories.docs.delete.py deleted file mode 100644 index e4e4c76..0000000 --- a/examples/memories/memories.docs.delete.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Example demonstrating how to delete a document from memory in Langbase.
-""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Memory and document to delete -memory_name = "product-knowledge" -document_name = "product-manual.pdf" - -# Delete the document -try: - response = lb.memories.documents.delete( - memory_name=memory_name, - document_name=document_name - ) - - if response.get('success', False): - print(f"Successfully deleted document '{document_name}' from memory '{memory_name}'") - else: - print(f"Failed to delete document '{document_name}' from memory '{memory_name}'") - -except Exception as e: - print(f"Error deleting document: {e}") diff --git a/examples/memories/memories.docs.list.py b/examples/memories/memories.docs.list.py deleted file mode 100644 index 5fea223..0000000 --- a/examples/memories/memories.docs.list.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Example demonstrating how to list documents in a memory in Langbase. -""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Memory name to list documents from -memory_name = "product-knowledge" - -# List documents in the memory -try: - documents = lb.memories.documents.list(memory_name=memory_name) - - print(f"Found {len(documents)} documents in memory '{memory_name}':") - for doc in documents: - print(f"- {doc['name']}") - print(f" Status: {doc.get('status', 'unknown')}") - print(f" Type: {doc.get('metadata', {}).get('type', 'unknown')}") - print(f" Size: {doc.get('metadata', {}).get('size', 'unknown')} bytes") - print(f" Enabled: {doc.get('enabled', True)}") - if doc.get('status_message'): - print(f" Message: {doc['status_message']}") - print() - -except Exception as e: - print(f"Error listing documents: {e}") diff --git a/examples/memories/memories.docs.retry-embed.py b/examples/memories/memories.docs.retry-embed.py deleted file mode 100644 index b859e90..0000000 --- a/examples/memories/memories.docs.retry-embed.py +++ /dev/null @@ -1,55 +0,0 @@ -""" -Example demonstrating how to retry embedding generation for a document in Langbase. - -This is useful when document embedding generation has failed or needs to be refreshed. 
-""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Memory and document to retry embeddings for -memory_name = "product-knowledge" -document_name = "product-manual.pdf" - -# Retry embedding generation -try: - response = lb.memories.documents.embeddings.retry( - memory_name=memory_name, - document_name=document_name - ) - - if response.get('success', False): - print(f"Successfully triggered embedding retry for document '{document_name}' in memory '{memory_name}'") - print("The embedding generation will run asynchronously in the background.") - print("Check the document status later to confirm completion.") - else: - print(f"Failed to trigger embedding retry for document '{document_name}' in memory '{memory_name}'") - if 'message' in response: - print(f"Message: {response['message']}") - -except Exception as e: - print(f"Error retrying embeddings: {e}") - -# Optionally, check document status after triggering the retry -try: - print("\nChecking document status...") - documents = lb.memories.documents.list(memory_name=memory_name) - - for doc in documents: - if doc['name'] == document_name: - print(f"Document: {doc['name']}") - print(f"Status: {doc.get('status', 'unknown')}") - if doc.get('status_message'): - print(f"Status message: {doc['status_message']}") - print(f"Enabled: {doc.get('enabled', True)}") - break - else: - print(f"Document '{document_name}' not found in memory '{memory_name}'") - -except Exception as e: - print(f"Error checking document status: {e}") diff --git a/examples/memories/memories.docs.upload.py b/examples/memories/memories.docs.upload.py deleted file mode 100644 index 7c5774d..0000000 --- a/examples/memories/memories.docs.upload.py +++ /dev/null @@ -1,62 +0,0 @@ -""" -Example demonstrating how to upload a document to a memory in Langbase. 
-""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Memory name where to upload the document -memory_name = "product-knowledge" - -# Path to the document to upload -document_path = "path/to/your/document.pdf" # Change this to your document path -document_name = "product-manual.pdf" # Name to assign to the document - -# Metadata for the document -document_metadata = { - "product": "Widget Pro 2000", - "version": "v2.1", - "department": "Engineering", - "language": "English" -} - -# Upload the document -try: - # Ensure file exists - if not os.path.exists(document_path): - raise FileNotFoundError(f"Document not found at {document_path}") - - # Determine content type based on file extension - file_extension = os.path.splitext(document_path)[1].lower() - content_type_map = { - ".pdf": "application/pdf", - ".txt": "text/plain", - ".md": "text/markdown", - ".csv": "text/csv", - ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ".xls": "application/vnd.ms-excel" - } - - content_type = content_type_map.get(file_extension) - if not content_type: - raise ValueError(f"Unsupported file type: {file_extension}") - - # Upload the document - upload_response = lb.memories.documents.upload( - memory_name=memory_name, - document_name=document_name, - document=document_path, - content_type=content_type, - meta=document_metadata - ) - - print(f"Successfully uploaded document '{document_name}' to memory '{memory_name}'") - print(f"Status code: {upload_response.status_code}") - -except Exception as e: - print(f"Error uploading document: {e}") diff --git a/examples/memories/memories.list.py b/examples/memories/memories.list.py deleted file mode 100644 index 4fbe7f6..0000000 --- a/examples/memories/memories.list.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Example demonstrating how to list all memories in your Langbase account. -""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# List all memories -try: - memories = lb.memories.list() - - print(f"Found {len(memories)} memories:") - for memory in memories: - print(f"- {memory['name']}: {memory.get('description', 'No description')}") - print(f" Embedding model: {memory.get('embedding_model', 'default')}") - print(f" Owner: {memory.get('owner_login', 'unknown')}") - print() - -except Exception as e: - print(f"Error listing memories: {e}") diff --git a/examples/memories/memories.retrieve.py b/examples/memories/memories.retrieve.py deleted file mode 100644 index 03e8cea..0000000 --- a/examples/memories/memories.retrieve.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Example demonstrating how to retrieve information from memory in Langbase. -""" -import os -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Memory name to retrieve from -memory_name = "product-knowledge" - -# Query to search for -query = "How do I reset my Widget Pro 2000?" 
- -# Retrieve relevant information -try: - results = lb.memories.retrieve( - query=query, - memory=[{"name": memory_name}], # Can include multiple memories - top_k=3 # Return top 3 most relevant chunks - ) - - print(f"Found {len(results)} results for query: '{query}'") - print() - - for i, result in enumerate(results, 1): - print(f"Result {i}:") - print(f"Similarity score: {result['similarity']:.4f}") - print(f"Metadata: {result.get('meta', {})}") - print("Content:") - print("-" * 80) - print(result['text']) - print("-" * 80) - print() - -except Exception as e: - print(f"Error retrieving from memory: {e}") diff --git a/examples/memory/memory.create.py b/examples/memory/memory.create.py new file mode 100644 index 0000000..fafde40 --- /dev/null +++ b/examples/memory/memory.create.py @@ -0,0 +1,37 @@ +""" +Example demonstrating how to create a memory in Langbase. +""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Create the memory + try: + response = lb.memories.create( + name="product-knowledge", + description="Memory store for product documentation and information", + embedding_model="openai:text-embedding-3-large", + ) + + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error creating memory: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.docs.delete.py b/examples/memory/memory.docs.delete.py new file mode 100644 index 0000000..ecb645b --- /dev/null +++ b/examples/memory/memory.docs.delete.py @@ -0,0 +1,42 @@ +""" +Example demonstrating how to delete documents from a memory in Langbase. +""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Memory name and document ID to delete + memory_name = "product-knowledge" # Replace with your memory name + document_name = "intro.txt" # Replace with the document name you want to delete + + # Delete the document + try: + response = lb.memories.documents.delete( + memory_name=memory_name, document_name=document_name + ) + + print( + f"Document '{document_name}' deleted successfully from memory '{memory_name}'" + ) + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error deleting document: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.docs.list.py b/examples/memory/memory.docs.list.py new file mode 100644 index 0000000..a73fa9f --- /dev/null +++ b/examples/memory/memory.docs.list.py @@ -0,0 +1,37 @@ +""" +Example demonstrating how to list documents in a memory in Langbase. 
+""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Memory name to list documents from + memory_name = "product-knowledge" # Replace with your memory name + + # List documents in the memory + try: + response = lb.memories.documents.list(memory_name=memory_name) + + print(f"Documents in memory '{memory_name}':") + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error listing documents: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.docs.retry-embed.py b/examples/memory/memory.docs.retry-embed.py new file mode 100644 index 0000000..8ba6370 --- /dev/null +++ b/examples/memory/memory.docs.retry-embed.py @@ -0,0 +1,40 @@ +""" +Example demonstrating how to retry embedding for documents in a memory in Langbase. +""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Memory name to retry embedding for + memory_name = "product-knowledge" # Replace with your memory name + document_name = "name.txt" # Replace with document name + + # Retry embedding for failed documents + try: + response = lb.memories.documents.embeddings.retry( + memory_name=memory_name, document_name=document_name + ) + + print(f"Retry embedding initiated for memory '{memory_name}'") + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error retrying embedding: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.docs.upload.py b/examples/memory/memory.docs.upload.py new file mode 100644 index 0000000..7d2044e --- /dev/null +++ b/examples/memory/memory.docs.upload.py @@ -0,0 +1,42 @@ +""" +Example demonstrating how to upload documents to a memory in Langbase. +""" + +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Memory name to upload documents to + memory_name = "product-knowledge" # Replace with your memory name + + # Upload documents to the memory + try: + content = "Langbase is a powerful platform for building AI applications with composable AI." + response = lb.memories.documents.upload( + memory_name=memory_name, + document_name="intro.txt", + document=content.encode("utf-8"), # Convert string to bytes + content_type="text/plain", + meta={"source": "documentation", "section": "introduction"}, + ) + print("Document uploaded successfully!") + print(f"Status: {response.status_code}") + + except Exception as e: + print(f"Error uploading documents: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.list.py b/examples/memory/memory.list.py new file mode 100644 index 0000000..a5fc92c --- /dev/null +++ b/examples/memory/memory.list.py @@ -0,0 +1,33 @@ +""" +Example demonstrating how to list memories in Langbase. 
+""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # List all memories + try: + response = lb.memories.list() + + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error listing memories: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/memory/memory.retrieve.py b/examples/memory/memory.retrieve.py new file mode 100644 index 0000000..6970357 --- /dev/null +++ b/examples/memory/memory.retrieve.py @@ -0,0 +1,44 @@ +""" +Example demonstrating how to retrieve memories in Langbase. + +This example shows how to retrieve memories using a query. The memory parameter +expects a list of dictionaries with 'name' keys specifying which memories to search. +""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Retrieve memories using a query + memory_name = "product-knowledge" # Replace with your memory name + query = "What is Langbase?" + + try: + response = lb.memories.retrieve( + query=query, + memory=[{"name": memory_name}], + top_k=5, # Number of relevant memories to retrieve + ) + + print(f"Retrieved memories for query: '{query}'") + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error retrieving memories: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/parser/composable-ai.md b/examples/parser/composable-ai.md new file mode 100644 index 0000000..7ac4831 --- /dev/null +++ b/examples/parser/composable-ai.md @@ -0,0 +1,99 @@ +# Composable AI + +## The Developer Friendly Future of AI Infrastructure + +In software engineering, composition is a powerful concept. It allows for building complex systems from simple, interchangeable parts. Think Legos, Docker containers, React components. Langbase extends this concept to AI infrastructure with our **Composable AI** stack using [Pipes][pipe] and [Memory][memory]. + +--- + +## Why Composable AI? + +**Composable and personalized AI**: With Langbase, you can compose multiple models together into pipelines. It's easier to think about, easier to develop for, and each pipe lets you choose which model to use for each task. You can see cost of every step. And allow your customers to hyper-personalize. + +**Effortlessly zero-config AI infra**: Maybe you want to use a smaller, domain-specific model for one task, and a larger general-purpose model for another task. Langbase makes it easy to use the right primitives and tools for each part of the job and provides developers with a zero-config composable AI infrastructure. + +That's a nice way of saying, *you get a unicorn-scale API in minutes, not months*. + +> **The most common problem** I hear about in Gen AI space is that my AI agents are too complex and I can't scale them, too much AI talking to AI. I don't have control, I don't understand the cost, and the impact of this change vs that. Time from new model to prod is too long. Feels static, my customers can't personalize it. ⌘ Langbase fixes all this. 
— [AA](https://www.linkedin.com/in/MrAhmadAwais/) + +--- + +## Interactive Example: Composable AI Email Agent + +But how does Composable AI work? + +Here's an interactive example of a composable AI Email Agent: it classifies, summarizes, and responds. Click to send a spam or valid email and see how composable it is: swap any pipe or LLM, hyper-personalize (for you or your users), and observe costs. Everything is composable. + + + +## Example: Composable AI Email Agent + + +I have built an AI email agent that can read my emails, understand the sentiment, summarize, and respond to them. Let's break down how it works. Hint: several pipes working together to make smart, personalized decisions. + +1. I created a pipe: `email-sentiment` — this one reads my emails to understand the sentiment +2. `email-summarizer` pipe — it summarizes my emails so I can quickly understand them +3. `email-decision-maker` pipe — should I respond? is it urgent? is it a newsletter? +4. If the `email-decision-maker` pipe says *yes*, then I need to respond. This invokes the final pipe +5. `email-writer` pipe — writes a draft response to my emails with one of the eight formats I have + + +## Why is Composable AI powerful? + +Ah, the power of composition. I can swap out any of these pipes with a new one. + +- **Flexibility**: Swap components without rewriting everything +- **Reusability**: Build complex systems from simple, tested parts +- **Scalability**: Optimize at the component level for better performance +- **Observability**: Monitor and debug each step of your AI pipeline + + +### Control flow + +- Maybe I want to use a different sentiment analysis model +- Or maybe I want to use a different summarizer when I'm on vacation +- I can choose a different LLM (small or large) based on the task +- BTW I definitely use a different `decision-maker` pipe on a busy day. + +### Extensibility + +- **Add more when needed**: I can also add more pipes to this pipeline. Maybe I want to add a pipe that checks my calendar or the weather before I respond to an email. You get the idea. Always bet on composition. +- **Eight Formats to write emails**: And I have several formats. Because Pipes are composable, I have eight different versions of the `email-writer` pipe. I have a pipe `email-pick-writer` that picks the correct pipe to draft a response with. Why? I talk to my friends differently than my investors, reports, managers, vendors — you name it. + + +### Long-term memory and context awareness + +- By the way, I have all my emails in an `emails-store` memory, which any of these pipes can refer to if needed. That's managed [semantic RAG][memory] over all the emails I have ever received. +- And yes, my `emails-smart-spam` memory knows all the pesky smart spam emails that I don't want to see in my inbox. + +### Cost & Observability + +- Because each intent and action is mapped to a Pipe, an excellent primitive for using LLMs, I can see everything related to the cost, usage, and effectiveness of each pipe. I can see how many emails were processed, how many were responded to, how many were marked as spam, etc. +- I can switch LLMs for any of these actions, [fork a pipe][fork], and see how it performs. I can version my pipes and see how the new version performs against the old one. 
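To make the composition described above concrete, here is a minimal sketch of how such pipes could be chained with this Python SDK. This is not the author's actual agent: the pipe and memory names (`email-sentiment`, `email-decision-maker`, `email-writer`, `emails-store`) are hypothetical, and the `completion` and `text` fields read from the responses are assumptions about the response shape, hedged with `.get()`.

```python
# A minimal sketch, assuming the hypothetical pipes and memory above already
# exist in your Langbase account and LANGBASE_API_KEY is set.
import os

from langbase import Langbase

lb = Langbase(api_key=os.getenv("LANGBASE_API_KEY"))

email = "Hi, I still have not received the March invoice. Can you resend it by Friday?"

# Step 1: one pipe classifies the sentiment of the incoming email.
sentiment = lb.pipes.run(
    name="email-sentiment",  # hypothetical pipe
    messages=[{"role": "user", "content": email}],
    stream=False,
)

# Step 2: a second pipe decides whether a reply is needed at all.
decision = lb.pipes.run(
    name="email-decision-maker",  # hypothetical pipe
    messages=[
        {
            "role": "user",
            "content": f"Email: {email}\nSentiment: {sentiment.get('completion', '')}\nReply needed? Answer yes or no.",
        }
    ],
    stream=False,
)

if "yes" in decision.get("completion", "").lower():
    # Step 3: pull related context from long-term memory (semantic RAG).
    chunks = lb.memories.retrieve(
        query=email,
        memory=[{"name": "emails-store"}],  # hypothetical memory
        top_k=3,
    )
    context = "\n".join(chunk.get("text", "") for chunk in chunks)  # 'text' field assumed

    # Step 4: a third pipe drafts the reply, grounded in the retrieved context.
    draft = lb.pipes.run(
        name="email-writer",  # hypothetical pipe
        messages=[{"role": "user", "content": f"Context:\n{context}\n\nWrite a reply to:\n{email}"}],
        stream=False,
    )
    print(draft.get("completion", ""))
```

Swapping the model behind any one of these pipes, or adding another step, does not require touching the rest of the chain, which is the point of the composition described above.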
+- And we're just getting started … + +### Why Developers Love It + +- **Modular**: Build, test, and deploy pipes x memorysets independently +- **Extensible**: API-first no dependency on a single language +- **Version Control Friendly**: Track changes at the pipe level +- **Cost-Effective**: Optimize resource usage for each AI task +- **Stakeholder Friendly**: Collaborate with your team on each pipe and memory. All your R&D team, engineering, product, GTM (marketing, sales), and even stakeholders can collaborate on the same pipe. It's like a Google Doc x GitHub for AI. That's what makes it so powerful. + +--- + +Each pipe and memory are like a docker container. You can have any number of pipes and memorysets. + +Can't wait to share more exciting examples of composable AI. We're cookin!! + +We'll share more on this soon. Follow us on [Twitter][x] and [LinkedIn][li] for updates. + +[pipe]: /pipe/ +[memory]: /memory +[signup]: https://langbase.fyi/awesome +[x]: https://twitter.com/LangbaseInc +[li]: https://www.linkedin.com/company/langbase/ +[email]: mailto:support@langbase.com?subject=Pipe-Quickstart&body=Ref:%20https://langbase.com/docs/pipe/quickstart +[fork]: https://langbase.com/docs/features/fork + +--- diff --git a/examples/parse/parse.py b/examples/parser/parser.py similarity index 75% rename from examples/parse/parse.py rename to examples/parser/parser.py index 77ff078..1b35024 100644 --- a/examples/parse/parse.py +++ b/examples/parser/parser.py @@ -1,40 +1,49 @@ """ Example demonstrating how to parse a document using Langbase. """ + +import json import os import pathlib + +from dotenv import load_dotenv + from langbase import Langbase +load_dotenv() + # Get API key from environment variable langbase_api_key = os.getenv("LANGBASE_API_KEY") # Initialize the client lb = Langbase(api_key=langbase_api_key) + def main(): """ Parses a document using Langbase. """ try: # Get the path to the document - document_path = pathlib.Path(__file__).parent / "examples" / "parse" / "composable-ai.md" + document_path = pathlib.Path(__file__).parent / "composable-ai.md" # Read the file with open(document_path, "rb") as file: document_content = file.read() # Parse the document - results = lb.parse( + results = lb.parser( document=document_content, document_name="composable-ai.md", - content_type="application/pdf" # Note: This is set to PDF despite the .md extension + content_type="text/markdown", ) # Print the results - print(results) + print(json.dumps(results, indent=2)) except Exception as e: print(f"Error parsing document: {e}") + if __name__ == "__main__": main() diff --git a/examples/pipes/pipes.create.py b/examples/pipes/pipes.create.py index 50a8d88..a6af536 100644 --- a/examples/pipes/pipes.create.py +++ b/examples/pipes/pipes.create.py @@ -1,38 +1,44 @@ """ -Example demonstrating how to create a new pipe in Langbase. +Example demonstrating how to create a pipe in Langbase. 
""" + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Define pipe configuration -pipe_config = { - "name": "my-assistant-pipe", # Unique name for your pipe - "description": "An assistant that helps with general inquiries", - "model": "openai:gpt-4o-2024-11-20", # Adjust to your preferred model - "temperature": 0.7, - "max_tokens": 1000, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant that provides concise, accurate responses." - } - ] -} - -# Create the pipe -try: - new_pipe = lb.pipes.create(**pipe_config) - - print(f"Successfully created pipe '{new_pipe['name']}'") - print(f"Pipe API Key: {new_pipe.get('api_key', 'N/A')}") - print(f"Status: {new_pipe.get('status', 'unknown')}") - print(f"URL: {new_pipe.get('url', 'N/A')}") - -except Exception as e: - print(f"Error creating pipe: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + langbase = Langbase(api_key=langbase_api_key) + + # Create the pipe + try: + response = langbase.pipes.create( + name="summary-agent", + description="A pipe for text summarization", + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that summarizes text clearly and concisely.", + } + ], + upsert=True, + ) + + print("Pipe created successfully!") + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error creating pipe: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.list.py b/examples/pipes/pipes.list.py index 288ec47..e7f473b 100644 --- a/examples/pipes/pipes.list.py +++ b/examples/pipes/pipes.list.py @@ -1,18 +1,29 @@ -# test_script.py -from langbase import Langbase +import json import os -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + # Test a basic operation (mock or use a real API key) + try: + # For testing purposes, you can use a mock or a real simple call + # This would depend on your API, for example: + response = lb.pipes.list() + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error occurred: {e}") -# Initialize the client -lb = Langbase(api_key=langbase_api_key) -# Test a basic operation (mock or use a real API key) -try: - # For testing purposes, you can use a mock or a real simple call - # This would depend on your API, for example: - response = lb.pipes.list() - print("Success! Response:", response) -except Exception as e: - print(f"Error occurred: {e}") +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.run.py b/examples/pipes/pipes.run.py index 8404e89..84e1a29 100644 --- a/examples/pipes/pipes.run.py +++ b/examples/pipes/pipes.run.py @@ -1,43 +1,41 @@ """ Example demonstrating how to run a pipe in non-streaming mode in Langbase. 
""" -import os + import json +import os + +from dotenv import load_dotenv + from langbase import Langbase from langbase.errors import APIError -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Name of the pipe to run -pipe_name = "my-assistant-pipe" # Replace with your pipe name - -# Define messages for the conversation -messages = [ - { - "role": "user", - "content": "Explain quantum computing in simple terms." - } -] - -# Run the pipe with explicit stream=False -try: - print(f"Running pipe '{pipe_name}' in non-streaming mode...") - - response = lb.pipes.run( - name=pipe_name, - messages=messages, - stream=False - ) - - # Print the entire response as is - print("\nRESPONSE:") - print(json.dumps(response, indent=2)) - -except APIError as e: - print(f"API Error: {e}") -except Exception as e: - print(f"Unexpected error: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Run the pipe with explicit stream=False + try: + response = lb.pipes.run( + name="summary-agent", + messages=[{"role": "user", "content": "Who is an AI Engineer?"}], + stream=False, + ) + + # Print the entire response as is + print(json.dumps(response, indent=2)) + + except APIError as e: + print(f"API Error: {e}") + except Exception as e: + print(f"Unexpected error: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.run.stream.py b/examples/pipes/pipes.run.stream.py index d8f492d..eac24cf 100644 --- a/examples/pipes/pipes.run.stream.py +++ b/examples/pipes/pipes.run.stream.py @@ -1,52 +1,46 @@ """ -Example demonstrating how to run a pipe with streaming in Langbase. +Example demonstrating how to run a pipe in streaming mode using get_runner in Langbase. """ + import os -import json -from langbase import Langbase - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Name of the pipe to run -pipe_name = "my-assistant-pipe" - -# Define messages for the conversation -messages = [ - { - "role": "user", - "content": "Write a short story about a robot learning to paint." - } -] - -# Run the pipe with streaming enabled -try: - stream_response = lb.pipes.run( - name=pipe_name, - messages=messages, - stream=True - ) - - print("Thread ID:", stream_response['thread_id']) - - print("STREAMING RESPONSE:") - - # Process each chunk as it arrives - for chunk in stream_response["stream"]: - if chunk: - try: - # Try to decode as JSON - chunk_data = json.loads(chunk.decode('utf-8')) - if "completion" in chunk_data: - print(chunk_data["completion"], end="", flush=True) - except json.JSONDecodeError: - # If not JSON, print raw decoded chunk - print(chunk.decode('utf-8'), end="", flush=True) - - print("\n\nStream completed") - -except Exception as e: - print(f"Error streaming from pipe: {e}") + +from dotenv import load_dotenv + +from langbase import Langbase, get_runner + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Name of the pipe to run + pipe_name = "summary-agent" # Replace with your pipe name + + try: + # Message 1: Tell something to the LLM. 
+ print("Stream started \n\n") + response1 = lb.pipes.run( + name=pipe_name, + messages=[{"role": "user", "content": "What is an AI Engineer?"}], + stream=True, + ) + + runner1 = get_runner(response1) + + # Use text_generator() to stream content + for content in runner1.text_generator(): + print(content, end="", flush=True) + + print("\n\nStream ended!") # Add a newline after first response + + except Exception as e: + print(f"Error: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.run.typed-stream.py b/examples/pipes/pipes.run.typed-stream.py new file mode 100644 index 0000000..d9c4fbe --- /dev/null +++ b/examples/pipes/pipes.run.typed-stream.py @@ -0,0 +1,78 @@ +""" +Example demonstrating the new typed streaming interface for pipes. + +This shows how to use event-based streaming with typed events for better developer experience. +""" + +import os + +from dotenv import load_dotenv + +from langbase import Langbase, StreamEventType, get_typed_runner + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Name of the pipe to run + pipe_name = "summary-agent" # Replace with your pipe name + + try: + # Get streaming response + response = lb.pipes.run( + name=pipe_name, + messages=[{"role": "user", "content": "What is an AI Engineer?"}], + stream=True, + ) + + # Create typed stream processor + runner = get_typed_runner(response) + + # Register event handlers + runner.on( + StreamEventType.CONNECT, + lambda event: print(f"✓ Connected! Thread ID: {event['threadId']}\n"), + ) + + runner.on( + StreamEventType.CONTENT, + lambda event: print(event["content"], end="", flush=True), + ) + + runner.on( + StreamEventType.TOOL_CALL, + lambda event: print( + f"\n🔧 Tool call: {event['toolCall']['function']['name']}" + ), + ) + + runner.on( + StreamEventType.COMPLETION, + lambda event: print(f"\n\n✓ Completed! Reason: {event['reason']}"), + ) + + runner.on( + StreamEventType.ERROR, + lambda event: print(f"\n❌ Error: {event['message']}"), + ) + + runner.on( + StreamEventType.END, + lambda event: print(f"⏱️ Total duration: {event['duration']:.2f}s"), + ) + + # Process the stream + runner.process() + + except Exception as e: + print(f"Error: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.tool.stream.py b/examples/pipes/pipes.tool.stream.py new file mode 100644 index 0000000..943a984 --- /dev/null +++ b/examples/pipes/pipes.tool.stream.py @@ -0,0 +1,72 @@ +""" +Example demonstrating how to use get_tools_from_run_stream to extract tool calls +from a streaming response, similar to the TypeScript version. 
+""" + +import itertools +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase +from langbase.helper import get_tools_from_run_stream + +# Load environment variables +load_dotenv() + + +def main(): + # Initialize Langbase client + langbase = Langbase(api_key=os.getenv("LANGBASE_API_KEY")) + + user_msg = "What's the weather in SF" + + # Run the pipe with streaming enabled and tools + response = langbase.pipes.run( + messages=[ + { + "role": "user", + "content": user_msg, + } + ], + stream=True, + name="summary-agent", + tools=[ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather of a given location", + "parameters": { + "type": "object", + "required": ["location"], + "properties": { + "unit": { + "enum": ["celsius", "fahrenheit"], + "type": "string", + }, + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + }, + }, + }, + } + ], + ) + + # Split the stream into two iterators (similar to TypeScript tee()) + stream_for_response, stream_for_tool_call = itertools.tee(response["stream"], 2) + + # Extract tool calls from the stream + tool_calls = get_tools_from_run_stream(stream_for_tool_call) + has_tool_calls = len(tool_calls) > 0 + + if has_tool_calls: + print(json.dumps(tool_calls, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/examples/pipes/pipes.update.py b/examples/pipes/pipes.update.py index 0918222..1678ed3 100644 --- a/examples/pipes/pipes.update.py +++ b/examples/pipes/pipes.update.py @@ -1,39 +1,49 @@ """ -Example demonstrating how to update an existing pipe in Langbase. +Example demonstrating how to update a pipe in Langbase. """ + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Name of the pipe to update -pipe_name = "my-assistant-pipe" - -# Define update configuration -update_config = { - "name": pipe_name, - "description": "An updated assistant that provides more detailed responses", - "temperature": 0.8, # Adjust temperature - "max_tokens": 2000, # Increase output length - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant that provides detailed, informative responses while still being concise and to the point." 
- } - ] -} - -# Update the pipe -try: - updated_pipe = lb.pipes.update(**update_config) - - print(f"Successfully updated pipe '{updated_pipe['name']}'") - print(f"New description: {updated_pipe.get('description', 'N/A')}") - print(f"Status: {updated_pipe.get('status', 'unknown')}") - -except Exception as e: - print(f"Error updating pipe: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Define updated configuration + updates = { + "description": "Updated description for the text summarization pipe", + "model": "openai:gpt-4", + } + + # Update the pipe + try: + response = lb.pipes.update( + name="summary-agent", + description="An agent that summarizes text", + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that summarizes text clearly and concisely.", + } + ], + ) + + print("Pipe updated successfully!") + print(json.dumps(response, indent=2)) + + except Exception as e: + print(f"Error updating pipe: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.append.py b/examples/threads/threads.append.py index 26b47c5..973880e 100644 --- a/examples/threads/threads.append.py +++ b/examples/threads/threads.append.py @@ -1,44 +1,47 @@ """ Example demonstrating how to append messages to a thread in Langbase. """ + import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) -# Thread ID to append messages to -thread_id = "thread_123456789" # Replace with your actual thread ID - -# Messages to append -messages = [ - { - "role": "assistant", - "content": "I'm sorry to hear you're having trouble with your Widget Pro 2000. What specific issue are you experiencing?" - }, - { - "role": "user", - "content": "The power button is flashing red and the device won't turn on." 
- } -] - -# Append messages to the thread -try: - response = lb.threads.append( - thread_id=thread_id, - messages=messages - ) - - print(f"Successfully appended {len(response)} messages to thread '{thread_id}'") - - # Print the appended messages - for i, message in enumerate(response, 1): - print(f"\nMessage {i}:") - print(f"Role: {message.get('role')}") - print(f"Content: {message.get('content')}") - print(f"Created at: {message.get('created_at')}") - -except Exception as e: - print(f"Error appending messages to thread: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Thread ID to append messages to + thread_id = "thread_123" # Replace with your actual thread ID + + # Messages to append + messages = [ + {"role": "assistant", "content": "Nice to meet you"}, + ] + + # Append messages to the thread + try: + response = lb.threads.append(thread_id=thread_id, messages=messages) + + print(f"Successfully appended {len(response)} messages to thread '{thread_id}'") + + # Print the appended messages + for i, message in enumerate(response, 1): + print(f"\nMessage {i}:") + print(f"Role: {message.get('role')}") + print(f"Content: {message.get('content')}") + print(f"Created at: {message.get('created_at')}") + + except Exception as e: + print(f"Error appending messages to thread: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.create.py b/examples/threads/threads.create.py index 870bc3f..52a8333 100644 --- a/examples/threads/threads.create.py +++ b/examples/threads/threads.create.py @@ -1,39 +1,36 @@ """ Example demonstrating how to create a thread in Langbase. """ + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Create a thread with metadata and initial messages -try: - thread = lb.threads.create( - metadata={ - "user_id": "user_12345", - "session_id": "session_67890", - "topic": "technical_support", - "product": "Widget Pro 2000" - }, - messages=[ - { - "role": "user", - "content": "Hello, I'm having trouble with my Widget Pro 2000." 
- } - ] - ) - - print(f"Successfully created thread with ID: {thread['id']}") - print(f"Creation timestamp: {thread.get('created_at')}") - print(f"Metadata: {thread.get('metadata', {})}") - - # Save the thread ID for later use - thread_id = thread['id'] - print(f"\nSave this thread ID for future interactions: {thread_id}") - -except Exception as e: - print(f"Error creating thread: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Create a thread with metadata and initial messages + try: + thread = lb.threads.create( + metadata={"company": "langbase"}, + messages=[{"role": "user", "content": "Hello, how are you?"}], + ) + + print(json.dumps(thread, indent=2)) + + except Exception as e: + print(f"Error creating thread: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.delete.py b/examples/threads/threads.delete.py index 4bebea7..abd677f 100644 --- a/examples/threads/threads.delete.py +++ b/examples/threads/threads.delete.py @@ -1,28 +1,40 @@ """ Example demonstrating how to delete a thread in Langbase. """ + import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") -# Initialize the client -lb = Langbase(api_key=langbase_api_key) +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Thread ID to delete + thread_id = "thread_123" # Replace with your actual thread ID + + # Delete the thread + try: + response = lb.threads.delete(thread_id=thread_id) -# Thread ID to delete -thread_id = "thread_123456789" # Replace with your actual thread ID + if response.get("success", False): + print(f"Successfully deleted thread {thread_id}") + else: + print(f"Failed to delete thread {thread_id}") + if "message" in response: + print(f"Message: {response['message']}") -# Delete the thread -try: - response = lb.threads.delete(thread_id=thread_id) + except Exception as e: + print(f"Error deleting thread: {e}") - if response.get('success', False): - print(f"Successfully deleted thread {thread_id}") - else: - print(f"Failed to delete thread {thread_id}") - if 'message' in response: - print(f"Message: {response['message']}") -except Exception as e: - print(f"Error deleting thread: {e}") +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.get.py b/examples/threads/threads.get.py index a4c0776..7c35a5c 100644 --- a/examples/threads/threads.get.py +++ b/examples/threads/threads.get.py @@ -1,39 +1,36 @@ """ -Example demonstrating how to get thread details in Langbase. +Example demonstrating how to get a specific thread in Langbase. 
""" + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -from datetime import datetime -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") -# Initialize the client -lb = Langbase(api_key=langbase_api_key) +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) -# Thread ID to get details for -thread_id = "thread_123456789" # Replace with your actual thread ID + # Thread ID to retrieve + thread_id = "thread_123" # Replace with your thread ID -# Get thread details -try: - thread = lb.threads.get(thread_id=thread_id) + # Get the specific thread + try: + thread = lb.threads.get(thread_id=thread_id) - print(f"Thread ID: {thread['id']}") + print(json.dumps(thread, indent=2)) - # Convert timestamp to readable date (if available) - created_at = thread.get('created_at') - if created_at: - timestamp = datetime.fromtimestamp(created_at / 1000).strftime('%Y-%m-%d %H:%M:%S') - print(f"Created at: {timestamp}") + except Exception as e: + print(f"Error getting thread: {e}") - # Print metadata if available - metadata = thread.get('metadata', {}) - if metadata: - print("Metadata:") - for key, value in metadata.items(): - print(f" {key}: {value}") - else: - print("No metadata available") -except Exception as e: - print(f"Error getting thread: {e}") +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.list.py b/examples/threads/threads.list.py deleted file mode 100644 index 65e9c69..0000000 --- a/examples/threads/threads.list.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Example demonstrating how to list messages in a thread in Langbase. -""" -import os -from langbase import Langbase -from datetime import datetime - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Thread ID to list messages from -thread_id = "thread_123456789" # Replace with your actual thread ID - -# List messages from the thread -try: - messages = lb.threads.messages.list(thread_id=thread_id) - - print(f"Found {len(messages)} messages in thread '{thread_id}':") - print() - - # Format and print the conversation - for message in messages: - # Convert timestamp to readable date (if available) - created_at = message.get('created_at') - if created_at: - timestamp = datetime.fromtimestamp(created_at / 1000).strftime('%Y-%m-%d %H:%M:%S') - else: - timestamp = "Unknown time" - - # Get role and format for display - role = message.get('role', 'unknown').upper() - - print(f"[{timestamp}] {role}:") - print(message.get('content', 'No content')) - print("-" * 50) - -except Exception as e: - print(f"Error listing messages from thread: {e}") diff --git a/examples/threads/threads.messages.list.py b/examples/threads/threads.messages.list.py new file mode 100644 index 0000000..79dfc02 --- /dev/null +++ b/examples/threads/threads.messages.list.py @@ -0,0 +1,33 @@ +""" +Example demonstrating how to list threads in Langbase. 
+""" + +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # List all threads + try: + threads = lb.threads.messages.list(thread_id="thread_123") + + print(json.dumps(threads, indent=2)) + + except Exception as e: + print(f"Error listing threads: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/threads/threads.update.py b/examples/threads/threads.update.py index 33bc242..e8b1084 100644 --- a/examples/threads/threads.update.py +++ b/examples/threads/threads.update.py @@ -1,52 +1,45 @@ """ Example demonstrating how to update thread metadata in Langbase. """ + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -from datetime import datetime - -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Thread ID to update -thread_id = "thread_123456789" # Replace with your actual thread ID - -# New metadata to set for the thread -updated_metadata = { - "status": "resolved", - "priority": "high", - "last_updated_by": "support_agent_42", - "category": "technical_issue", - "customer_satisfaction": "high", - "resolution_time": "2 hours" -} - -# Update the thread metadata -try: - updated_thread = lb.threads.update( - thread_id=thread_id, - metadata=updated_metadata - ) - - print(f"Successfully updated thread {updated_thread['id']}") - - # Convert timestamp to readable date (if available) - created_at = updated_thread.get('created_at') - if created_at: - timestamp = datetime.fromtimestamp(created_at / 1000).strftime('%Y-%m-%d %H:%M:%S') - print(f"Created at: {timestamp}") - - # Print updated metadata - metadata = updated_thread.get('metadata', {}) - if metadata: - print("Updated metadata:") - for key, value in metadata.items(): - print(f" {key}: {value}") - else: - print("No metadata available") - -except Exception as e: - print(f"Error updating thread: {e}") + + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Thread ID to update + thread_id = "thread_123" # Replace with your actual thread ID + + # New metadata to set for the thread + updated_metadata = { + "company": "langbase", + "about": "Langbase is the most powerful serverless platform for building AI agents with memory.", + } + + # Update the thread metadata + try: + updated_thread = lb.threads.update( + thread_id=thread_id, + metadata=updated_metadata, + ) + + print(json.dumps(updated_thread, indent=2)) + + except Exception as e: + print(f"Error updating thread: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/tools/tools.crawl.py b/examples/tools/tools.crawl.py index 68296c4..23c78ac 100644 --- a/examples/tools/tools.crawl.py +++ b/examples/tools/tools.crawl.py @@ -4,9 +4,15 @@ This example crawls specified URLs using spider.cloud service. 
Get your API key from: https://spider.cloud/docs/quickstart """ + import os + +from dotenv import load_dotenv + from langbase import Langbase +load_dotenv() + # Get API keys from environment variables langbase_api_key = os.getenv("LANGBASE_API_KEY") crawl_api_key = os.getenv("CRAWL_KEY") @@ -14,6 +20,7 @@ # Initialize the client lb = Langbase(api_key=langbase_api_key) + def main(): """ Crawls specified URLs using spider.cloud service. @@ -23,7 +30,7 @@ def main(): results = lb.tools.crawl( url=["https://langbase.com", "https://langbase.com/about"], max_pages=1, - api_key=crawl_api_key + api_key=crawl_api_key, ) # Print the results @@ -32,5 +39,6 @@ def main(): except Exception as e: print(f"Error performing web crawl: {e}") + if __name__ == "__main__": main() diff --git a/examples/tools/tools.web-search.py b/examples/tools/tools.web-search.py index bdf9dee..902ae03 100644 --- a/examples/tools/tools.web-search.py +++ b/examples/tools/tools.web-search.py @@ -1,46 +1,48 @@ """ Example demonstrating how to use the web search tool in Langbase. """ + +import json import os + +from dotenv import load_dotenv + from langbase import Langbase -# Get API key from environment variable -langbase_api_key = os.getenv("LANGBASE_API_KEY") - -# Initialize the client -search_api_key = os.environ.get("EXA_API_KEY", "your-exa-key") # Optional: search provider API key - -# Initialize the client -lb = Langbase(api_key=langbase_api_key) - -# Configure the search request -search_query = "latest advancements in quantum computing 2025" - -# Optional: restrict to specific domains -domains = ["arxiv.org", "nature.com", "science.org", "research.google.com"] - -# Perform the web search -try: - search_results = lb.tools.web_search( - query=search_query, - service="exa", # The search service to use - total_results=5, # Number of results to return - domains=domains, # Optional: restrict to specific domains - api_key=search_api_key # Optional: provider-specific API key - ) - - print(f"Found {len(search_results)} results for query: '{search_query}'") - print() - - # Display the search results - for i, result in enumerate(search_results, 1): - print(f"Result {i}:") - print(f"URL: {result['url']}") - print(f"Content snippet:") - # Display a preview of the content (first 200 characters) - content_preview = result['content'][:200] + "..." 
if len(result['content']) > 200 else result['content'] - print(content_preview) - print("-" * 80) - -except Exception as e: - print(f"Error performing web search: {e}") + +def main(): + load_dotenv() + + # Get API key from environment variable + langbase_api_key = os.getenv("LANGBASE_API_KEY") + + # Initialize the client + search_api_key = os.getenv("EXA_API_KEY") + + # Initialize the client + lb = Langbase(api_key=langbase_api_key) + + # Configure the search request + search_query = "latest advancements in quantum computing 2025" + + # Optional: restrict to specific domains + domains = ["arxiv.org", "nature.com", "science.org"] + + # Perform the web search + try: + search_results = lb.tools.web_search( + query=search_query, + service="exa", # The search service to use + total_results=5, # Number of results to return + domains=domains, # Optional: restrict to specific domains + api_key=search_api_key, # Optional: provider-specific API key + ) + + print(json.dumps(search_results, indent=2)) + + except Exception as e: + print(f"Error performing web search: {e}") + + +if __name__ == "__main__": + main() diff --git a/examples/workflow/email_processing.py b/examples/workflow/email_processing.py new file mode 100644 index 0000000..12da295 --- /dev/null +++ b/examples/workflow/email_processing.py @@ -0,0 +1,178 @@ +""" +Email Processing Workflow + +This example demonstrates how to create a workflow that analyzes an email +and generates a response when needed. +""" + +import asyncio +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase, Workflow + +load_dotenv() + + +async def process_email(email_content: str): + """ + Process an email by summarizing, analyzing sentiment, determining if response + is needed, and generating a response if necessary. + + Args: + email_content: The content of the email to process + + Returns: + Dictionary containing summary, sentiment, response_needed, and response + """ + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + print("Please set: export LANGBASE_API_KEY='your_langbase_api_key'") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + print("Please set: export LLM_API_KEY='your_llm_api_key'") + exit(1) + + # Initialize Langbase + langbase = Langbase(api_key=langbase_api_key) + + # Create a new workflow + workflow = Workflow(debug=True) + + try: + # Steps 1 & 2: Run summary and sentiment analysis in parallel + async def summarize_email(): + response = langbase.agent.run( + model="openai:gpt-4.1-mini", + instructions="""Create a concise summary of this email. Focus on the main points, + requests, and any action items mentioned.""", + api_key=llm_api_key, + input=[{"role": "user", "content": email_content}], + stream=False, + ) + return response.get("output") + + async def analyze_sentiment(): + response = langbase.agent.run( + model="openai:gpt-4.1-mini", + instructions="""Analyze the sentiment of this email. 
Provide a brief analysis + that includes the overall tone (positive, neutral, or negative) and any notable + emotional elements.""", + api_key=llm_api_key, + input=[{"role": "user", "content": email_content}], + stream=False, + ) + return response.get("output") + + # Execute summary and sentiment analysis steps in parallel + summary = await workflow.step({"id": "summarize_email", "run": summarize_email}) + + sentiment = await workflow.step( + {"id": "analyze_sentiment", "run": analyze_sentiment} + ) + + # Step 3: Determine if response is needed (using the results from previous steps) + async def determine_response_needed(): + response = langbase.agent.run( + model="openai:gpt-4.1-mini", + instructions="""Based on the email summary and sentiment analysis, determine if a + response is needed. Answer with 'yes' if a response is required, or 'no' if no + response is needed. Consider factors like: Does the email contain a question? + Is there an explicit request? Is it urgent?""", + api_key=llm_api_key, + input=[ + { + "role": "user", + "content": f"""Email: {email_content} + +Summary: {summary} + +Sentiment: {sentiment} + +Does this email require a response?""", + } + ], + stream=False, + ) + return "yes" in response.get("output", "").lower() + + response_needed = await workflow.step( + {"id": "determine_response_needed", "run": determine_response_needed} + ) + + # Step 4: Generate response if needed + response = None + if response_needed: + + async def generate_response(): + response = langbase.agent.run( + model="openai:gpt-4.1-mini", + instructions="""Generate a professional email response. Address all questions + and requests from the original email. Be helpful, clear, and maintain a + professional tone that matches the original email sentiment.""", + api_key=llm_api_key, + input=[ + { + "role": "user", + "content": f"""Original Email: {email_content} + +Summary: {summary} + +Sentiment Analysis: {sentiment} + +Please draft a response email.""", + } + ], + stream=False, + ) + return response.get("output") + + response = await workflow.step( + {"id": "generate_response", "run": generate_response} + ) + + # Return the results + return { + "summary": summary, + "sentiment": sentiment, + "response_needed": response_needed, + "response": response, + } + + except Exception as error: + print(f"Email processing workflow failed: {error}") + raise error + + +async def main(): + sample_email = """ +Subject: Pricing Information and Demo Request + +Hello, + +I came across your platform and I'm interested in learning more about your product +for our growing company. Could you please send me some information on your pricing tiers? + +We're particularly interested in the enterprise tier as we now have a team of about +50 people who would need access. Would it be possible to schedule a demo sometime next week? + +Thanks in advance for your help! + +Best regards, +Jamie +""" + + results = await process_email(sample_email) + print(json.dumps(results, indent=2, ensure_ascii=False)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/workflow/summarization.py b/examples/workflow/summarization.py new file mode 100644 index 0000000..61d7b66 --- /dev/null +++ b/examples/workflow/summarization.py @@ -0,0 +1,98 @@ +""" +Summarization Workflow + +This example demonstrates how to create a workflow that summarizes text input +with parallel processing and retry configuration. 
+""" + +import asyncio +import json +import os + +from dotenv import load_dotenv + +from langbase import Langbase, Workflow + +load_dotenv() + + +async def process_text(input_text: str): + """ + Process text input by summarizing it with retry logic and debug mode. + + Args: + input_text: The text to be summarized + + Returns: + Dictionary containing the response + """ + # Check for required environment variables + langbase_api_key = os.environ.get("LANGBASE_API_KEY") + llm_api_key = os.environ.get("LLM_API_KEY") + + if not langbase_api_key: + print("❌ Missing LANGBASE_API_KEY in environment variables.") + print("Please set: export LANGBASE_API_KEY='your_langbase_api_key'") + exit(1) + + if not llm_api_key: + print("❌ Missing LLM_API_KEY in environment variables.") + print("Please set: export LLM_API_KEY='your_llm_api_key'") + exit(1) + + # Initialize Langbase + langbase = Langbase(api_key=langbase_api_key) + + # Create workflow with debug mode + workflow = Workflow(debug=True) + + try: + # Define a single step with retries + async def process_text_step(): + response = langbase.agent.run( + model="openai:gpt-4o", + instructions="""Summarize the following text in a + single paragraph. Be concise but capture the key information.""", + api_key=llm_api_key, + input=[{"role": "user", "content": input_text}], + stream=False, + ) + return response.get("output") + + response = await workflow.step( + { + "id": "process_text", + "retries": {"limit": 2, "delay": 1000, "backoff": "exponential"}, + "run": process_text_step, + } + ) + + # Return the result + return {"response": response} + + except Exception as error: + print(f"Workflow step failed: {error}") + raise error + + +async def main(): + sample_text = """ + Langbase is the most powerful serverless AI platform for building AI agents with memory. + Build, deploy, and scale AI agents with tools and memory (RAG). Simple AI primitives + with a world-class developer experience without using any frameworks. + + Compared to complex AI frameworks, Langbase is serverless and the first composable + AI platform. Build AI agents without any bloated frameworks. You write the logic, + we handle the logistics. + + Langbase offers AI Pipes (serverless agents with tools), AI Memory (serverless RAG), + and AI Studio (developer platform). The platform is 30-50x less expensive than + competitors, supports 250+ LLM models, and enables collaboration among team members. + """ + + results = await process_text(sample_text) + print(json.dumps(results, indent=2, ensure_ascii=False)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/workflow/workflow.py b/examples/workflow/workflow.py new file mode 100644 index 0000000..13535ef --- /dev/null +++ b/examples/workflow/workflow.py @@ -0,0 +1,47 @@ +""" +Experimental upcoming beta AI primitive. +Please refer to the documentation for more information: https://langbase.com/docs for more information. +""" + +import asyncio +import os + +from dotenv import load_dotenv + +from langbase import Langbase, Workflow + +load_dotenv() + + +async def main(): + # Initialize Langbase client + langbase = Langbase(api_key=os.environ.get("LANGBASE_API_KEY")) + + # Create workflow with debug mode + workflow = Workflow(debug=True) + + # Define and execute a workflow step + async def summarize_step(): + return langbase.agent.run( + model="openai:gpt-4o-mini", + api_key=os.environ.get("LLM_API_KEY"), + input=[ + { + "role": "system", + "content": "You are an expert summarizer. 
Summarize the user input.", + }, + { + "role": "user", + "content": "I am testing workflows. I just created an example of summarize workflow. Can you summarize this?", + }, + ], + stream=False, + ) + + result = await workflow.step({"id": "summarize", "run": summarize_step}) + + print(result["output"]) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/langbase/__init__.py b/langbase/__init__.py index 1951fbf..2865199 100644 --- a/langbase/__init__.py +++ b/langbase/__init__.py @@ -3,48 +3,120 @@ This package provides a Python interface to the Langbase API, allowing you to build and deploy AI-powered applications using Langbase's infrastructure. - -Basic usage: - -```python -from langbase import Langbase - -# Initialize the client -lb = Langbase(api_key="your-api-key") - -# Run a pipe -response = lb.pipes.run( - name="your-pipe-name", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Tell me about AI."} - ] -) - -print(response["completion"]) ``` """ -from .client import Langbase from .errors import ( - APIError, APIConnectionError, APIConnectionTimeoutError, - BadRequestError, AuthenticationError, PermissionDeniedError, - NotFoundError, ConflictError, UnprocessableEntityError, - RateLimitError, InternalServerError + APIConnectionError, + APIConnectionTimeoutError, + APIError, + AuthenticationError, + BadRequestError, + ConflictError, + InternalServerError, + NotFoundError, + PermissionDeniedError, + RateLimitError, + UnprocessableEntityError, +) +from .helper import ( + ChoiceStream, + ChunkStream, + Delta, + StreamProcessor, + collect_stream_text, + create_stream_processor, + get_runner, + get_text_part, + get_tools_from_run, + get_tools_from_run_stream, + get_tools_from_stream, + get_typed_runner, + handle_response_stream, + parse_chunk, + stream_text, +) +from .langbase import Langbase +from .primitives.memories import Memories +from .primitives.pipes import Pipes +from .primitives.threads import Threads +from .primitives.tools import Tools +from .streaming import StreamEventType, TypedStreamProcessor +from .types import ( + ChoiceGenerate, + Message, + PipeBaseOptions, + PipeBaseResponse, + PipeCreateOptions, + PipeCreateResponse, + PipeListResponse, + PipeUpdateOptions, + PipeUpdateResponse, + ResponseFormat, + RunResponse, + RunResponseStream, + ToolCall, + ToolChoice, + Usage, + Variable, ) +from .workflow import TimeoutError, Workflow __version__ = "0.1.0" __all__ = [ - 'Langbase', - 'APIError', - 'APIConnectionError', - 'APIConnectionTimeoutError', - 'BadRequestError', - 'AuthenticationError', - 'PermissionDeniedError', - 'NotFoundError', - 'ConflictError', - 'UnprocessableEntityError', - 'RateLimitError', - 'InternalServerError', + # Errors + "APIConnectionError", + "APIConnectionTimeoutError", + "APIError", + "AuthenticationError", + "BadRequestError", + # Type definitions + "ChoiceGenerate", + # Helper utilities + "ChunkStream", + "ConflictError", + "InternalServerError", + # Main classes + "Langbase", + "Memories", + "Message", + "NotFoundError", + "PermissionDeniedError", + "PipeBaseOptions", + "PipeBaseResponse", + "PipeCreateOptions", + "PipeCreateResponse", + "PipeListResponse", + "PipeUpdateOptions", + "PipeUpdateResponse", + "Pipes", + "RateLimitError", + "ResponseFormat", + "RunResponse", + "RunResponseStream", + # Streaming + "StreamEventType", + "StreamProcessor", + "Threads", + "TimeoutError", + "ToolCall", + "ToolChoice", + "Tools", + "Tools", + "TypedStreamProcessor", + 
"UnprocessableEntityError", + "Usage", + "Variable", + "Workflow", + "collect_stream_text", + "create_stream_processor", + "get_runner", + "get_text_part", + "get_tools_from_run", + "get_tools_from_run_stream", + "get_tools_from_stream", + "get_typed_runner", + "handle_response_stream", + "parse_chunk", + "stream_text", ] diff --git a/langbase/client.py b/langbase/client.py deleted file mode 100644 index d2cfb75..0000000 --- a/langbase/client.py +++ /dev/null @@ -1,762 +0,0 @@ -""" -Main client for the Langbase SDK. - -This module provides the Langbase class which is the main entry point -for interacting with the Langbase API. -""" -import os -from typing import Dict, List, Optional, Union, Any, BinaryIO, overload -from io import BytesIO -import requests - -from .errors import APIError -from .request import Request -from .utils import convert_document_to_request_files, clean_null_values -from .types import ( - EmbeddingModel, ContentType, FileProtocol, - MemoryRetrieveResponse, MemoryListDocResponse, MemoryCreateResponse, - MemoryListResponse, MemoryDeleteResponse, MemoryDeleteDocResponse, - ThreadsBaseResponse, ThreadMessagesBaseResponse -) - - -class Langbase: - """ - Client for the Langbase API. - - This class provides methods for interacting with all aspects of the Langbase API, - including pipes, memories, tools, threads, and utilities. - """ - - def __init__( - self, - api_key: Optional[str] = None, - base_url: str = "https://api.langbase.com", - timeout: int = 30 - ): - """ - Initialize the Langbase client. - - Args: - api_key: The API key for authentication. If not provided, it will be read - from the LANGBASE_API_KEY environment variable. - base_url: The base URL for the API. - timeout: The timeout for API requests in seconds. - - Raises: - ValueError: If no API key is provided and LANGBASE_API_KEY is not set. - """ - self.api_key = api_key or os.environ.get("LANGBASE_API_KEY", "") - if not self.api_key: - raise ValueError( - "API key must be provided either as a parameter or through the LANGBASE_API_KEY environment variable" - ) - - self.base_url = base_url - self.timeout = timeout - - self.request = Request({ - "api_key": self.api_key, - "base_url": self.base_url, - "timeout": self.timeout - }) - - # Initialize properties and methods - self._init_pipes() - self._init_memories() - self._init_tools() - self._init_threads() - self._init_llm() - - # Deprecated property aliases - self.pipe = self.pipes - self.memory = self.memories - self.tool = self.tools - - def _init_pipes(self): - """Initialize pipes methods.""" - - class Pipes: - def __init__(self, parent): - self.parent = parent - - def list(self): - """ - List all pipes. - - Returns: - List of pipe objects - """ - return self.parent.request.get("/v1/pipes") - - def create(self, name: str, description: Optional[str] = None, **kwargs): - """ - Create a new pipe. - - Args: - name: Name of the pipe - description: Description of the pipe - **kwargs: Additional parameters for the pipe - - Returns: - Created pipe object - """ - options = { - "name": name, - "description": description, - **kwargs - } - return self.parent.request.post("/v1/pipes", clean_null_values(options)) - - def update(self, name: str, **kwargs): - """ - Update an existing pipe. 
- - Args: - name: Name of the pipe to update - **kwargs: Parameters to update - - Returns: - Updated pipe object - """ - options = { - "name": name, - **kwargs - } - return self.parent.request.post(f"/v1/pipes/{name}", clean_null_values(options)) - - def run( - self, - name: Optional[str] = None, - api_key: Optional[str] = None, - messages: Optional[List[Dict[str, Any]]] = None, - stream: Optional[bool] = None, # Changed to Optional[bool] with default None - **kwargs - ): - """ - Run a pipe. - - Args: - name: Name of the pipe to run - api_key: API key for the pipe - messages: List of messages for the conversation - stream: Whether to stream the response (None means don't specify) - **kwargs: Additional parameters for the run - - Returns: - Run response or stream - - Raises: - ValueError: If neither name nor API key is provided - """ - if not name and not api_key: - raise ValueError("Either pipe name or API key is required") - - options = { - "name": name, - "api_key": api_key, - "messages": messages or [], - **kwargs - } - - # Only set stream in options if it's explicitly provided - if stream is not None: - options["stream"] = stream - - # Create a new request instance if API key is provided - request = self.parent.request - if api_key: - request = Request({ - "api_key": api_key, - "base_url": self.parent.base_url, - "timeout": self.parent.timeout - }) - - headers = {} - if "llm_key" in kwargs: - headers["LB-LLM-KEY"] = kwargs.pop("llm_key") - - # Pass the stream parameter to post method (which might be None) - return request.post("/v1/pipes/run", clean_null_values(options), headers, stream=stream if stream is not None else False) - - self.pipes = Pipes(self) - - def _init_memories(self): - """Initialize memories methods.""" - - class Documents: - def __init__(self, parent): - self.parent = parent - - def list(self, memory_name: str) -> List[MemoryListDocResponse]: - """ - List all documents in a memory. - - Args: - memory_name: Name of the memory - - Returns: - List of document objects - """ - return self.parent.request.get(f"/v1/memory/{memory_name}/documents") - - def delete(self, memory_name: str, document_name: str) -> MemoryDeleteDocResponse: - """ - Delete a document from memory. - - Args: - memory_name: Name of the memory - document_name: Name of the document to delete - - Returns: - Delete response - """ - return self.parent.request.delete( - f"/v1/memory/{memory_name}/documents/{document_name}" - ) - - def upload( - self, - memory_name: str, - document_name: str, - document: Union[bytes, BytesIO, str, BinaryIO], - content_type: ContentType, - meta: Optional[Dict[str, str]] = None - ) -> requests.Response: - """ - Upload a document to memory. 
- - Args: - memory_name: Name of the memory - document_name: Name for the document - document: Document content (bytes, file-like object, or path) - content_type: MIME type of the document - meta: Metadata for the document - - Returns: - Upload response - - Raises: - ValueError: If document type is unsupported - APIError: If the upload fails - """ - try: - # Get signed URL for upload - response = self.parent.request.post("/v1/memory/documents", { - "memoryName": memory_name, - "fileName": document_name, - "meta": meta or {} - }) - - upload_url = response.get("signedUrl") - - # Convert document to appropriate format - if isinstance(document, str) and os.path.isfile(document): - with open(document, "rb") as f: - file_content = f.read() - elif isinstance(document, bytes): - file_content = document - elif isinstance(document, BytesIO) or hasattr(document, 'read'): - file_content = document.read() - # Reset file pointer if possible - if hasattr(document, 'seek'): - document.seek(0) - else: - raise ValueError(f"Unsupported document type: {type(document)}") - - # Upload to signed URL - upload_response = requests.put( - upload_url, - headers={ - "Authorization": f"Bearer {self.parent.api_key}", - "Content-Type": content_type - }, - data=file_content - ) - - if not upload_response.ok: - raise APIError( - upload_response.status_code, - upload_response.text, - "Upload failed", - dict(upload_response.headers) - ) - - return upload_response - - except Exception as e: - if isinstance(e, APIError): - raise e - raise APIError( - None, - str(e), - "Error during document upload", - None - ) from e - - class Embeddings: - def __init__(self, parent): - self.parent = parent - - def retry(self, memory_name: str, document_name: str): - """ - Retry embedding generation for a document. - - Args: - memory_name: Name of the memory - document_name: Name of the document - - Returns: - Retry response - """ - return self.parent.request.get( - f"/v1/memory/{memory_name}/documents/{document_name}/embeddings/retry" - ) - - def __init__(self, parent): - self.parent = parent - self.embeddings = self.Embeddings(parent) - - class Memories: - def __init__(self, parent): - self.parent = parent - self.documents = Documents(parent) - - def create( - self, - name: str, - description: Optional[str] = None, - embedding_model: Optional[EmbeddingModel] = None - ) -> MemoryCreateResponse: - """ - Create a new memory. - - Args: - name: Name for the memory - description: Description of the memory - embedding_model: Model to use for embeddings - - Returns: - Created memory object - """ - options = { - "name": name, - "description": description, - "embedding_model": embedding_model - } - return self.parent.request.post("/v1/memory", clean_null_values(options)) - - def delete(self, name: str) -> MemoryDeleteResponse: - """ - Delete a memory. - - Args: - name: Name of the memory to delete - - Returns: - Delete response - """ - return self.parent.request.delete(f"/v1/memory/{name}") - - def retrieve( - self, - query: str, - memory: List[Dict[str, Any]], - top_k: Optional[int] = None - ) -> List[MemoryRetrieveResponse]: - """ - Retrieve content from memory based on query. 
- - Args: - query: Search query - memory: List of memory configurations - top_k: Number of results to return - - Returns: - List of matching content - """ - options = { - "query": query, - "memory": memory - } - - if top_k is not None: - options["topK"] = top_k - - return self.parent.request.post("/v1/memory/retrieve", options) - - def list(self) -> List[MemoryListResponse]: - """ - List all memories. - - Returns: - List of memory objects - """ - return self.parent.request.get("/v1/memory") - - self.memories = Memories(self) - - def _init_tools(self): - """Initialize tools methods.""" - - class Tools: - def __init__(self, parent): - self.parent = parent - - def crawl( - self, - url: List[str], - max_pages: Optional[int] = None, - api_key: Optional[str] = None - ): - """ - Crawl web pages. - - Args: - url: List of URLs to crawl - max_pages: Maximum number of pages to crawl - api_key: API key for crawling service - - Returns: - List of crawled content - """ - options = {"url": url} - - if max_pages is not None: - options["maxPages"] = max_pages - - headers = {} - if api_key: - headers["LB-CRAWL-KEY"] = api_key - - return self.parent.request.post("/v1/tools/crawl", options, headers) - - def web_search( - self, - query: str, - service: str = "exa", - total_results: Optional[int] = None, - domains: Optional[List[str]] = None, - api_key: Optional[str] = None - ): - """ - Search the web. - - Args: - query: Search query - service: Search service to use - total_results: Number of results to return - domains: List of domains to restrict search to - api_key: API key for search service - - Returns: - List of search results - """ - options = { - "query": query, - "service": service - } - - if total_results is not None: - options["totalResults"] = total_results - - if domains is not None: - options["domains"] = domains - - headers = {} - if api_key: - headers["LB-WEB-SEARCH-KEY"] = api_key - - return self.parent.request.post("/v1/tools/web-search", options, headers) - - self.tools = Tools(self) - - def _init_threads(self): - """Initialize threads methods.""" - - class Messages: - def __init__(self, parent): - self.parent = parent - - def list(self, thread_id: str) -> List[ThreadMessagesBaseResponse]: - """ - List all messages in a thread. - - Args: - thread_id: ID of the thread - - Returns: - List of messages - """ - return self.parent.request.get(f"/v1/threads/{thread_id}/messages") - - class Threads: - def __init__(self, parent): - self.parent = parent - self.messages = Messages(parent) - - def create( - self, - thread_id: Optional[str] = None, - metadata: Optional[Dict[str, str]] = None, - messages: Optional[List[Dict[str, Any]]] = None - ) -> ThreadsBaseResponse: - """ - Create a new thread. - - Args: - thread_id: Optional specific ID for the thread - metadata: Metadata for the thread - messages: Initial messages for the thread - - Returns: - Created thread object - """ - options = {} - - if thread_id: - options["threadId"] = thread_id - - if metadata: - options["metadata"] = metadata - - if messages: - options["messages"] = messages - - return self.parent.request.post("/v1/threads", clean_null_values(options)) - - def update( - self, - thread_id: str, - metadata: Dict[str, str] - ) -> ThreadsBaseResponse: - """ - Update thread metadata. 
- - Args: - thread_id: ID of the thread to update - metadata: New metadata - - Returns: - Updated thread object - """ - options = { - "threadId": thread_id, - "metadata": metadata - } - return self.parent.request.post(f"/v1/threads/{thread_id}", options) - - def get(self, thread_id: str) -> ThreadsBaseResponse: - """ - Get thread details. - - Args: - thread_id: ID of the thread - - Returns: - Thread object - """ - return self.parent.request.get(f"/v1/threads/{thread_id}") - - def delete(self, thread_id: str) -> Dict[str, bool]: - """ - Delete a thread. - - Args: - thread_id: ID of the thread to delete - - Returns: - Delete response - """ - return self.parent.request.delete(f"/v1/threads/{thread_id}") - - def append( - self, - thread_id: str, - messages: List[Dict[str, Any]] - ) -> List[ThreadMessagesBaseResponse]: - """ - Append messages to a thread. - - Args: - thread_id: ID of the thread - messages: Messages to append - - Returns: - List of added messages - """ - return self.parent.request.post( - f"/v1/threads/{thread_id}/messages", - messages - ) - - self.threads = Threads(self) - - def _init_llm(self): - """Initialize LLM methods.""" - - class LLM: - def __init__(self, parent): - self.parent = parent - - def run( - self, - messages: List[Dict[str, Any]], - model: str, - llm_key: str, - stream: bool = False, - **kwargs - ): - """ - Run an LLM with the specified parameters. - - Args: - messages: List of messages - model: Model identifier - llm_key: API key for the LLM provider - stream: Whether to stream the response - **kwargs: Additional parameters for the model - - Returns: - LLM response or stream - """ - options = { - "messages": messages, - "model": model, - "llm_key": llm_key, - **kwargs - } - - if stream: - options["stream"] = True - - headers = {"LB-LLM-Key": llm_key} - - return self.parent.request.post("/v1/llm/run", options, headers, stream=stream) - - self.llm = LLM(self) - - def embed( - self, - chunks: List[str], - embedding_model: Optional[EmbeddingModel] = None - ) -> List[List[float]]: - """ - Generate embeddings for text chunks. - - Args: - chunks: List of text chunks to embed - embedding_model: Model to use for embeddings - - Returns: - List of embedding vectors - """ - options = {"chunks": chunks} - - if embedding_model: - options["embeddingModel"] = embedding_model - - return self.request.post("/v1/embed", options) - - def chunk( - self, - document: Union[bytes, BytesIO, str, BinaryIO], - document_name: str, - content_type: ContentType, - chunk_max_length: Optional[str] = None, - chunk_overlap: Optional[str] = None, - separator: Optional[str] = None - ) -> List[str]: - """ - Split a document into chunks. 
- - Args: - document: Document content (bytes, file-like object, or path) - document_name: Name for the document - content_type: MIME type of the document - chunk_max_length: Maximum length of each chunk - chunk_overlap: Number of characters to overlap between chunks - separator: Custom separator for chunking - - Returns: - List of text chunks - - Raises: - ValueError: If document type is unsupported - APIError: If chunking fails - """ - files = convert_document_to_request_files(document, document_name, content_type) - - if chunk_max_length: - files["chunkMaxLength"] = (None, chunk_max_length) - - if chunk_overlap: - files["chunkOverlap"] = (None, chunk_overlap) - - if separator: - files["separator"] = (None, separator) - - response = requests.post( - f"{self.base_url}/v1/chunk", - headers={"Authorization": f"Bearer {self.api_key}"}, - files=files - ) - - if response.ok: - return response.json() - else: - try: - error_body = response.json() - except: - error_body = response.text - - raise APIError.generate( - response.status_code, - error_body, - response.reason, - dict(response.headers) - ) - - def parse( - self, - document: Union[bytes, BytesIO, str, BinaryIO], - document_name: str, - content_type: ContentType - ) -> Dict[str, str]: - """ - Parse a document to extract its content. - - Args: - document: Document content (bytes, file-like object, or path) - document_name: Name for the document - content_type: MIME type of the document - - Returns: - Dictionary with document name and extracted content - - Raises: - ValueError: If document type is unsupported - APIError: If parsing fails - """ - files = convert_document_to_request_files(document, document_name, content_type) - - response = requests.post( - f"{self.base_url}/v1/parse", - headers={"Authorization": f"Bearer {self.api_key}"}, - files=files - ) - - if response.ok: - return response.json() - else: - try: - error_body = response.json() - except: - error_body = response.text - - raise APIError.generate( - response.status_code, - error_body, - response.reason, - dict(response.headers) - ) diff --git a/langbase/constants.py b/langbase/constants.py new file mode 100644 index 0000000..223f4b2 --- /dev/null +++ b/langbase/constants.py @@ -0,0 +1,55 @@ +"""Constants used in the Langbase SDK.""" + +from typing import Dict + +STATUS_CODE_TO_MESSAGE: Dict[int, str] = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 409: "Conflict", + 422: "Unprocessable Entity", + 429: "Too Many Requests", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable", + 504: "Gateway Timeout", +} + +ERROR_MAP: Dict[int, str] = { + 400: "BadRequestError", + 401: "AuthenticationError", + 403: "PermissionDeniedError", + 404: "NotFoundError", + 409: "ConflictError", + 422: "UnprocessableEntityError", + 429: "RateLimitError", +} + +BASE_URL = "https://api.langbase.com" +# API Endpoints +PIPES_ENDPOINT = "/v1/pipes" +PIPE_DETAIL_ENDPOINT = "/v1/pipes/{name}" +PIPE_RUN_ENDPOINT = "/v1/pipes/run" + +MEMORY_ENDPOINT = "/v1/memory" +MEMORY_DETAIL_ENDPOINT = "/v1/memory/{name}" +MEMORY_RETRIEVE_ENDPOINT = "/v1/memory/retrieve" +MEMORY_DOCUMENTS_ENDPOINT = "/v1/memory/{memory_name}/documents" +MEMORY_DOCUMENT_DETAIL_ENDPOINT = "/v1/memory/{memory_name}/documents/{document_name}" +MEMORY_DOCUMENTS_UPLOAD_ENDPOINT = "/v1/memory/documents" +MEMORY_DOCUMENT_EMBEDDINGS_RETRY_ENDPOINT = ( + "/v1/memory/{memory_name}/documents/{document_name}/embeddings/retry" +) + +TOOLS_CRAWL_ENDPOINT = "/v1/tools/crawl" 
+TOOLS_WEB_SEARCH_ENDPOINT = "/v1/tools/web-search" + +THREADS_ENDPOINT = "/v1/threads" +THREAD_DETAIL_ENDPOINT = "/v1/threads/{thread_id}" +THREAD_MESSAGES_ENDPOINT = "/v1/threads/{thread_id}/messages" + +EMBED_ENDPOINT = "/v1/embed" +CHUNKER_ENDPOINT = "/v1/chunker" +PARSER_ENDPOINT = "/v1/parser" +AGENT_RUN_ENDPOINT = "/v1/agent/run" diff --git a/langbase/errors.py b/langbase/errors.py index 813cefe..458d8b4 100644 --- a/langbase/errors.py +++ b/langbase/errors.py @@ -4,7 +4,10 @@ This module defines the exception hierarchy used throughout the SDK. All errors inherit from the base APIError class. """ -from typing import Dict, Optional, Any + +from typing import Any, Dict, Optional + +from .constants import ERROR_MAP, STATUS_CODE_TO_MESSAGE class APIError(Exception): @@ -16,6 +19,7 @@ def __init__( error: Optional[Dict[str, Any]] = None, message: Optional[str] = None, headers: Optional[Dict[str, str]] = None, + endpoint: Optional[str] = None, ): """ Initialize an API error. @@ -25,27 +29,31 @@ def __init__( error: Error response body message: Error message headers: HTTP response headers + endpoint: API endpoint that was called """ self.status = status self.headers = headers - self.request_id = headers.get('lb-request-id') if headers else None + self.endpoint = endpoint + self.request_id = headers.get("lb-request-id") if headers else None if isinstance(error, dict): self.error = error - self.code = error.get('code') - self.status = error.get('status', status) + self.code = error.get("code") + self.status = error.get("status", status) else: self.error = error self.code = None - msg = self._make_message(status, error, message) + msg = self._make_message(status, error, message, endpoint, self.request_id) super().__init__(msg) @staticmethod def _make_message( status: Optional[int], error: Any, - message: Optional[str] + message: Optional[str], + endpoint: Optional[str] = None, + request_id: Optional[str] = None, ) -> str: """ Create a human-readable error message. 
@@ -54,34 +62,65 @@ def _make_message( status: HTTP status code error: Error response body message: Error message + endpoint: API endpoint that was called + request_id: Request ID from headers Returns: Formatted error message string """ - if isinstance(error, dict) and 'message' in error: - msg = error['message'] + # Extract the main error message + if isinstance(error, dict) and "message" in error: + msg = error["message"] if not isinstance(msg, str): msg = str(msg) elif error: - msg = str(error) if isinstance(error, str) else str(error) + msg = str(error) else: msg = message - if status and msg: - return f"{status} {msg}" + # Build comprehensive error message + parts = [] + + # Status line if status: - return f"{status} status code (no body)" + status_text = STATUS_CODE_TO_MESSAGE.get(status, "Unknown Error") + parts.append(f"{status_text} ({status})") + + # Error message if msg: - return msg - return "(no status code or body)" + parts.append(f"\n Message: {msg}") + + # API endpoint + if endpoint: + parts.append(f"\n Endpoint: {endpoint}") + + # Request ID + if request_id: + parts.append(f"\n Request ID: {request_id}") + + # Error details from response + if isinstance(error, dict): + if "code" in error: + parts.append(f"\n Error Code: {error['code']}") + if "details" in error: + parts.append(f"\n Details: {error['details']}") + + # Documentation link + if status: + parts.append( + f"\n Documentation: https://langbase.com/docs/errors/{status}" + ) + + return "".join(parts) if parts else "(no error information available)" @staticmethod def generate( status: Optional[int], error_response: Any, message: Optional[str], - headers: Optional[Dict[str, str]] - ) -> 'APIError': + headers: Optional[Dict[str, str]], + endpoint: Optional[str] = None, + ) -> "APIError": """ Generate the appropriate error based on status code. 
@@ -90,6 +129,7 @@ def generate( error_response: Error response body message: Error message headers: HTTP response headers + endpoint: API endpoint that was called Returns: An instance of the appropriate APIError subclass @@ -98,32 +138,28 @@ def generate( cause = error_response if isinstance(error_response, Exception) else None return APIConnectionError(cause=cause) - error = error_response.get('error') if isinstance(error_response, dict) else error_response - - if status == 400: - return BadRequestError(status, error, message, headers) - elif status == 401: - return AuthenticationError(status, error, message, headers) - elif status == 403: - return PermissionDeniedError(status, error, message, headers) - elif status == 404: - return NotFoundError(status, error, message, headers) - elif status == 409: - return ConflictError(status, error, message, headers) - elif status == 422: - return UnprocessableEntityError(status, error, message, headers) - elif status == 429: - return RateLimitError(status, error, message, headers) - elif status >= 500: - return InternalServerError(status, error, message, headers) - else: - return APIError(status, error, message, headers) + error = ( + error_response.get("error") + if isinstance(error_response, dict) + else error_response + ) + + if status in ERROR_MAP: + error_class_name = ERROR_MAP[status] + error_class = globals()[error_class_name] + return error_class(status, error, message, headers, endpoint) + + if status >= 500: + return InternalServerError(status, error, message, headers, endpoint) + return APIError(status, error, message, headers, endpoint) class APIConnectionError(APIError): """Raised when there's a problem connecting to the API.""" - def __init__(self, message: Optional[str] = None, cause: Optional[Exception] = None): + def __init__( + self, message: Optional[str] = None, cause: Optional[Exception] = None + ): """ Initialize a connection error. 
@@ -151,39 +187,47 @@ def __init__(self, message: Optional[str] = None): class BadRequestError(APIError): """Raised when the API returns a 400 status code.""" + pass class AuthenticationError(APIError): """Raised when the API returns a 401 status code.""" + pass class PermissionDeniedError(APIError): """Raised when the API returns a 403 status code.""" + pass class NotFoundError(APIError): """Raised when the API returns a 404 status code.""" + pass class ConflictError(APIError): """Raised when the API returns a 409 status code.""" + pass class UnprocessableEntityError(APIError): """Raised when the API returns a 422 status code.""" + pass class RateLimitError(APIError): """Raised when the API returns a 429 status code.""" + pass class InternalServerError(APIError): """Raised when the API returns a 5xx status code.""" + pass diff --git a/langbase/helper.py b/langbase/helper.py new file mode 100644 index 0000000..d536b36 --- /dev/null +++ b/langbase/helper.py @@ -0,0 +1,479 @@ +import json +from typing import Any, Dict, Iterator, List, Literal, Optional, Union + +from .types import ToolCall + +# Type aliases to match TypeScript version +MessageRole = Literal["function", "assistant", "system", "user", "tool"] + +# Interface aliases for consistency with TypeScript +ToolCallResult = ToolCall + + +class Delta(dict): + """Represents a delta object in a streaming chunk.""" + + @property + def role(self) -> Optional[MessageRole]: + """Get the role from the delta.""" + return self.get("role") + + @property + def content(self) -> Optional[str]: + """Get the content from the delta.""" + return self.get("content") + + @property + def tool_calls(self) -> Optional[List[ToolCall]]: + """Get the tool calls from the delta.""" + return self.get("tool_calls") + + +class ChoiceStream(dict): + """Represents a choice object in a streaming chunk.""" + + @property + def index(self) -> int: + """Get the choice index.""" + return self.get("index", 0) + + @property + def delta(self) -> Delta: + """Get the delta object.""" + return Delta(self.get("delta", {})) + + @property + def logprobs(self) -> Optional[bool]: + """Get the logprobs value.""" + return self.get("logprobs") + + @property + def finish_reason(self) -> Optional[str]: + """Get the finish reason.""" + return self.get("finish_reason") + + +class ChunkStream(dict): + """Represents a streaming chunk from the API.""" + + @property + def id(self) -> str: + """Get the chunk ID.""" + return self.get("id", "") + + @property + def object(self) -> str: + """Get the object type.""" + return self.get("object", "") + + @property + def created(self) -> int: + """Get the creation timestamp.""" + return self.get("created", 0) + + @property + def model(self) -> str: + """Get the model name.""" + return self.get("model", "") + + @property + def choices(self) -> List[ChoiceStream]: + """Get the list of choices.""" + return [ChoiceStream(choice) for choice in self.get("choices", [])] + + +def get_text_part(chunk: Union[ChunkStream, Dict[str, Any]]) -> str: + """ + Retrieves the text part from a given ChunkStream. + + Args: + chunk: The ChunkStream object or dictionary. + + Returns: + The text content of the first choice's delta, or an empty string if it doesn't exist. 
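+
+    Example (illustrative chunk shape, mirroring the streaming delta format described above):
+        >>> get_text_part({"choices": [{"delta": {"content": "Hello"}}]})
+        'Hello'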
+ """ + if isinstance(chunk, dict) and not isinstance(chunk, ChunkStream): + chunk = ChunkStream(chunk) + + return chunk.choices[0].delta.content or "" if chunk.choices else "" + + +def parse_chunk(chunk_data: Union[bytes, str]) -> Optional[ChunkStream]: + """ + Parse a raw chunk from the stream into a ChunkStream object. + + Args: + chunk_data: Raw chunk data from the stream (bytes or string) + + Returns: + Parsed ChunkStream object or None if parsing fails + """ + try: + # Handle both bytes and string input + if isinstance(chunk_data, bytes): + chunk_str = chunk_data.decode("utf-8") + else: + chunk_str = chunk_data + + # Skip empty chunks + if not chunk_str.strip(): + return None + + # Handle SSE format - remove "data: " prefix if present + if chunk_str.startswith("data: "): + json_str = chunk_str[6:] # Remove "data: " prefix + else: + json_str = chunk_str + + # Skip if it's just whitespace after removing prefix + if not json_str.strip(): + return None + + # Try to parse as JSON + chunk_dict = json.loads(json_str) + return ChunkStream(chunk_dict) + + except (json.JSONDecodeError, UnicodeDecodeError): + return None + + +def stream_text(stream: Iterator[Union[bytes, str]]) -> Iterator[str]: + """ + Generator that yields text content from a stream of chunks. + + Supports various stream sources including response.iter_lines(), + SSE streams, and raw byte iterators. + + Args: + stream: Iterator of raw chunk bytes (e.g., from response.iter_lines()) + + Yields: + Text content from each chunk + + Example: + >>> for text in stream_text(response.iter_lines()): + ... print(text, end="", flush=True) + """ + for chunk_data in stream: + if chunk_data: + chunk = parse_chunk(chunk_data) + if chunk: + text = get_text_part(chunk) + if text: + yield text + + +def collect_stream_text(stream: Iterator[Union[bytes, str]]) -> str: + """ + Collect all text content from a stream. + + Args: + stream: Iterator of raw chunk bytes + + Returns: + Complete text content from the stream + """ + return "".join(stream_text(stream)) + + +def get_tools_from_stream(stream: Iterator[Union[bytes, str]]) -> List[ToolCall]: + """ + Extract tool calls from a streaming response. + + This function properly assembles tool calls from streaming chunks. + In streaming responses, tool calls come in parts: + 1. First chunk: tool call metadata (id, type, function name) + 2. 
Subsequent chunks: incremental function arguments that need to be concatenated + + Args: + stream: Iterator of raw chunk data (bytes or strings) + + Returns: + List of complete tool calls assembled from the stream + """ + # Dictionary to accumulate tool calls by index + tool_calls_accumulator: Dict[int, ToolCall] = {} + + for chunk_data in stream: + if chunk_data: + chunk = parse_chunk(chunk_data) + if chunk and chunk.choices: + delta_tool_calls = chunk.choices[0].delta.tool_calls + if delta_tool_calls: + for delta_tool_call in delta_tool_calls: + # Get the index of this tool call + index = delta_tool_call.get("index", 0) + + # Initialize the tool call if it doesn't exist + if index not in tool_calls_accumulator: + tool_calls_accumulator[index] = { + "id": "", + "type": "function", + "function": {"name": "", "arguments": ""}, + } + + # Update with new information from this chunk + if "id" in delta_tool_call: + tool_calls_accumulator[index]["id"] = delta_tool_call["id"] + + if "type" in delta_tool_call: + tool_calls_accumulator[index]["type"] = delta_tool_call[ + "type" + ] + + if "function" in delta_tool_call: + function_data = delta_tool_call["function"] + + if "name" in function_data: + tool_calls_accumulator[index]["function"][ + "name" + ] = function_data["name"] + + if "arguments" in function_data: + # Accumulate arguments by concatenating them + tool_calls_accumulator[index]["function"][ + "arguments" + ] += function_data["arguments"] + + # Return the assembled tool calls as a list, sorted by index + return [tool_calls_accumulator[i] for i in sorted(tool_calls_accumulator.keys())] + + +def get_tools_from_run_stream(stream: Iterator[Union[bytes, str]]) -> List[ToolCall]: + """ + Retrieves tools from a readable stream asynchronously. + + Args: + stream: The stream to extract tools from + + Returns: + List of tool calls extracted from the stream + """ + return get_tools_from_stream(stream) + + +def get_tools_from_run(response: Dict[str, Any]) -> List[ToolCall]: + """ + Extracts tool calls from non-stream response. + + Args: + response: The run response object + + Returns: + List of tool calls. Returns empty list if no tools are present. + """ + try: + choices = response.get("choices", []) + if choices: + message = choices[0].get("message", {}) + tool_calls = message.get("tool_calls") + return tool_calls or [] + except (KeyError, IndexError, TypeError): + pass + + return [] + + +def handle_response_stream( + response: Any, + raw_response: bool = False, +) -> Dict[str, Any]: + """ + Handles the response stream from a given response object. + + Args: + response: The API response to handle. + raw_response: Optional flag to include raw response headers. + + Returns: + Dictionary containing the processed stream, thread ID, and optionally raw response headers. + """ + # Extract stream from response (assuming response has iter_lines method) + stream = ( + response.iter_lines() + if hasattr(response, "iter_lines") + else response.get("stream") + ) + + # Try to get thread_id from response headers + thread_id = None + if hasattr(response, "headers"): + thread_id = response.headers.get("lb-thread-id") + elif isinstance(response, dict): + thread_id = response.get("thread_id") + + result = { + "stream": stream, + "thread_id": thread_id, + } + + if raw_response and hasattr(response, "headers"): + result["raw_response"] = {"headers": dict(response.headers)} + + return result + + +class StreamProcessor: + """ + A utility class for processing streaming responses with various methods. 
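+
+    Example (illustrative sketch; assumes ``response`` is a streaming HTTP
+    response, e.g. from a pipe run with ``stream=True``):
+        >>> processor = StreamProcessor(response.iter_lines())
+        >>> for text in processor.text_generator():
+        ...     print(text, end="", flush=True)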
+ """ + + def __init__(self, stream: Iterator[Union[bytes, str]]): + """ + Initialize the stream processor. + + Args: + stream: The raw stream iterator (bytes or strings) + """ + self.stream = stream + + def text_generator(self) -> Iterator[str]: + """ + Generator for text content from the stream. + + Yields: + Text content from each chunk + """ + yield from stream_text(self.stream) + + def collect_text(self) -> str: + """ + Collect all text from the stream. + + Returns: + Complete text content + """ + return collect_stream_text(self.stream) + + def get_tool_calls(self) -> List[ToolCall]: + """ + Extract tool calls from the stream. + + Returns: + List of tool calls + """ + return get_tools_from_stream(self.stream) + + def process_chunks(self) -> Iterator[ChunkStream]: + """ + Generator for parsed chunks from the stream. + + Yields: + Parsed ChunkStream objects + """ + for chunk_data in self.stream: + if chunk_data: + chunk = parse_chunk(chunk_data) + if chunk: + yield chunk + + +# Convenience function to create a stream processor +def create_stream_processor(stream: Iterator[Union[bytes, str]]) -> StreamProcessor: + """ + Create a StreamProcessor instance. + + Args: + stream: The raw stream iterator (bytes or strings) + + Returns: + StreamProcessor instance + """ + return StreamProcessor(stream) + + +def get_runner( + response_or_stream: Union[Any, Iterator[Union[bytes, str]]], +) -> StreamProcessor: + """ + Returns a runner (StreamProcessor) for the given response or stream. + + This is the Python equivalent to TypeScript's getRunner function. + Provides a high-level interface for processing streaming responses. + + Can accept either: + - A response dict (like from langbase.pipes.run()) with 'stream' key + - A response object with iter_lines() method + - A raw stream iterator + + Args: + response_or_stream: Response dict, response object, or raw stream iterator + + Returns: + StreamProcessor instance that can process the stream + + """ + # Handle dict response (Python langbase.pipes.run returns {'stream': ..., 'thread_id': ...}) + if isinstance(response_or_stream, dict) and "stream" in response_or_stream: + stream = response_or_stream["stream"] + # Handle response object with iter_lines method (raw HTTP response) + elif hasattr(response_or_stream, "iter_lines"): + stream = response_or_stream.iter_lines() + # Handle already extracted stream iterator + elif hasattr(response_or_stream, "__iter__"): + stream = response_or_stream + else: + # Fallback: assume it's a stream + stream = response_or_stream + + return StreamProcessor(stream) + + +def get_typed_runner( + response_or_stream: Union[Any, Iterator[Union[bytes, str]]], +) -> "TypedStreamProcessor": + """ + Returns a typed stream processor for the given response or stream. + + This provides an enhanced event-driven interface for processing streaming responses. 
+ + Args: + response_or_stream: Response dict, response object, or raw stream iterator + + Returns: + TypedStreamProcessor instance with event-based handling + """ + from .streaming import TypedStreamProcessor + + # Extract stream and thread_id + thread_id = None + + # Handle dict response + if isinstance(response_or_stream, dict) and "stream" in response_or_stream: + stream = response_or_stream["stream"] + thread_id = response_or_stream.get("thread_id") + # Handle response object with iter_lines method + elif hasattr(response_or_stream, "iter_lines"): + stream = response_or_stream.iter_lines() + if hasattr(response_or_stream, "headers"): + thread_id = response_or_stream.headers.get("lb-thread-id") + # Handle already extracted stream iterator + elif hasattr(response_or_stream, "__iter__"): + stream = response_or_stream + else: + # Fallback: assume it's a stream + stream = response_or_stream + + return TypedStreamProcessor(stream, thread_id) + + +# Export all main components for easy access +__all__ = [ + "ChoiceStream", + "ChunkStream", + "Delta", + "MessageRole", + "StreamProcessor", + "ToolCallResult", + "collect_stream_text", + "create_stream_processor", + "get_runner", + "get_text_part", + "get_tools_from_run", + "get_tools_from_run_stream", + "get_tools_from_stream", + "get_typed_runner", + "handle_response_stream", + "parse_chunk", + "stream_text", +] diff --git a/langbase/langbase.py b/langbase/langbase.py new file mode 100644 index 0000000..3335ff8 --- /dev/null +++ b/langbase/langbase.py @@ -0,0 +1,62 @@ +""" +Main client for the Langbase SDK. + +This module provides the Langbase class which is the main entry point +for interacting with the Langbase API. +""" + +from typing import Optional + +from .primitives.agent import Agent +from .primitives.chunker import Chunker +from .primitives.embed import Embed +from .primitives.memories import Memories +from .primitives.parser import Parser +from .primitives.pipes import Pipes +from .primitives.threads import Threads +from .primitives.tools import Tools +from .request import Request + + +class Langbase: + """ + Client for the Langbase API. + + This class provides methods for interacting with all aspects of the Langbase API, + including pipes, memories, tools, threads, and utilities. + """ + + def __init__(self, api_key: str = "", base_url: str = "https://api.langbase.com"): + """ + Initialize the Langbase client. + + Args: + api_key: The API key for authentication. + base_url: The base URL for the API. 
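+
+        Example (illustrative):
+            >>> from langbase import Langbase
+            >>> lb = Langbase(api_key="your-api-key")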
+ """ + self.base_url = base_url + self.api_key = api_key + + self.request = Request({"api_key": self.api_key, "base_url": self.base_url}) + + # Initialize primitive classes + self.agent = Agent(self) + self.chunker_client = Chunker(self) + self.embed_client = Embed(self) + self.memories = Memories(self) + self.parser_client = Parser(self) + self.pipes = Pipes(self) + self.threads = Threads(self) + self.tools = Tools(self) + + def embed(self, chunks, embedding_model=None): + """Generate embeddings for text chunks.""" + return self.embed_client.embed(chunks, embedding_model) + + def chunker(self, content, chunk_max_length=None, chunk_overlap=None): + """Split content into chunks.""" + return self.chunker_client.chunker(content, chunk_max_length, chunk_overlap) + + def parser(self, document, document_name, content_type): + """Parse a document to extract its content.""" + return self.parser_client.parser(document, document_name, content_type) diff --git a/langbase/primitives/agent.py b/langbase/primitives/agent.py new file mode 100644 index 0000000..6bcef4a --- /dev/null +++ b/langbase/primitives/agent.py @@ -0,0 +1,153 @@ +""" +Agent API client for the Langbase SDK. +""" + +from typing import Any, Dict, List, Optional, Union, overload + +from langbase.constants import AGENT_RUN_ENDPOINT +from langbase.request import Request +from langbase.utils import clean_null_values + + +class Agent: + def __init__(self, parent): + self.parent = parent + self.request: Request = parent.request + + @overload + def run( + self, + input: Union[str, List[Dict[str, Any]]], + model: str, + api_key: str, + instructions: Optional[str] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + parallel_tool_calls: Optional[bool] = None, + reasoning_effort: Optional[str] = None, + max_completion_tokens: Optional[int] = None, + response_format: Optional[Dict[str, Any]] = None, + custom_model_params: Optional[Dict[str, Any]] = None, + mcp_servers: Optional[List[Dict[str, Any]]] = None, + *, + stream: bool = True, + ) -> Any: + """Stream overload - returns streaming response when stream=True""" + ... + + @overload + def run( + self, + input: Union[str, List[Dict[str, Any]]], + model: str, + api_key: str, + instructions: Optional[str] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + parallel_tool_calls: Optional[bool] = None, + reasoning_effort: Optional[str] = None, + max_completion_tokens: Optional[int] = None, + response_format: Optional[Dict[str, Any]] = None, + custom_model_params: Optional[Dict[str, Any]] = None, + mcp_servers: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + ) -> Dict[str, Any]: + """Non-stream overload - returns dict response when stream=False""" + ... 
+ + def run( + self, + input: Union[str, List[Dict[str, Any]]], + model: str, + api_key: str, + instructions: Optional[str] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + stop: Optional[List[str]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + parallel_tool_calls: Optional[bool] = None, + reasoning_effort: Optional[str] = None, + max_completion_tokens: Optional[int] = None, + response_format: Optional[Dict[str, Any]] = None, + custom_model_params: Optional[Dict[str, Any]] = None, + mcp_servers: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + ) -> Union[Dict[str, Any], Any]: + """ + Run an agent with the specified parameters. + + Args: + input: Either a string prompt or a list of messages + model: The model to use for the agent + api_key: API key for the LLM service + instructions: Optional instructions for the agent + top_p: Optional top-p sampling parameter + max_tokens: Optional maximum tokens to generate + temperature: Optional temperature parameter + presence_penalty: Optional presence penalty parameter + frequency_penalty: Optional frequency penalty parameter + stop: Optional list of stop sequences + tools: Optional list of tools for the agent + tool_choice: Optional tool choice configuration ('auto', 'required', or tool spec) + parallel_tool_calls: Optional flag for parallel tool execution + reasoning_effort: Optional reasoning effort level + max_completion_tokens: Optional maximum completion tokens + response_format: Optional response format configuration + custom_model_params: Optional custom model parameters + mcp_servers: Optional list of MCP (Model Context Protocol) servers + stream: Whether to stream the response (default: False) + + Returns: + Either a dictionary with the agent's response or a streaming response + """ + options = { + "input": input, + "model": model, + "apiKey": api_key, + "instructions": instructions, + "top_p": top_p, + "max_tokens": max_tokens, + "temperature": temperature, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "stop": stop, + "tools": tools, + "tool_choice": tool_choice, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, + "max_completion_tokens": max_completion_tokens, + "response_format": response_format, + "customModelParams": custom_model_params, + "mcp_servers": mcp_servers, + } + + # Only include stream if it's True (similar to TypeScript removing undefined) + if stream: + options["stream"] = True + + # Clean null values from options + options = clean_null_values(options) + + headers = {} + if api_key: + headers["LB-LLM-KEY"] = api_key + + return self.request.post( + AGENT_RUN_ENDPOINT, options, headers=headers, stream=stream + ) diff --git a/langbase/primitives/chunker.py b/langbase/primitives/chunker.py new file mode 100644 index 0000000..84df59e --- /dev/null +++ b/langbase/primitives/chunker.py @@ -0,0 +1,57 @@ +""" +Chunker API client for the Langbase SDK. +""" + +from typing import Optional + +from langbase.constants import CHUNKER_ENDPOINT +from langbase.request import Request +from langbase.types import ChunkResponse + + +class Chunker: + """ + Client for text chunking operations. + + This class provides methods for splitting text content into chunks. 
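+
+    Example (illustrative; assumes an initialized client ``lb = Langbase(api_key=...)``):
+        >>> chunks = lb.chunker("Long document text...", chunk_max_length=1024, chunk_overlap=256)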
+ """ + + def __init__(self, parent): + """ + Initialize the Chunker client. + + Args: + parent: The parent Langbase instance + """ + self.parent = parent + self.request: Request = parent.request + + def chunker( + self, + content: str, + chunk_max_length: Optional[int] = None, + chunk_overlap: Optional[int] = None, + ) -> ChunkResponse: + """ + Split content into chunks. + + Args: + content: The text content to be chunked + chunk_max_length: Maximum length for each chunk (1024-30000, default: 1024) + chunk_overlap: Number of characters to overlap between chunks (>=256, default: 256) + + Returns: + List of text chunks + + Raises: + APIError: If chunking fails + """ + json_data = {"content": content} + + if chunk_max_length is not None: + json_data["chunkMaxLength"] = chunk_max_length + + if chunk_overlap is not None: + json_data["chunkOverlap"] = chunk_overlap + + return self.request.post(CHUNKER_ENDPOINT, json_data) diff --git a/langbase/primitives/embed.py b/langbase/primitives/embed.py new file mode 100644 index 0000000..5d0e644 --- /dev/null +++ b/langbase/primitives/embed.py @@ -0,0 +1,48 @@ +""" +Embed API client for the Langbase SDK. +""" + +from typing import List, Optional + +from langbase.constants import EMBED_ENDPOINT +from langbase.request import Request +from langbase.types import EmbeddingModel, EmbedResponse + + +class Embed: + """ + Client for embedding operations. + + This class provides methods for generating embeddings for text chunks. + """ + + def __init__(self, parent): + """ + Initialize the Embed client. + + Args: + parent: The parent Langbase instance + """ + self.parent = parent + self.request: Request = parent.request + + def embed( + self, chunks: List[str], embedding_model: Optional[EmbeddingModel] = None + ) -> EmbedResponse: + """ + Generate embeddings for text chunks. + + Args: + chunks: List of text chunks to embed + embedding_model: Model to use for embeddings + + Returns: + List of embedding vectors + """ + + options = {"chunks": chunks} + + if embedding_model: + options["embeddingModel"] = embedding_model + + return self.request.post(EMBED_ENDPOINT, options) diff --git a/langbase/primitives/memories.py b/langbase/primitives/memories.py new file mode 100644 index 0000000..5a2d6cf --- /dev/null +++ b/langbase/primitives/memories.py @@ -0,0 +1,250 @@ +""" +Memories API client for the Langbase SDK. +""" + +from io import BytesIO +from pathlib import Path +from typing import Any, BinaryIO, Dict, List, Optional, Union + +import requests + +from langbase.constants import ( + MEMORY_DETAIL_ENDPOINT, + MEMORY_DOCUMENT_DETAIL_ENDPOINT, + MEMORY_DOCUMENT_EMBEDDINGS_RETRY_ENDPOINT, + MEMORY_DOCUMENTS_ENDPOINT, + MEMORY_DOCUMENTS_UPLOAD_ENDPOINT, + MEMORY_ENDPOINT, + MEMORY_RETRIEVE_ENDPOINT, +) +from langbase.errors import APIError +from langbase.types import ( + ContentType, + EmbeddingModel, + MemoryCreateResponse, + MemoryDeleteDocResponse, + MemoryDeleteResponse, + MemoryListDocResponse, + MemoryListResponse, + MemoryRetrieveResponse, +) +from langbase.utils import clean_null_values + + +class Documents: + def __init__(self, parent): + self.parent = parent + self.request = parent.request + self.embeddings = self.Embeddings(parent) + + def list(self, memory_name: str) -> List[MemoryListDocResponse]: + """ + List all documents in a memory. 
+ + Args: + memory_name: Name of the memory + + Returns: + List of document objects + """ + return self.request.get( + MEMORY_DOCUMENTS_ENDPOINT.format(memory_name=memory_name) + ) + + def delete(self, memory_name: str, document_name: str) -> MemoryDeleteDocResponse: + """ + Delete a document from memory. + + Args: + memory_name: Name of the memory + document_name: Name of the document to delete + + Returns: + Delete response + """ + return self.request.delete( + MEMORY_DOCUMENT_DETAIL_ENDPOINT.format( + memory_name=memory_name, document_name=document_name + ) + ) + + def upload( + self, + memory_name: str, + document_name: str, + document: Union[bytes, BytesIO, str, BinaryIO], + content_type: ContentType, + meta: Optional[Dict[str, str]] = None, + ) -> requests.Response: + """ + Upload a document to memory. + + Args: + memory_name: Name of the memory + document_name: Name for the document + document: Document content (bytes, file-like object, or path) + content_type: MIME type of the document + meta: Metadata for the document + + Returns: + Upload response + """ + try: + # Get signed URL for upload + response = self.request.post( + MEMORY_DOCUMENTS_UPLOAD_ENDPOINT, + { + "memoryName": memory_name, + "fileName": document_name, + "meta": meta or {}, + }, + ) + + upload_url = response.get("signedUrl") + + # Convert document to appropriate format + if isinstance(document, str) and Path(document).is_file(): + with Path(document).open("rb") as f: + file_content = f.read() + elif isinstance(document, bytes): + file_content = document + elif isinstance(document, BytesIO) or hasattr(document, "read"): + file_content = document.read() + # Reset file pointer if possible + if hasattr(document, "seek"): + document.seek(0) + else: + msg = f"Unsupported document type: {type(document)}" + raise ValueError(msg) + + # Upload to signed URL + upload_response = requests.put( + upload_url, + headers={ + "Authorization": f"Bearer {self.parent.parent.api_key}", + "Content-Type": content_type, + }, + data=file_content, + ) + + if not upload_response.ok: + raise APIError( + upload_response.status_code, + upload_response.text, + "Upload failed", + dict(upload_response.headers), + ) + + return upload_response + + except Exception as e: + if isinstance(e, APIError): + raise e + raise APIError(None, str(e), "Error during document upload", None) from e + + class Embeddings: + def __init__(self, parent): + self.parent = parent + self.request = parent.request + + def retry(self, memory_name: str, document_name: str): + """ + Retry embedding generation for a document. + + Args: + memory_name: Name of the memory + document_name: Name of the document + + Returns: + Retry response + """ + return self.request.get( + MEMORY_DOCUMENT_EMBEDDINGS_RETRY_ENDPOINT.format( + memory_name=memory_name, document_name=document_name + ) + ) + + +class Memories: + def __init__(self, parent): + self.parent = parent + self.request = parent.request + self.documents = Documents(self) + + def create( + self, + name: str, + description: Optional[str] = None, + embedding_model: Optional[EmbeddingModel] = None, + top_k: Optional[int] = None, + chunk_size: Optional[int] = None, + chunk_overlap: Optional[int] = None, + ) -> MemoryCreateResponse: + """ + Create a new memory. 
+ + Args: + name: Name for the memory + description: Description of the memory + embedding_model: Model to use for embeddings + top_k: Number of results to return + chunk_size: Size of chunks for document processing + chunk_overlap: Overlap between chunks + + Returns: + Created memory object + """ + options = { + "name": name, + "description": description, + "embedding_model": embedding_model, + "top_k": top_k, + "chunk_size": chunk_size, + "chunk_overlap": chunk_overlap, + } + return self.request.post(MEMORY_ENDPOINT, clean_null_values(options)) + + def delete(self, name: str) -> MemoryDeleteResponse: + """ + Delete a memory. + + Args: + name: Name of the memory to delete + + Returns: + Delete response + """ + return self.request.delete(MEMORY_DETAIL_ENDPOINT.format(name=name)) + + def retrieve( + self, + query: str, + memory: List[Dict[str, Any]], + top_k: Optional[int] = None, + ) -> List[MemoryRetrieveResponse]: + """ + Retrieve content from memory based on query. + + Args: + query: Search query + memory: List of memory configurations + top_k: Number of results to return + + Returns: + List of matching content + """ + options = {"query": query, "memory": memory} + + if top_k is not None: + options["topK"] = top_k + + return self.request.post(MEMORY_RETRIEVE_ENDPOINT, options) + + def list(self) -> List[MemoryListResponse]: + """ + List all memories. + + Returns: + List of memory objects + """ + return self.request.get(MEMORY_ENDPOINT) diff --git a/langbase/primitives/parser.py b/langbase/primitives/parser.py new file mode 100644 index 0000000..b660d68 --- /dev/null +++ b/langbase/primitives/parser.py @@ -0,0 +1,66 @@ +""" +Parser API client for the Langbase SDK. +""" + +from io import BytesIO +from typing import BinaryIO, Union + +import requests + +from langbase.constants import PARSER_ENDPOINT +from langbase.request import Request +from langbase.types import ContentType, ParseResponse +from langbase.utils import convert_document_to_request_files + + +class Parser: + """ + Client for document parsing operations. + + This class provides methods for parsing documents to extract their content. + """ + + def __init__(self, parent): + """ + Initialize the Parser client. + + Args: + parent: The parent Langbase instance + """ + self.parent = parent + self.request: Request = parent.request + + def parser( + self, + document: Union[bytes, BytesIO, str, BinaryIO], + document_name: str, + content_type: ContentType, + ) -> ParseResponse: + """ + Parse a document to extract its content. + + Args: + document: Document content (bytes, file-like object, or path) + document_name: Name for the document + content_type: MIME type of the document + + Returns: + Dictionary with document name and extracted content + """ + document_content = convert_document_to_request_files( + document, document_name, content_type + ) + + response = self.request.post( + PARSER_ENDPOINT, + headers={"Authorization": f"Bearer {self.parent.api_key}"}, + document=document_content, + ) + + # Transform API response: rename documentName to document_name + if isinstance(response, dict) and "documentName" in response: + response["document_name"] = response.pop("documentName") + + print("response", response) + + return response diff --git a/langbase/primitives/pipes.py b/langbase/primitives/pipes.py new file mode 100644 index 0000000..bfae8f6 --- /dev/null +++ b/langbase/primitives/pipes.py @@ -0,0 +1,173 @@ +""" +Pipes API client for the Langbase SDK. 
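+
+Example (illustrative; assumes an initialized client ``lb`` and an existing pipe,
+with "summary-pipe" used as a placeholder name):
+
+    lb.pipes.run(
+        name="summary-pipe",
+        messages=[{"role": "user", "content": "Tell me about AI."}],
+    )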
+""" + +from typing import Any, Dict, List, Optional, Union + +from langbase.constants import PIPE_DETAIL_ENDPOINT, PIPE_RUN_ENDPOINT, PIPES_ENDPOINT +from langbase.request import Request +from langbase.types import ( + PipeCreateResponse, + PipeListResponse, + PipeUpdateResponse, + RunResponse, + RunResponseStream, +) +from langbase.utils import clean_null_values + + +class Pipes: + def __init__(self, parent): + self.parent = parent + self.request: Request = parent.request + + def list(self) -> List[PipeListResponse]: + """ + List all pipes. + + Returns: + List of pipe objects + """ + return self.request.get(PIPES_ENDPOINT) + + def create( + self, name: str, description: Optional[str] = None, **kwargs + ) -> PipeCreateResponse: + """ + Create a new pipe. + + Args: + name: Name of the pipe + description: Description of the pipe + **kwargs: Additional parameters for the pipe + + Returns: + Created pipe object + """ + options = {"name": name, "description": description, **kwargs} + return self.request.post(PIPES_ENDPOINT, clean_null_values(options)) + + def update(self, name: str, **kwargs) -> PipeUpdateResponse: + """ + Update an existing pipe. + + Args: + name: Name of the pipe to update + **kwargs: Parameters to update + + Returns: + Updated pipe object + """ + options = {"name": name, **kwargs} + return self.request.post( + PIPE_DETAIL_ENDPOINT.format(name=name), clean_null_values(options) + ) + + def run( + self, + name: Optional[str] = None, + api_key: Optional[str] = None, + messages: Optional[List[Dict[str, Any]]] = None, + variables: Optional[List[Dict[str, str]]] = None, + thread_id: Optional[str] = None, + raw_response: Optional[bool] = None, + run_tools: Optional[bool] = None, + tools: Optional[List[Dict[str, Any]]] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + parallel_tool_calls: Optional[bool] = None, + llm_key: Optional[str] = None, + json: Optional[bool] = None, + memory: Optional[List[Dict[str, str]]] = None, + response_format: Optional[Dict[str, Any]] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + presence_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + stop: Optional[List[str]] = None, + store: Optional[bool] = None, + moderate: Optional[bool] = None, + stream: Optional[bool] = None, + **kwargs, + ) -> Union[RunResponse, RunResponseStream]: + """ + Run a pipe. 
+ + Args: + name: Name of the pipe to run + api_key: API key for the pipe + messages: List of messages for the conversation + variables: List of variables for template substitution + thread_id: Thread ID for conversation continuity + raw_response: Whether to include raw response headers + run_tools: Whether to enable tool execution + tools: List of tools available to the pipe + tool_choice: Tool choice strategy ('auto', 'required', or tool spec) + parallel_tool_calls: Whether to enable parallel tool calls + llm_key: LLM API key for the request + json: Whether to enable JSON mode + memory: List of runtime memory configurations + response_format: Response format configuration + top_p: Top-p sampling parameter + max_tokens: Maximum tokens to generate + temperature: Temperature for randomness + presence_penalty: Presence penalty parameter + frequency_penalty: Frequency penalty parameter + stop: List of stop sequences + store: Whether to store the conversation + moderate: Whether to enable content moderation + stream: Whether to stream the response + **kwargs: Additional parameters for the run + + Returns: + Run response or stream + """ + if not name and not api_key: + msg = "Either pipe name or API key is required" + raise ValueError(msg) + + options = { + "name": name, + "api_key": api_key, + "messages": messages or [], + "variables": variables, + "thread_id": thread_id, + "raw_response": raw_response, + "run_tools": run_tools, + "tools": tools, + "tool_choice": tool_choice, + "parallel_tool_calls": parallel_tool_calls, + "json": json, + "memory": memory, + "response_format": response_format, + "top_p": top_p, + "max_tokens": max_tokens, + "temperature": temperature, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "stop": stop, + "store": store, + "moderate": moderate, + **kwargs, + } + + # Only set stream in options if it's explicitly provided + if stream is not None: + options["stream"] = stream + + # Create a new request instance if API key is provided + request = self.request + if api_key: + request = Request({"api_key": api_key, "base_url": self.parent.base_url}) + + headers = {} + if llm_key: + headers["LB-LLM-KEY"] = llm_key + + # Pass the stream parameter to post method (which might be None) + return request.post( + PIPE_RUN_ENDPOINT, + clean_null_values(options), + headers, + stream=stream if stream is not None else False, + ) diff --git a/langbase/primitives/threads.py b/langbase/primitives/threads.py new file mode 100644 index 0000000..122eb12 --- /dev/null +++ b/langbase/primitives/threads.py @@ -0,0 +1,126 @@ +""" +Threads API client for the Langbase SDK. +""" + +from typing import Any, Dict, List, Optional + +from langbase.constants import ( + THREAD_DETAIL_ENDPOINT, + THREAD_MESSAGES_ENDPOINT, + THREADS_ENDPOINT, +) +from langbase.request import Request +from langbase.types import ThreadMessagesBaseResponse, ThreadsBaseResponse +from langbase.utils import clean_null_values + + +class Messages: + def __init__(self, parent): + self.parent = parent + self.request: Request = parent.request + + def list(self, thread_id: str) -> List[ThreadMessagesBaseResponse]: + """ + List all messages in a thread. 
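+
+        Example (illustrative sketch; assumes a configured client exposed as
+        lb.threads):
+            messages = lb.threads.messages.list(thread_id="thread_123")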
+ + Args: + thread_id: ID of the thread + + Returns: + List of messages + """ + return self.request.get(THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id)) + + +class Threads: + def __init__(self, parent): + self.parent = parent + self.request: Request = parent.request + self.messages = Messages(self) + + def create( + self, + thread_id: Optional[str] = None, + metadata: Optional[Dict[str, str]] = None, + messages: Optional[List[Dict[str, Any]]] = None, + ) -> ThreadsBaseResponse: + """ + Create a new thread. + + Args: + thread_id: Optional specific ID for the thread + metadata: Metadata for the thread + messages: Initial messages for the thread + + Returns: + Created thread object + """ + options = {} + + if thread_id: + options["threadId"] = thread_id + + if metadata: + options["metadata"] = metadata + + if messages: + options["messages"] = messages + + return self.request.post(THREADS_ENDPOINT, clean_null_values(options)) + + def update(self, thread_id: str, metadata: Dict[str, str]) -> ThreadsBaseResponse: + """ + Update thread metadata. + + Args: + thread_id: ID of the thread to update + metadata: New metadata + + Returns: + Updated thread object + """ + options = {"metadata": metadata} + return self.request.post( + THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id), options + ) + + def get(self, thread_id: str) -> ThreadsBaseResponse: + """ + Get thread details. + + Args: + thread_id: ID of the thread + + Returns: + Thread object + """ + return self.request.get(THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)) + + def delete(self, thread_id: str) -> Dict[str, bool]: + """ + Delete a thread. + + Args: + thread_id: ID of the thread to delete + + Returns: + Delete response + """ + return self.request.delete(THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)) + + def append( + self, thread_id: str, messages: List[Dict[str, Any]] + ) -> List[ThreadMessagesBaseResponse]: + """ + Append messages to a thread. + + Args: + thread_id: ID of the thread + messages: Messages to append + + Returns: + List of added messages + """ + return self.request.post( + THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id), messages + ) diff --git a/langbase/primitives/tools.py b/langbase/primitives/tools.py new file mode 100644 index 0000000..2f41eee --- /dev/null +++ b/langbase/primitives/tools.py @@ -0,0 +1,77 @@ +""" +Tools API client for the Langbase SDK. +""" + +from typing import List, Optional + +from langbase.constants import TOOLS_CRAWL_ENDPOINT, TOOLS_WEB_SEARCH_ENDPOINT +from langbase.request import Request + + +class Tools: + def __init__(self, parent): + self.parent = parent + self.request: Request = parent.request + + def crawl( + self, + url: List[str], + max_pages: Optional[int] = None, + api_key: Optional[str] = None, + ): + """ + Crawl web pages. + + Args: + url: List of URLs to crawl + max_pages: Maximum number of pages to crawl + api_key: API key for crawling service + + Returns: + List of crawled content + """ + options = {"url": url} + + if max_pages is not None: + options["maxPages"] = max_pages + + headers = {} + if api_key: + headers["LB-CRAWL-KEY"] = api_key + + return self.request.post(TOOLS_CRAWL_ENDPOINT, options, headers) + + def web_search( + self, + query: str, + service: str = "exa", + total_results: Optional[int] = None, + domains: Optional[List[str]] = None, + api_key: Optional[str] = None, + ): + """ + Search the web. 
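+
+        Example (illustrative sketch; assumes a configured client exposed as
+        lb.tools and a valid key for the Exa search service):
+            results = lb.tools.web_search(
+                query="vector databases comparison",
+                total_results=5,
+                api_key="YOUR_EXA_API_KEY",
+            )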
+ + Args: + query: Search query + service: Search service to use + total_results: Number of results to return + domains: List of domains to restrict search to + api_key: API key for search service + + Returns: + List of search results + """ + options = {"query": query, "service": service} + + if total_results is not None: + options["totalResults"] = total_results + + if domains is not None: + options["domains"] = domains + + headers = {} + if api_key: + headers["LB-WEB-SEARCH-KEY"] = api_key + + return self.request.post(TOOLS_WEB_SEARCH_ENDPOINT, options, headers) diff --git a/langbase/request.py b/langbase/request.py index 574d782..29cd8b0 100644 --- a/langbase/request.py +++ b/langbase/request.py @@ -4,12 +4,13 @@ This module provides the Request class which handles all HTTP communication with the Langbase API, including error handling and response parsing. """ + import json -from typing import Dict, Optional, Any, Union, Iterator, List +from typing import Any, Dict, Iterator, Optional, Union import requests -from .errors import APIError, APIConnectionError, APIConnectionTimeoutError +from .errors import APIConnectionError, APIConnectionTimeoutError, APIError from .types import GENERATION_ENDPOINTS @@ -29,12 +30,10 @@ def __init__(self, config: Dict[str, Any]): config: Configuration dictionary containing: - api_key: API key for authentication - base_url: Base URL for the API - - timeout: Timeout for requests in seconds (default: 30) """ self.config = config self.api_key = config.get("api_key", "") self.base_url = config.get("base_url", "") - self.timeout = config.get("timeout", 30) def build_url(self, endpoint: str) -> str: """ @@ -47,8 +46,8 @@ def build_url(self, endpoint: str) -> str: Complete URL for the request """ # Ensure the endpoint starts with a slash - if not endpoint.startswith('/'): - endpoint = f'/{endpoint}' + if not endpoint.startswith("/"): + endpoint = f"/{endpoint}" return f"{self.base_url}{endpoint}" @@ -64,7 +63,7 @@ def build_headers(self, headers: Optional[Dict[str, str]] = None) -> Dict[str, s """ default_headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}" + "Authorization": f"Bearer {self.api_key}", } if headers: @@ -79,7 +78,7 @@ def make_request( headers: Dict[str, str], body: Optional[Dict[str, Any]] = None, stream: bool = False, - files: Optional[Dict[str, Any]] = None + files: Optional[Dict[str, Any]] = None, ) -> requests.Response: """ Make an HTTP request to the API. 
@@ -102,13 +101,16 @@ def make_request( try: # If files are provided, don't send JSON body if files: + # Remove Content-Type header for file uploads (requests will set it automatically) + filtered_headers = { + k: v for k, v in headers.items() if k != "Content-Type" + } response = requests.request( method=method, url=url, - headers={k: v for k, v in headers.items() if k != 'Content-Type'}, + headers=filtered_headers, files=files, - timeout=self.timeout, - stream=stream + stream=stream, ) else: response = requests.request( @@ -116,14 +118,13 @@ def make_request( url=url, headers=headers, json=body if body else None, - timeout=self.timeout, - stream=stream + stream=stream, ) return response except requests.Timeout as e: - raise APIConnectionTimeoutError(str(e)) + raise APIConnectionTimeoutError(str(e)) from e except requests.RequestException as e: - raise APIConnectionError(cause=e) + raise APIConnectionError(cause=e) from e def handle_error_response(self, response: requests.Response) -> None: """ @@ -141,13 +142,12 @@ def handle_error_response(self, response: requests.Response) -> None: error_body = response.text raise APIError.generate( - response.status_code, - error_body, - response.reason, - dict(response.headers) + response.status_code, error_body, response.reason, dict(response.headers) ) - def handle_stream_response(self, response: requests.Response) -> Dict[str, Union[Iterator[bytes], Optional[str]]]: + def handle_stream_response( + self, response: requests.Response + ) -> Dict[str, Union[Iterator[bytes], Optional[str]]]: """ Handle streaming responses. @@ -159,13 +159,11 @@ def handle_stream_response(self, response: requests.Response) -> Dict[str, Union """ return { "stream": response.iter_lines(), - "thread_id": response.headers.get("lb-thread-id") + "thread_id": response.headers.get("lb-thread-id"), } def handle_run_response_stream( - self, - response: requests.Response, - raw_response: bool = False + self, response: requests.Response, raw_response: bool = False ) -> Dict[str, Any]: """ Handle streaming responses for run endpoints. @@ -179,17 +177,17 @@ def handle_run_response_stream( """ result = { "stream": response.iter_lines(), - "thread_id": response.headers.get("lb-thread-id") + "thread_id": response.headers.get("lb-thread-id"), } if raw_response: - result["raw_response"] = { - "headers": dict(response.headers) - } + result["rawResponse"] = {"headers": dict(response.headers)} return result - def handle_run_response(self, response, thread_id, raw_response=False): + def handle_run_response( + self, response, thread_id, raw_response=False, endpoint=None + ): """ Handle regular responses for run endpoints. 
@@ -197,16 +195,22 @@ def handle_run_response(self, response, thread_id, raw_response=False): response: Response object thread_id: Thread ID from response headers raw_response: Whether to include raw response headers + endpoint: The API endpoint being called Returns: Processed response dictionary """ generate_response = response.json() + is_agent_run = endpoint == "/v1/agent/run" if endpoint else False build_response = ( { - "completion": generate_response.get("completion"), - **generate_response.get("raw", {}) + "output" + if is_agent_run + else "completion": generate_response.get( + "output" if is_agent_run else "completion" + ), + **generate_response.get("raw", {}), } if generate_response.get("raw") else generate_response @@ -218,9 +222,7 @@ def handle_run_response(self, response, thread_id, raw_response=False): result["threadId"] = thread_id if raw_response: - result["rawResponse"] = { - "headers": dict(response.headers) - } + result["rawResponse"] = {"headers": dict(response.headers)} return result @@ -234,7 +236,9 @@ def is_generation_endpoint(self, endpoint: str) -> bool: Returns: True if the endpoint is a generation endpoint, False otherwise """ - return any(endpoint.startswith(gen_endpoint) for gen_endpoint in GENERATION_ENDPOINTS) + return any( + endpoint.startswith(gen_endpoint) for gen_endpoint in GENERATION_ENDPOINTS + ) def send( self, @@ -243,7 +247,7 @@ def send( headers: Optional[Dict[str, str]] = None, body: Optional[Dict[str, Any]] = None, stream: bool = False, - files: Optional[Dict[str, Any]] = None + files: Optional[Dict[str, Any]] = None, ) -> Any: """ Send a request to the API and handle the response. @@ -275,33 +279,36 @@ def send( thread_id = response.headers.get("lb-thread-id") if not body: + raw_response = body.get("raw_response", False) if body else False return self.handle_run_response( response, thread_id=None, - raw_response=body.get("raw_response", False) if body else False + raw_response=raw_response, + endpoint=endpoint, ) if body.get("stream") and "run" in url: + raw_response = body.get("raw_response", False) return self.handle_run_response_stream( - response, - raw_response=body.get("raw_response", False) + response, raw_response=raw_response ) if body.get("stream"): return self.handle_stream_response(response) + raw_response = body.get("raw_response", False) return self.handle_run_response( response, thread_id=thread_id, - raw_response=body.get("raw_response", False) + raw_response=raw_response, + endpoint=endpoint, ) - else: - # For non-generation endpoints, just return the JSON response - try: - return response.json() - except json.JSONDecodeError: - # If the response is not JSON, return the text - return {"text": response.text} + # For non-generation endpoints, just return the JSON response + try: + return response.json() + except json.JSONDecodeError: + # If the response is not JSON, return the text + return {"text": response.text} def post( self, @@ -309,7 +316,7 @@ def post( body: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, stream: bool = False, - files: Optional[Dict[str, Any]] = None + document: Optional[Dict[str, Any]] = None, ) -> Any: """ Send a POST request to the API. 
@@ -324,13 +331,13 @@ def post( Returns: Processed API response """ - return self.send(endpoint, "POST", headers, body, stream, files) + return self.send(endpoint, "POST", headers, body, stream, document) def get( self, endpoint: str, headers: Optional[Dict[str, str]] = None, - params: Optional[Dict[str, Any]] = None + params: Optional[Dict[str, Any]] = None, ) -> Any: """ Send a GET request to the API. @@ -358,7 +365,7 @@ def put( endpoint: str, body: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, - files: Optional[Dict[str, Any]] = None + files: Optional[Dict[str, Any]] = None, ) -> Any: """ Send a PUT request to the API. @@ -374,11 +381,7 @@ def put( """ return self.send(endpoint, "PUT", headers, body, files=files) - def delete( - self, - endpoint: str, - headers: Optional[Dict[str, str]] = None - ) -> Any: + def delete(self, endpoint: str, headers: Optional[Dict[str, str]] = None) -> Any: """ Send a DELETE request to the API. @@ -390,22 +393,3 @@ def delete( Processed API response """ return self.send(endpoint, "DELETE", headers) - - def patch( - self, - endpoint: str, - body: Optional[Dict[str, Any]] = None, - headers: Optional[Dict[str, str]] = None - ) -> Any: - """ - Send a PATCH request to the API. - - Args: - endpoint: API endpoint path - body: Request body - headers: Additional headers - - Returns: - Processed API response - """ - return self.send(endpoint, "PATCH", headers, body) diff --git a/langbase/streaming.py b/langbase/streaming.py new file mode 100644 index 0000000..f102026 --- /dev/null +++ b/langbase/streaming.py @@ -0,0 +1,347 @@ +""" +Streaming utilities for the Langbase SDK. + +This module provides typed event-based streaming interfaces for better developer experience. +""" + +import time +from enum import Enum +from typing import Any, Callable, Dict, Iterator, List, Optional, Union + +from typing_extensions import Literal, TypedDict, TypeVar + +from .helper import ChunkStream, parse_chunk +from .types import ToolCall + + +class StreamEventType(str, Enum): + """Enum for all possible stream event types.""" + + CONNECT = "connect" + CONTENT = "content" + TOOL_CALL = "tool_call" + COMPLETION = "completion" + ERROR = "error" + END = "end" + METADATA = "metadata" + + +class StreamEvent(TypedDict): + """Base stream event.""" + + type: StreamEventType + timestamp: float + + +class ConnectEvent(TypedDict): + """Event fired when stream connection is established.""" + + type: Literal[StreamEventType.CONNECT] + timestamp: float + threadId: Optional[str] + + +class ContentEvent(TypedDict): + """Event fired when text content is received.""" + + type: Literal[StreamEventType.CONTENT] + timestamp: float + content: str + chunk: ChunkStream + + +class ToolCallEvent(TypedDict): + """Event fired when a tool call is received.""" + + type: Literal[StreamEventType.TOOL_CALL] + timestamp: float + toolCall: ToolCall + index: int + + +class CompletionEvent(TypedDict): + """Event fired when the completion is done.""" + + type: Literal[StreamEventType.COMPLETION] + timestamp: float + reason: str + usage: Optional[Dict[str, int]] + + +class ErrorEvent(TypedDict): + """Event fired when an error occurs.""" + + type: Literal[StreamEventType.ERROR] + timestamp: float + error: Exception + message: str + + +class EndEvent(TypedDict): + """Event fired when the stream ends.""" + + type: Literal[StreamEventType.END] + timestamp: float + duration: float + + +class MetadataEvent(TypedDict): + """Event fired when metadata is received.""" + + type: 
Literal[StreamEventType.METADATA] + timestamp: float + metadata: Dict[str, Any] + + +# Union type for all events +Event = Union[ + ConnectEvent, + ContentEvent, + ToolCallEvent, + CompletionEvent, + ErrorEvent, + EndEvent, + MetadataEvent, +] + +# Type for event handlers +T = TypeVar("T", bound=Event) +EventHandler = Callable[[T], None] + + +class TypedStreamProcessor: + """ + Enhanced stream processor with typed events for better developer experience. + + This provides an event-driven interface similar to TypeScript/JavaScript patterns, + making it easier to handle different aspects of streaming responses. + """ + + def __init__( + self, stream: Iterator[Union[bytes, str]], thread_id: Optional[str] = None + ): + """ + Initialize the typed stream processor. + + Args: + stream: The raw stream iterator + thread_id: Optional thread ID from the response + """ + self.stream = stream + self.thread_id = thread_id + self._handlers: Dict[StreamEventType, List[EventHandler]] = {} + self._start_time = None + self._tool_calls_accumulator: Dict[int, ToolCall] = {} + + def on( + self, event: StreamEventType, handler: EventHandler + ) -> "TypedStreamProcessor": + """ + Register an event handler. + + Args: + event: The event type to listen for + handler: The handler function to call when the event occurs + + Returns: + Self for method chaining + """ + if event not in self._handlers: + self._handlers[event] = [] + self._handlers[event].append(handler) + return self + + def off( + self, event: StreamEventType, handler: EventHandler + ) -> "TypedStreamProcessor": + """ + Remove an event handler. + + Args: + event: The event type + handler: The handler function to remove + + Returns: + Self for method chaining + """ + if event in self._handlers and handler in self._handlers[event]: + self._handlers[event].remove(handler) + return self + + def _emit(self, event: Event) -> None: + """Emit an event to all registered handlers.""" + event_type = event["type"] + if event_type in self._handlers: + for handler in self._handlers[event_type]: + try: + handler(event) + except Exception as e: + # If error handler exists, use it, otherwise re-raise + if StreamEventType.ERROR in self._handlers: + self._emit( + ErrorEvent( + type=StreamEventType.ERROR, + timestamp=self._get_timestamp(), + error=e, + message=f"Error in {event_type} handler: {e!s}", + ) + ) + else: + raise + + def _get_timestamp(self) -> float: + """Get current timestamp in seconds.""" + return time.time() + + def process(self) -> None: + """ + Process the stream and emit events. + + This method consumes the stream and emits appropriate events. + Call this after registering all event handlers. 
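+
+        Example (illustrative sketch; assumes raw_stream and thread_id came
+        from a streaming pipe run):
+            processor = TypedStreamProcessor(raw_stream, thread_id=thread_id)
+            processor.on(
+                StreamEventType.CONTENT,
+                lambda e: print(e["content"], end=""),
+            )
+            processor.on(
+                StreamEventType.ERROR,
+                lambda e: print(e["message"]),
+            )
+            processor.process()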
+ """ + self._start_time = self._get_timestamp() + + # Emit connect event + self._emit( + ConnectEvent( + type=StreamEventType.CONNECT, + timestamp=self._start_time, + threadId=self.thread_id, + ) + ) + + try: + for chunk_data in self.stream: + if chunk_data: + chunk = parse_chunk(chunk_data) + if chunk and chunk.choices: + choice = chunk.choices[0] + + # Handle content + if choice.delta.content: + self._emit( + ContentEvent( + type=StreamEventType.CONTENT, + timestamp=self._get_timestamp(), + content=choice.delta.content, + chunk=chunk, + ) + ) + + # Handle tool calls + if choice.delta.tool_calls: + self._process_tool_calls(choice.delta.tool_calls) + + # Handle completion + if choice.finish_reason: + usage = ( + chunk.get("usage") if isinstance(chunk, dict) else None + ) + self._emit( + CompletionEvent( + type=StreamEventType.COMPLETION, + timestamp=self._get_timestamp(), + reason=choice.finish_reason, + usage=usage, + ) + ) + + # Emit any accumulated tool calls + for index, tool_call in sorted(self._tool_calls_accumulator.items()): + self._emit( + ToolCallEvent( + type=StreamEventType.TOOL_CALL, + timestamp=self._get_timestamp(), + toolCall=tool_call, + index=index, + ) + ) + + except Exception as e: + self._emit( + ErrorEvent( + type=StreamEventType.ERROR, + timestamp=self._get_timestamp(), + error=e, + message=str(e), + ) + ) + raise + finally: + # Always emit end event + duration = ( + self._get_timestamp() - self._start_time if self._start_time else 0 + ) + self._emit( + EndEvent( + type=StreamEventType.END, + timestamp=self._get_timestamp(), + duration=duration, + ) + ) + + def _process_tool_calls(self, delta_tool_calls: List[Dict[str, Any]]) -> None: + """Process incremental tool call updates.""" + for delta_tool_call in delta_tool_calls: + index = delta_tool_call.get("index", 0) + + # Initialize if not exists + if index not in self._tool_calls_accumulator: + self._tool_calls_accumulator[index] = { + "id": "", + "type": "function", + "function": {"name": "", "arguments": ""}, + } + + # Update with new data + if "id" in delta_tool_call: + self._tool_calls_accumulator[index]["id"] = delta_tool_call["id"] + + if "type" in delta_tool_call: + self._tool_calls_accumulator[index]["type"] = delta_tool_call["type"] + + if "function" in delta_tool_call: + func_data = delta_tool_call["function"] + if "name" in func_data: + self._tool_calls_accumulator[index]["function"]["name"] = func_data[ + "name" + ] + if "arguments" in func_data: + self._tool_calls_accumulator[index]["function"][ + "arguments" + ] += func_data["arguments"] + + def collect_text(self) -> str: + """ + Collect all text content from the stream. + + Returns: + Complete text content + """ + text_parts = [] + + def content_handler(event: ContentEvent) -> None: + text_parts.append(event["content"]) + + self.on(StreamEventType.CONTENT, content_handler) + self.process() + + return "".join(text_parts) + + def collect_tool_calls(self) -> List[ToolCall]: + """ + Collect all tool calls from the stream. + + Returns: + List of tool calls + """ + tool_calls = [] + + def tool_handler(event: ToolCallEvent) -> None: + tool_calls.append(event["toolCall"]) + + self.on(StreamEventType.TOOL_CALL, tool_handler) + self.process() + + return tool_calls diff --git a/langbase/types.py b/langbase/types.py index 0a59ff0..8980043 100644 --- a/langbase/types.py +++ b/langbase/types.py @@ -4,16 +4,19 @@ This module defines the various data structures and type hints used throughout the SDK to provide better code assistance and documentation. 
""" -from typing import Dict, List, Optional, Union, Any, TypedDict, Literal, Protocol, runtime_checkable -from typing_extensions import NotRequired +from typing import Any, Dict, List, Optional, Protocol, Union, runtime_checkable + +from typing_extensions import Literal, TypedDict + +# NotRequired removed - using Optional instead # Base types and constants GENERATION_ENDPOINTS = [ - '/v1/pipes/run', - '/beta/chat', - '/beta/generate', - '/v1/llm/run', + "/v1/pipes/run", + "/beta/chat", + "/beta/generate", + "/v1/agent/run", ] # Role types @@ -24,7 +27,7 @@ "openai:text-embedding-3-large", "cohere:embed-multilingual-v3.0", "cohere:embed-multilingual-light-v3.0", - "google:text-embedding-004" + "google:text-embedding-004", ] # Content types for documents @@ -34,19 +37,21 @@ "text/markdown", "text/csv", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/vnd.ms-excel" + "application/vnd.ms-excel", ] # Function and tool types class Function(TypedDict): """Function definition for tool calls.""" + name: str arguments: str class ToolCall(TypedDict): """Tool call definition.""" + id: str type: Literal["function"] function: Function @@ -54,36 +59,31 @@ class ToolCall(TypedDict): class ToolFunction(TypedDict): """Function definition for tools.""" + name: str - description: NotRequired[str] - parameters: NotRequired[Dict[str, Any]] + description: Optional[str] + parameters: Optional[Dict[str, Any]] class Tools(TypedDict): """Tool definition.""" + type: Literal["function"] function: ToolFunction class ToolChoice(TypedDict): """Tool choice definition.""" + type: Literal["function"] function: Dict[str, str] -# Message types -class MessageContentItem(TypedDict, total=False): - """Content item for a message with multiple content parts.""" - type: str - text: Optional[str] - image_url: Optional[Dict[str, str]] - cache_control: Optional[Dict[str, str]] - - class Message(TypedDict, total=False): """Basic message structure.""" + role: Role - content: Optional[Union[str, List[MessageContentItem]]] + content: Optional[str] name: Optional[str] tool_call_id: Optional[str] tool_calls: Optional[List[ToolCall]] @@ -91,6 +91,7 @@ class Message(TypedDict, total=False): class ThreadMessage(Message, total=False): """Message structure with thread-specific fields.""" + attachments: Optional[List[Any]] metadata: Optional[Dict[str, str]] @@ -98,13 +99,22 @@ class ThreadMessage(Message, total=False): # Variable definition class Variable(TypedDict): """Variable definition for pipe templates.""" + name: str value: str +# Runtime memory definition +class RuntimeMemory(TypedDict): + """Runtime memory configuration.""" + + name: str + + # Response types class Usage(TypedDict): """Token usage information.""" + prompt_tokens: int completion_tokens: int total_tokens: int @@ -112,6 +122,7 @@ class Usage(TypedDict): class ChoiceGenerate(TypedDict): """Generation choice structure.""" + index: int message: Message logprobs: Optional[bool] @@ -120,6 +131,7 @@ class ChoiceGenerate(TypedDict): class ResponseFormat(TypedDict, total=False): """Response format configuration.""" + type: Literal["text", "json_object", "json_schema"] json_schema: Optional[Dict[str, Any]] @@ -127,68 +139,88 @@ class ResponseFormat(TypedDict, total=False): # Option types class RunOptionsBase(TypedDict, total=False): """Base options for running a pipe.""" + messages: List[Message] variables: List[Variable] thread_id: str raw_response: bool run_tools: bool tools: List[Tools] + tool_choice: Union[Literal["auto", "required"], 
ToolChoice] + parallel_tool_calls: bool name: str api_key: str llm_key: str json: bool + memory: List[RuntimeMemory] + response_format: ResponseFormat + top_p: float + max_tokens: int + temperature: float + presence_penalty: float + frequency_penalty: float + stop: List[str] + store: bool + moderate: bool class RunOptions(RunOptionsBase, total=False): """Options for running a pipe without streaming.""" + stream: Literal[False] class RunOptionsStream(RunOptionsBase): """Options for running a pipe with streaming.""" + stream: Literal[True] class LlmOptionsBase(TypedDict): """Base options for running an LLM.""" + messages: List[Message] model: str llm_key: str - top_p: NotRequired[float] - max_tokens: NotRequired[int] - temperature: NotRequired[float] - presence_penalty: NotRequired[float] - frequency_penalty: NotRequired[float] - stop: NotRequired[List[str]] - tools: NotRequired[List[Tools]] - tool_choice: NotRequired[Union[Literal['auto', 'required'], ToolChoice]] - parallel_tool_calls: NotRequired[bool] - reasoning_effort: NotRequired[Optional[str]] - max_completion_tokens: NotRequired[int] - response_format: NotRequired[ResponseFormat] - custom_model_params: NotRequired[Dict[str, Any]] + top_p: Optional[float] + max_tokens: Optional[int] + temperature: Optional[float] + presence_penalty: Optional[float] + frequency_penalty: Optional[float] + stop: Optional[List[str]] + tools: Optional[List[Tools]] + tool_choice: Optional[Union[Literal["auto", "required"], ToolChoice]] + parallel_tool_calls: Optional[bool] + reasoning_effort: Optional[str] + max_completion_tokens: Optional[int] + response_format: Optional[ResponseFormat] + custom_model_params: Optional[Dict[str, Any]] class LlmOptions(LlmOptionsBase, total=False): """Options for running an LLM without streaming.""" + stream: Literal[False] class LlmOptionsStream(LlmOptionsBase): """Options for running an LLM with streaming.""" + stream: Literal[True] # Response types class RawResponseHeaders(TypedDict): """Raw response headers.""" + headers: Dict[str, str] class RunResponse(TypedDict, total=False): - """Response from running a pipe.""" + """Response from running a pipe without streaming.""" + completion: str - thread_id: str + thread_id: Optional[str] id: str object: str created: int @@ -197,71 +229,89 @@ class RunResponse(TypedDict, total=False): usage: Usage system_fingerprint: Optional[str] raw_response: Optional[RawResponseHeaders] - messages: List[Message] - llm_key: str - name: str -class RunResponseStream(TypedDict, total=False): - """Stream response from running a pipe.""" +class RunResponseStream(TypedDict): + """Response from running a pipe with streaming.""" + stream: Any # This would be an iterator in Python thread_id: Optional[str] raw_response: Optional[RawResponseHeaders] +# Note: Delta, ChoiceStream, and ChunkStream are defined in helper.py + + # Memory types -class MemoryCreateOptions(TypedDict, total=False): +FilterOperator = Literal["Eq", "NotEq", "In", "NotIn", "And", "Or"] +FilterConnective = Literal["And", "Or"] +FilterValue = Union[str, List[str]] +FilterCondition = List[Union[str, FilterOperator, FilterValue]] + +# Recursive type for memory filters +MemoryFilters = Union[ + List[Union[FilterConnective, List["MemoryFilters"]]], FilterCondition +] + + +class MemoryCreateOptions(TypedDict): """Options for creating a memory.""" + name: str - description: str - embedding_model: EmbeddingModel + description: Optional[str] + embedding_model: Optional[EmbeddingModel] + top_k: Optional[int] + chunk_size: Optional[int] + 
chunk_overlap: Optional[int] class MemoryDeleteOptions(TypedDict): """Options for deleting a memory.""" - name: str - -class MemoryFilter(List): - """Filter for memory retrieval.""" - pass + name: str class MemoryConfig(TypedDict): """Memory configuration for retrieval.""" + name: str - filters: NotRequired[MemoryFilter] + filters: Optional[MemoryFilters] -class MemoryRetrieveOptions(TypedDict, total=False): +class MemoryRetrieveOptions(TypedDict): """Options for retrieving from memory.""" + query: str memory: List[MemoryConfig] - top_k: int + top_k: Optional[int] class MemoryListDocOptions(TypedDict): """Options for listing documents in a memory.""" + memory_name: str class MemoryDeleteDocOptions(TypedDict): """Options for deleting a document from memory.""" + memory_name: str document_name: str class MemoryRetryDocEmbedOptions(TypedDict): """Options for retrying embedding generation for a document.""" + memory_name: str document_name: str -class MemoryUploadDocOptions(TypedDict, total=False): +class MemoryUploadDocOptions(TypedDict): """Options for uploading a document to memory.""" + memory_name: str document_name: str - meta: Dict[str, str] + meta: Optional[Dict[str, str]] document: Any # This would be bytes, file-like object, etc. content_type: ContentType @@ -269,6 +319,7 @@ class MemoryUploadDocOptions(TypedDict, total=False): # Response types for memory class MemoryBaseResponse(TypedDict): """Base response for memory operations.""" + name: str description: str owner_login: str @@ -277,36 +328,45 @@ class MemoryBaseResponse(TypedDict): class MemoryCreateResponse(MemoryBaseResponse): """Response from creating a memory.""" + + chunk_size: int + chunk_overlap: int embedding_model: EmbeddingModel class MemoryListResponse(MemoryBaseResponse): """Response from listing memories.""" + embedding_model: EmbeddingModel class BaseDeleteResponse(TypedDict): """Base response for delete operations.""" + success: bool class MemoryDeleteResponse(BaseDeleteResponse): """Response from deleting a memory.""" + pass class MemoryDeleteDocResponse(BaseDeleteResponse): """Response from deleting a document from memory.""" + pass class MemoryRetryDocEmbedResponse(BaseDeleteResponse): """Response from retrying document embedding.""" + pass class MemoryRetrieveResponse(TypedDict): """Response from retrieving from memory.""" + text: str similarity: float meta: Dict[str, str] @@ -314,14 +374,16 @@ class MemoryRetrieveResponse(TypedDict): class MemoryDocMetadata(TypedDict): """Metadata for a document in memory.""" + size: int type: ContentType class MemoryListDocResponse(TypedDict): """Response from listing documents in memory.""" + name: str - status: Literal['queued', 'in_progress', 'completed', 'failed'] + status: Literal["queued", "in_progress", "completed", "failed"] status_message: Optional[str] metadata: MemoryDocMetadata enabled: bool @@ -333,8 +395,9 @@ class MemoryListDocResponse(TypedDict): # Tool types class ToolWebSearchOptions(TypedDict, total=False): """Options for web search.""" + query: str - service: Literal['exa'] + service: Literal["exa"] total_results: int domains: List[str] api_key: str @@ -342,12 +405,14 @@ class ToolWebSearchOptions(TypedDict, total=False): class ToolWebSearchResponse(TypedDict): """Response from web search.""" + url: str content: str class ToolCrawlOptions(TypedDict, total=False): """Options for web crawling.""" + url: List[str] max_pages: int api_key: str @@ -355,6 +420,7 @@ class ToolCrawlOptions(TypedDict, total=False): class ToolCrawlResponse(TypedDict): """Response 
from web crawling.""" + url: str content: str @@ -362,8 +428,9 @@ class ToolCrawlResponse(TypedDict): # Embed types class EmbedOptions(TypedDict, total=False): """Options for embedding generation.""" + chunks: List[str] - embedding_model: EmbeddingModel + embedding_model: Optional[EmbeddingModel] EmbedResponse = List[List[float]] @@ -371,13 +438,11 @@ class EmbedOptions(TypedDict, total=False): # Chunk types class ChunkOptions(TypedDict, total=False): - """Options for chunking a document.""" - document: Any # This would be bytes, file-like object, etc. - document_name: str - content_type: ContentType - chunk_max_length: str - chunk_overlap: str - separator: str + """Options for chunking content.""" + + content: str + chunkOverlap: Optional[int] + chunkMaxLength: Optional[int] ChunkResponse = List[str] @@ -386,6 +451,7 @@ class ChunkOptions(TypedDict, total=False): # Parse types class ParseOptions(TypedDict): """Options for parsing a document.""" + document: Any # This would be bytes, file-like object, etc. document_name: str content_type: ContentType @@ -393,6 +459,7 @@ class ParseOptions(TypedDict): class ParseResponse(TypedDict): """Response from parsing a document.""" + document_name: str content: str @@ -400,6 +467,7 @@ class ParseResponse(TypedDict): # Thread types class ThreadsCreate(TypedDict, total=False): """Options for creating a thread.""" + thread_id: str metadata: Dict[str, str] messages: List[ThreadMessage] @@ -407,41 +475,48 @@ class ThreadsCreate(TypedDict, total=False): class ThreadsUpdate(TypedDict): """Options for updating a thread.""" + thread_id: str metadata: Dict[str, str] class ThreadsGet(TypedDict): """Options for getting a thread.""" + thread_id: str class DeleteThreadOptions(TypedDict): """Options for deleting a thread.""" + thread_id: str class ThreadsBaseResponse(TypedDict): """Base response for thread operations.""" + id: str - object: Literal['thread'] + object: Literal["thread"] created_at: int metadata: Dict[str, str] class ThreadMessagesCreate(TypedDict): """Options for creating messages in a thread.""" + thread_id: str messages: List[ThreadMessage] class ThreadMessagesList(TypedDict): """Options for listing messages in a thread.""" + thread_id: str class ThreadMessagesBaseResponse(TypedDict, total=False): """Base response for thread message operations.""" + id: str created_at: int thread_id: str @@ -454,16 +529,247 @@ class ThreadMessagesBaseResponse(TypedDict, total=False): metadata: Optional[Dict[str, str]] +# Pipe types - simplified based on TypeScript SDK +class PipeBaseOptions(TypedDict, total=False): + """Base options for pipe operations.""" + + name: str + description: Optional[str] + status: Optional[Literal["public", "private"]] + upsert: Optional[bool] + model: Optional[str] + stream: Optional[bool] + json: Optional[bool] + store: Optional[bool] + moderate: Optional[bool] + top_p: Optional[float] + max_tokens: Optional[int] + temperature: Optional[float] + presence_penalty: Optional[float] + frequency_penalty: Optional[float] + stop: Optional[List[str]] + tools: Optional[List[Tools]] + tool_choice: Optional[Union[Literal["auto", "required"], ToolChoice]] + parallel_tool_calls: Optional[bool] + messages: Optional[List[Message]] + variables: Optional[List[Variable]] + memory: Optional[List[Dict[str, str]]] + response_format: Optional[ResponseFormat] + + +class PipeCreateOptions(PipeBaseOptions): + """Options for creating a pipe.""" + + pass + + +class PipeUpdateOptions(PipeBaseOptions): + """Options for updating a pipe.""" + + pass + + +class 
PipeRunOptions(TypedDict, total=False): + """Options for running a pipe.""" + + name: Optional[str] + api_key: Optional[str] + messages: Optional[List[Message]] + stream: Optional[bool] + variables: Optional[Union[List[Variable], Dict[str, str]]] + thread_id: Optional[str] + tools: Optional[List[Tools]] + tool_choice: Optional[Union[Literal["auto", "required"], ToolChoice]] + parallel_tool_calls: Optional[bool] + memory: Optional[List[Dict[str, str]]] + response_format: Optional[ResponseFormat] + top_p: Optional[float] + max_tokens: Optional[int] + temperature: Optional[float] + presence_penalty: Optional[float] + frequency_penalty: Optional[float] + stop: Optional[List[str]] + llm_key: Optional[str] + json: Optional[bool] + store: Optional[bool] + moderate: Optional[bool] + + +class PipeBaseResponse(TypedDict): + """Base response for pipe operations.""" + + name: str + description: str + status: Literal["public", "private"] + owner_login: str + url: str + type: str + api_key: str + + +class PipeCreateResponse(PipeBaseResponse): + """Response from creating a pipe.""" + + pass + + +class PipeUpdateResponse(PipeBaseResponse): + """Response from updating a pipe.""" + + pass + + +class PipeListResponse(TypedDict): + """Response from listing pipes - includes all pipe configuration.""" + + name: str + description: str + status: Literal["public", "private"] + owner_login: str + url: str + model: str + stream: bool + json: bool + store: bool + moderate: bool + top_p: float + max_tokens: int + temperature: float + presence_penalty: float + frequency_penalty: float + stop: List[str] + tool_choice: Union[Literal["auto", "required"], ToolChoice] + parallel_tool_calls: bool + messages: List[Message] + variables: List[Variable] + tools: List[Tools] + memory: List[Dict[str, str]] + + +# Pipe run response types (use existing RunResponse and RunResponseStream) + + # Config types class LangbaseOptions(TypedDict, total=False): """Options for initializing Langbase client.""" - api_key: str - base_url: Literal['https://api.langbase.com', 'https://eu-api.langbase.com'] - timeout: int + + api_key: str # Required + base_url: Literal[ + "https://api.langbase.com", "https://eu-api.langbase.com" + ] # Optional # Protocol for file-like objects @runtime_checkable class FileProtocol(Protocol): """Protocol for file-like objects.""" - def read(self, size: int = -1) -> bytes: ... + + def read(self, size: int = -1) -> bytes: + ... + + +# Agent types +class McpServerSchema(TypedDict): + """MCP (Model Context Protocol) server configuration.""" + + name: str + type: Literal["url"] + url: str + authorization_token: Optional[str] + tool_configuration: Optional[Dict[str, Any]] + custom_headers: Optional[Dict[str, str]] + + +class AgentRunOptionsBase(TypedDict): + """Base options for running an agent.""" + + input: Union[str, List[Message]] # REQUIRED + model: str # REQUIRED + apiKey: str # REQUIRED + instructions: Optional[str] # OPTIONAL (has ? in TypeScript) + top_p: Optional[float] # OPTIONAL (has ? in TypeScript) + max_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + temperature: Optional[float] # OPTIONAL (has ? in TypeScript) + presence_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + frequency_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + stop: Optional[List[str]] # OPTIONAL (has ? in TypeScript) + tools: Optional[List[Tools]] # OPTIONAL (has ? in TypeScript) + tool_choice: Optional[ + Union[Literal["auto", "required"], ToolChoice] + ] # OPTIONAL (has ? 
in TypeScript) + parallel_tool_calls: Optional[bool] # OPTIONAL (has ? in TypeScript) + mcp_servers: Optional[List[McpServerSchema]] # OPTIONAL (has ? in TypeScript) + reasoning_effort: Optional[str] # OPTIONAL (has ? in TypeScript) + max_completion_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + response_format: Optional[ResponseFormat] # OPTIONAL (has ? in TypeScript) + customModelParams: Optional[Dict[str, Any]] # OPTIONAL (has ? in TypeScript) + + +class AgentRunOptionsWithoutMcp(AgentRunOptionsBase): + """Agent run options without MCP servers.""" + + stream: Optional[Literal[False]] # OPTIONAL (has ? in TypeScript) + + +class AgentRunOptionsWithMcp(TypedDict): + """Agent run options with MCP servers.""" + + # Required fields from base + input: Union[str, List[Message]] # REQUIRED + model: str # REQUIRED + apiKey: str # REQUIRED + + # Optional fields from base + instructions: Optional[str] # OPTIONAL (has ? in TypeScript) + top_p: Optional[float] # OPTIONAL (has ? in TypeScript) + max_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + temperature: Optional[float] # OPTIONAL (has ? in TypeScript) + presence_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + frequency_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + stop: Optional[List[str]] # OPTIONAL (has ? in TypeScript) + tools: Optional[List[Tools]] # OPTIONAL (has ? in TypeScript) + tool_choice: Optional[ + Union[Literal["auto", "required"], ToolChoice] + ] # OPTIONAL (has ? in TypeScript) + parallel_tool_calls: Optional[bool] # OPTIONAL (has ? in TypeScript) + reasoning_effort: Optional[str] # OPTIONAL (has ? in TypeScript) + max_completion_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + response_format: Optional[ResponseFormat] # OPTIONAL (has ? in TypeScript) + customModelParams: Optional[Dict[str, Any]] # OPTIONAL (has ? in TypeScript) + + # Overridden fields + mcp_servers: List[McpServerSchema] # REQUIRED (overrides optional from base) + stream: Literal[False] # REQUIRED + + +class AgentRunOptionsStreamT(TypedDict): + """Agent run options for streaming (without MCP servers).""" + + input: Union[str, List[Message]] # REQUIRED + model: str # REQUIRED + apiKey: str # REQUIRED + stream: Literal[True] # REQUIRED + instructions: Optional[str] # OPTIONAL (has ? in TypeScript) + top_p: Optional[float] # OPTIONAL (has ? in TypeScript) + max_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + temperature: Optional[float] # OPTIONAL (has ? in TypeScript) + presence_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + frequency_penalty: Optional[float] # OPTIONAL (has ? in TypeScript) + stop: Optional[List[str]] # OPTIONAL (has ? in TypeScript) + tools: Optional[List[Tools]] # OPTIONAL (has ? in TypeScript) + tool_choice: Optional[ + Union[Literal["auto", "required"], ToolChoice] + ] # OPTIONAL (has ? in TypeScript) + parallel_tool_calls: Optional[bool] # OPTIONAL (has ? in TypeScript) + reasoning_effort: Optional[str] # OPTIONAL (has ? in TypeScript) + max_completion_tokens: Optional[int] # OPTIONAL (has ? in TypeScript) + response_format: Optional[ResponseFormat] # OPTIONAL (has ? in TypeScript) + customModelParams: Optional[Dict[str, Any]] # OPTIONAL (has ? 
in TypeScript) + + +# Union types for agent options +AgentRunOptions = Union[AgentRunOptionsWithoutMcp, AgentRunOptionsWithMcp] +AgentRunOptionsStream = AgentRunOptionsStreamT + +# Agent response type (reuses RunResponse) +AgentRunResponse = RunResponse diff --git a/langbase/utils.py b/langbase/utils.py index f826fb8..8fe492d 100644 --- a/langbase/utils.py +++ b/langbase/utils.py @@ -4,17 +4,19 @@ This module contains helper functions for common tasks like document handling and data conversion. """ -import os -from typing import Union, Dict, Any, BinaryIO + from io import BytesIO -from .types import ContentType, FileProtocol +from pathlib import Path +from typing import Any, BinaryIO, Dict, Optional, Tuple, Union + +from .types import ContentType def convert_document_to_request_files( document: Union[bytes, BytesIO, str, BinaryIO], document_name: str, - content_type: ContentType -) -> Dict[str, Union[tuple, str]]: + content_type: ContentType, +) -> Dict[str, Union[Tuple[str, bytes, ContentType], Tuple[None, str], str]]: """ Convert a document to the format needed for requests library's files parameter. @@ -25,35 +27,35 @@ def convert_document_to_request_files( Returns: Dictionary for use with requests.post(files=...) - - Raises: - ValueError: If the document type is not supported - FileNotFoundError: If the document path doesn't exist """ - files = {} + files: Dict[str, Union[Tuple[str, bytes, ContentType], Tuple[None, str], str]] = {} - if isinstance(document, str) and os.path.isfile(document): + if isinstance(document, str) and Path(document).is_file(): # If it's a file path, open and read the file - with open(document, "rb") as f: - files['document'] = (document_name, f.read(), content_type) + with Path(document).open("rb") as f: + files["document"] = (document_name, f.read(), content_type) elif isinstance(document, bytes): # If it's raw bytes - files['document'] = (document_name, document, content_type) - elif isinstance(document, BytesIO) or hasattr(document, 'read'): + files["document"] = (document_name, document, content_type) + elif isinstance(document, BytesIO) or hasattr(document, "read"): # If it's a file-like object document_content = document.read() # Reset the pointer if it's a file-like object that supports seek - if hasattr(document, 'seek'): + if hasattr(document, "seek"): document.seek(0) - files['document'] = (document_name, document_content, content_type) + files["document"] = (document_name, document_content, content_type) else: - raise ValueError(f"Unsupported document type: {type(document)}") + msg = f"Unsupported document type: {type(document)}" + raise ValueError(msg) - files['documentName'] = (None, document_name) + # Add documentName as a separate field (not as a file) + files["documentName"] = (None, document_name) return files -def prepare_headers(api_key: str, additional_headers: Dict[str, str] = None) -> Dict[str, str]: +def prepare_headers( + api_key: str, additional_headers: Optional[Dict[str, str]] = None +) -> Dict[str, str]: """ Prepare headers for API requests. 
@@ -64,10 +66,7 @@ def prepare_headers(api_key: str, additional_headers: Dict[str, str] = None) -> Returns: Dictionary of headers to use in requests """ - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}" - } + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} if additional_headers: headers.update(additional_headers) @@ -89,7 +88,7 @@ def format_thread_id(thread_id: str) -> str: thread_id = thread_id.strip() # Ensure thread_id has the correct format - if not thread_id.startswith('thread_'): + if not thread_id.startswith("thread_"): thread_id = f"thread_{thread_id}" return thread_id diff --git a/langbase/workflow.py b/langbase/workflow.py new file mode 100644 index 0000000..9973c32 --- /dev/null +++ b/langbase/workflow.py @@ -0,0 +1,247 @@ +""" +Workflow execution engine for Langbase SDK. + +This module provides a robust workflow execution system with support for: +- Step-based execution with retries and timeouts +- Configurable retry strategies (exponential, linear, fixed backoff) +- Debug logging and performance monitoring +- Context management for step outputs +""" + +import asyncio +import time +from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar + +from typing_extensions import Literal, TypedDict + +from .errors import APIError + +T = TypeVar("T") + + +class WorkflowContext(TypedDict): + """Context for workflow execution containing step outputs.""" + + outputs: Dict[str, Any] + + +class RetryConfig(TypedDict): + """Configuration for step retry behavior.""" + + limit: int + delay: int + backoff: Literal["exponential", "linear", "fixed"] + + +class StepConfig(TypedDict, Generic[T]): + """Configuration for a workflow step.""" + + id: str + timeout: Optional[int] + retries: Optional[RetryConfig] + run: Callable[[], Awaitable[T]] + + +class TimeoutError(APIError): + """Raised when a workflow step times out.""" + + def __init__(self, step_id: str, timeout: int): + """ + Initialize a timeout error. + + Args: + step_id: The ID of the step that timed out + timeout: The timeout value in milliseconds + """ + message = f'Step "{step_id}" timed out after {timeout}ms' + super().__init__(message=message) + self.step_id = step_id + self.timeout = timeout + + +class Workflow: + """ + A workflow execution engine that provides step-based execution with retry logic, + timeouts, and debugging capabilities. + """ + + def __init__(self, debug: bool = False): + """ + Initialize a new workflow instance. + + Args: + debug: Whether to enable debug logging and performance monitoring + """ + self._context: WorkflowContext = {"outputs": {}} + self._debug = debug + + @property + def context(self) -> WorkflowContext: + """Get the current workflow context.""" + return self._context + + async def step(self, config: StepConfig[T]) -> T: + """ + Execute a workflow step with retry logic and timeout handling. 
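+
+        Example (illustrative sketch; assumes this is awaited inside an async
+        function, workflow is a Workflow instance, and fetch_data is an async
+        callable defined elsewhere):
+            result = await workflow.step({
+                "id": "fetch",
+                "timeout": 5000,
+                "retries": {"limit": 2, "delay": 500, "backoff": "exponential"},
+                "run": fetch_data,
+            })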
+ + Args: + config: Step configuration including ID, timeout, retries, and execution function + + Returns: + The result of the step execution + + Raises: + TimeoutError: If the step exceeds the specified timeout + APIError: If the step fails after all retry attempts + """ + if self._debug: + print(f"\n🔄 Starting step: {config['id']}") + start_time = time.time() + if config.get("timeout"): + print(f"⏳ Timeout: {config['timeout']}ms") + if config.get("retries"): + print(f"🔄 Retries: {config['retries']}") + + last_error: Optional[Exception] = None + attempt = 1 + max_attempts = 1 + + if config.get("retries"): + max_attempts = config["retries"]["limit"] + 1 + + while attempt <= max_attempts: + try: + step_task = config["run"]() + + if config.get("timeout"): + step_task = self._with_timeout( + promise=step_task, + timeout=config["timeout"], + step_id=config["id"], + ) + + result = await step_task + self._context["outputs"][config["id"]] = result + + if self._debug: + elapsed = (time.time() - start_time) * 1000 + print(f"⏱️ Step {config['id']}: {elapsed:.2f}ms") + print(f"📤 Output: {result}") + print(f"✅ Completed step: {config['id']}\n") + + return result + + except Exception as error: + last_error = error + + if attempt < max_attempts: + retry_config = config.get("retries") + delay = 0 + + if retry_config: + delay = self._calculate_delay( + retry_config["delay"], attempt, retry_config["backoff"] + ) + + if self._debug: + print(f"⚠️ Attempt {attempt} failed, retrying in {delay}ms...") + print(f"Error: {error}") + + await self._sleep(delay / 1000.0) # Convert to seconds + attempt += 1 + else: + if self._debug: + elapsed = (time.time() - start_time) * 1000 + print(f"⏱️ Step {config['id']}: {elapsed:.2f}ms") + print(f"❌ Failed step: {config['id']}") + print(f"Error: {error}") + + if isinstance(last_error, Exception): + raise last_error from None + raise APIError(message=str(last_error)) from None + + # This should never be reached, but just in case + if last_error: + raise last_error + raise APIError(message="Unknown error occurred") + + async def _with_timeout( + self, promise: Awaitable[T], timeout: int, step_id: str + ) -> T: + """ + Add timeout handling to a promise. + + Args: + promise: The awaitable to add timeout to + timeout: Timeout in milliseconds + step_id: Step ID for error reporting + + Returns: + The result of the promise + + Raises: + TimeoutError: If the promise doesn't complete within the timeout + """ + try: + return await asyncio.wait_for(promise, timeout=timeout / 1000.0) + except asyncio.TimeoutError as e: + raise TimeoutError(step_id=step_id, timeout=timeout) from e + + def _calculate_delay( + self, + base_delay: int, + attempt: int, + strategy: Literal["exponential", "linear", "fixed"], + ) -> int: + """ + Calculate retry delay based on strategy. + + Args: + base_delay: Base delay in milliseconds + attempt: Current attempt number (1-based) + strategy: Backoff strategy to use + + Returns: + Calculated delay in milliseconds + """ + if strategy == "exponential": + return base_delay * (2 ** (attempt - 1)) + if strategy == "linear": + return base_delay * attempt + # fixed + return base_delay + + async def _sleep(self, seconds: float) -> None: + """ + Sleep for the specified duration. + + Args: + seconds: Duration to sleep in seconds + """ + await asyncio.sleep(seconds) + + def run(self, steps: List[StepConfig[Any]]) -> Dict[str, Any]: + """ + Execute multiple workflow steps in sequence. 
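+
+        Example (illustrative sketch; assumes load and summarize are async
+        callables defined elsewhere):
+            workflow = Workflow(debug=True)
+            outputs = workflow.run([
+                {"id": "load", "run": load},
+                {"id": "summarize", "run": summarize},
+            ])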
+ + Args: + steps: List of step configurations to execute + + Returns: + Dictionary containing outputs from all steps + + Raises: + TimeoutError: If any step exceeds its timeout + APIError: If any step fails after all retry attempts + """ + + async def _run_all(): + for step_config in steps: + await self.step(step_config) + return self._context["outputs"] + + return asyncio.run(_run_all()) + + def reset(self) -> None: + """Reset the workflow context, clearing all step outputs.""" + self._context = {"outputs": {}} diff --git a/pyproject.toml b/pyproject.toml index 335f373..9dc0a65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,104 @@ +[project] +name = "langbase" +version = "0.1.0" +description = "Python SDK for the Langbase API" +readme = "README.md" +license = {text = "MIT"} +authors = [ + { name = "Saqib", email = "saqib@langbase.com" }, + { name = "Ankit", email = "ankit@langbase.com" }, +] +requires-python = ">=3.7" +keywords = ["ai", "langbase", "agent", "memory", "rag", "mcp", "pipes", "workflow"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "requests>=2.25.0", + "typing-extensions>=4.0.0", +] + +[project.urls] +Documentation = "https://docs.langbase.com" +Homepage = "https://langbase.com" +Repository = "https://github.com/LangbaseInc/langbase-python-sdk" +Issues = "https://github.com/LangbaseInc/langbase-python-sdk/issues" + [build-system] -requires = ["setuptools>=42", "wheel"] +requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +include = ["langbase*"] + [tool.black] line-length = 88 -target-version = ["py37", "py38", "py39", "py310", "py311"] +target-version = ["py37", "py38", "py39", "py310", "py311", "py312"] include = '\.pyi?$' +extend-exclude = ''' +/( + # directories + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | venv + | build + | dist +)/ +''' [tool.isort] profile = "black" line_length = 88 +known_first_party = ["langbase"] +skip_glob = ["*/venv/*", "*/.venv/*"] -[tool.pytest] +[tool.pytest.ini_options] testpaths = ["tests"] python_files = "test_*.py" +python_classes = "Test*" +python_functions = "test_*" +addopts = "-v --strict-markers --tb=short" +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", + "integration: marks tests as integration tests", + "unit: marks tests as unit tests", +] + +[tool.coverage.run] +source = ["langbase"] +branch = true +omit = [ + "*/tests/*", + "*/__init__.py", + "*/venv/*", + "*/.venv/*", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "@abstractmethod", +] +show_missing = true +precision = 2 -[tool.mypy] -python_version = "3.7" -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -disallow_untyped_decorators = true -no_implicit_optional = true -strict_optional = true -warn_redundant_casts = true -warn_unused_ignores = true -warn_return_any = true -warn_unused_configs = true +[tool.coverage.html] 
+directory = "htmlcov" diff --git a/requirements-dev.txt b/requirements-dev.txt index 9b07c64..24b64b2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,23 @@ -r requirements.txt + +# Testing pytest>=7.0.0 +pytest-asyncio>=0.21.0 pytest-cov>=3.0.0 +pytest-xdist>=3.0.0 +responses>=0.23.0 + +# Code formatting black>=22.1.0 isort>=5.10.1 -mypy>=0.950 -flake8>=4.0.1 + +# Pre-commit hooks +pre-commit>=3.0.0 + +# Building build>=0.8.0 -twine>=4.0.1 +twine>=4.0.0 + +# Development utilities +ipdb>=0.13.0 +python-dotenv>=0.19.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 69fed7e..0000000 --- a/setup.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Setup script for the Langbase SDK. -""" -from setuptools import setup, find_packages - -# Set version directly without trying to import it -VERSION = "0.1.0" - -# Read the contents of the README file -with open("README.md", encoding="utf-8") as f: - long_description = f.read() - -setup( - name="langbase", - version=VERSION, - description="Python SDK for the Langbase API", - long_description=long_description, - long_description_content_type="text/markdown", - author="Langbase", - author_email="support@langbase.com", - url="https://github.com/langbaseinc/langbase-sdk-python", - packages=find_packages(), - include_package_data=True, - install_requires=[ - "requests>=2.25.0", - "typing-extensions>=4.0.0", - ], - classifiers=[ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - python_requires=">=3.7", - keywords="ai, langbase, llm, embeddings, vector store", - project_urls={ - "Documentation": "https://docs.langbase.com", - "Source": "https://github.com/langbaseinc/langbase-sdk-python", - "Issues": "https://github.com/langbaseinc/langbase-sdk-python/issues", - }, -) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9db8339 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,400 @@ +""" +Shared test config and fixtures for Langbase SDK tests. 
+""" + +import time + +import pytest + +from langbase import Langbase + + +@pytest.fixture +def base_url(): + """Base URL for the Langbase API.""" + return "https://api.langbase.com" + + +@pytest.fixture +def api_key(): + """Test API key.""" + return "test-api-key" + + +@pytest.fixture +def langbase_client(api_key, base_url): + """Langbase client instance for testing.""" + return Langbase(api_key=api_key, base_url=base_url) + + +@pytest.fixture +def mock_responses(): + """Common mock response patterns matching the actual types from types.py.""" + timestamp = int(time.time()) + + return { + # Pipes responses (RunResponse type) + "pipe_list": [ + { + "name": "test-pipe", + "description": "Test pipe", + "status": "public", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/test-pipe", + "api_key": "pipe-key-1", + }, + { + "name": "another-pipe", + "description": "Another pipe", + "status": "private", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/another-pipe", + "api_key": "pipe-key-2", + }, + ], + "pipe_create": { + "name": "new-pipe", + "api_key": "pipe-api-key", + "description": "A test pipe", + "status": "public", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/new-pipe", + }, + "pipe_run": { + "completion": "Hello, world!", + "thread_id": "thread_test123", + "id": "chatcmpl-123", + "object": "chat.completion", + "created": timestamp, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello, world!", + }, + "logprobs": None, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 3, + "total_tokens": 8, + }, + "system_fingerprint": "fp_1234567890", + }, + "pipe_run_stream": { + "stream": "mock-stream-object", + "thread_id": "thread_test123", + "raw_response": {"headers": {"x-request-id": "req_123"}}, + }, + # Memory responses (MemoryCreateResponse, MemoryListResponse types) + "memory_list": [ + { + "name": "test-memory", + "description": "Test memory", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/test-memory", + "embedding_model": "openai:text-embedding-3-large", + }, + { + "name": "another-memory", + "description": "Another memory", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/another-memory", + "embedding_model": "cohere:embed-multilingual-v3.0", + }, + ], + "memory_create": { + "name": "new-memory", + "description": "A test memory", + "owner_login": "test-user", + "url": "https://langbase.com/test-user/new-memory", + "embedding_model": "openai:text-embedding-3-large", + }, + "memory_delete": {"success": True}, + "memory_retrieve": [ + { + "text": "Test content", + "similarity": 0.95, + "meta": {"source": "test.pdf", "page": "1"}, + }, + { + "text": "Another content", + "similarity": 0.85, + "meta": {"source": "test.pdf", "page": "2"}, + }, + ], + # Memory documents responses (MemoryListDocResponse type) + "memory_docs_list": [ + { + "name": "doc1.txt", + "status": "completed", + "status_message": None, + "metadata": { + "size": 1024, + "type": "text/plain", + }, + "enabled": True, + "chunk_size": 1000, + "chunk_overlap": 200, + "owner_login": "test-user", + }, + { + "name": "doc2.pdf", + "status": "in_progress", + "status_message": "Processing PDF", + "metadata": { + "size": 2048, + "type": "application/pdf", + }, + "enabled": True, + "chunk_size": 1000, + "chunk_overlap": 200, + "owner_login": "test-user", + }, + ], + "memory_docs_delete": {"success": True}, + 
"memory_docs_upload_signed_url": { + "signedUrl": "https://storage.langbase.com/upload?signature=xyz", + "publicUrl": "https://storage.langbase.com/memories/test-memory/doc.pdf", + }, + "memory_docs_embeddings_retry": {"success": True}, + # Tools responses (ToolWebSearchResponse, ToolCrawlResponse types) + "tools_web_search": [ + { + "url": "https://example.com", + "content": "Example content from search result", + }, + { + "url": "https://test.com", + "content": "Test content from search result", + }, + ], + "tools_crawl": [ + { + "url": "https://example.com", + "content": "Crawled page content from example.com", + } + ], + # Threads responses (ThreadsBaseResponse type) + "threads_create": { + "id": "thread_123", + "object": "thread", + "created_at": timestamp, + "metadata": {}, + }, + "threads_create_with_metadata": { + "id": "thread_123", + "object": "thread", + "created_at": timestamp, + "metadata": {"user_id": "123", "session": "abc"}, + }, + "threads_create_with_thread_id": { + "id": "custom_thread_456", + "object": "thread", + "created_at": timestamp, + "metadata": {}, + }, + "threads_create_with_messages": { + "id": "thread_123", + "object": "thread", + "created_at": timestamp, + "metadata": {}, + "messages": [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ], + }, + "threads_update": { + "id": "thread_123", + "object": "thread", + "created_at": timestamp, + "metadata": {"user_id": "123", "session": "abc"}, + }, + "threads_get": { + "id": "thread_123", + "object": "thread", + "created_at": timestamp, + "metadata": {}, + }, + "threads_delete": {"deleted": True, "id": "thread_123"}, + # Thread messages responses (ThreadMessagesBaseResponse type) + "threads_append": [ + { + "id": "msg_1", + "created_at": timestamp, + "thread_id": "thread_123", + "role": "user", + "content": "Hello", + "name": None, + "tool_call_id": None, + "tool_calls": None, + "attachments": None, + "metadata": None, + }, + { + "id": "msg_2", + "created_at": timestamp + 1, + "thread_id": "thread_123", + "role": "assistant", + "content": "Hi there!", + "name": None, + "tool_call_id": None, + "tool_calls": None, + "attachments": None, + "metadata": None, + }, + ], + "threads_messages_list": [ + { + "id": "msg_1", + "created_at": timestamp, + "thread_id": "thread_123", + "role": "user", + "content": "Hello", + "name": None, + "tool_call_id": None, + "tool_calls": None, + "attachments": None, + "metadata": None, + } + ], + # Utilities responses (EmbedResponse, ChunkResponse, ParseResponse types) + "embed": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], + "chunker": ["First chunk", "Second chunk", "Third chunk"], + "parser": { + "documentName": "test.pdf", + "content": "Parsed document content from test.pdf", + }, + # Agent run response (similar to pipe run) + "agent.run": { + "completion": "Agent response to the query", + "thread_id": "thread_agent123", + "id": "chatcmpl-agent123", + "object": "chat.completion", + "created": timestamp, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Agent response to the query", + }, + "logprobs": None, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 50, + "completion_tokens": 50, + "total_tokens": 100, + }, + "system_fingerprint": "fp_agent1234567890", + }, + # Error responses + "error_400": {"error": "Bad request", "message": "Invalid parameters"}, + "error_401": {"error": "Unauthorized", "message": "Invalid API key"}, + "error_404": {"error": "Not found", "message": "Resource 
not found"}, + "error_500": { + "error": "Internal server error", + "message": "Something went wrong", + }, + } + + +@pytest.fixture +def stream_chunks(): + """Sample streaming response chunks for SSE (Server-Sent Events) format.""" + return [ + b'data: {"choices":[{"delta":{"content":"Hello"},"index":0}]}\n\n', + b'data: {"choices":[{"delta":{"content":" world"},"index":0}]}\n\n', + b'data: {"choices":[{"delta":{"content":"!"},"index":0}]}\n\n', + b"data: [DONE]\n\n", + ] + + +@pytest.fixture +def upload_file_content(): + """Sample file content for upload tests.""" + return b"This is test document content for upload testing." + + +@pytest.fixture +def sample_thread_messages(): + """Sample thread messages for testing.""" + return [ + { + "role": "user", + "content": "What is the capital of France?", + }, + { + "role": "assistant", + "content": "The capital of France is Paris.", + }, + ] + + +@pytest.fixture +def sample_variables(): + """Sample variables for pipe runs.""" + return [ + {"name": "topic", "value": "AI ethics"}, + {"name": "style", "value": "professional"}, + ] + + +@pytest.fixture +def sample_tools(): + """Sample tools definition for function calling.""" + return [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The unit of temperature", + }, + }, + "required": ["location"], + }, + }, + } + ] + + +@pytest.fixture +def sample_tool_calls(): + """Sample tool calls in a message.""" + return [ + { + "id": "call_1234567890", + "type": "function", + "function": { + "name": "get_weather", + "arguments": '{"location": "San Francisco, CA", "unit": "celsius"}', + }, + } + ] + + +def create_stream_response(chunks): + """Helper function to create streaming response.""" + + def stream_generator(): + yield from chunks + + return stream_generator() diff --git a/tests/constants.py b/tests/constants.py new file mode 100644 index 0000000..ee2c841 --- /dev/null +++ b/tests/constants.py @@ -0,0 +1,12 @@ +AUTHORIZATION_HEADER = { + "Authorization": "Bearer test-api-key", +} + +JSON_CONTENT_TYPE_HEADER = { + "Content-Type": "application/json", +} + +AUTH_AND_JSON_CONTENT_HEADER = { + **AUTHORIZATION_HEADER, + **JSON_CONTENT_TYPE_HEADER, +} diff --git a/tests/test_client.py b/tests/test_client.py deleted file mode 100644 index a4c8cf1..0000000 --- a/tests/test_client.py +++ /dev/null @@ -1,323 +0,0 @@ -""" -Tests for the Langbase client. 
-""" -import os -import unittest -from unittest.mock import patch, MagicMock - -from langbase import Langbase, APIError, NotFoundError - - -class TestLangbase(unittest.TestCase): - """Test the Langbase client.""" - - def setUp(self): - """Set up test fixtures.""" - # Create a mock API key for testing - self.api_key = "test-api-key" - self.lb = Langbase(api_key=self.api_key) - - def test_initialization_with_api_key(self): - """Test initialization with API key parameter.""" - self.assertEqual(self.lb.api_key, self.api_key) - self.assertEqual(self.lb.base_url, "https://api.langbase.com") - self.assertEqual(self.lb.timeout, 30) - - @patch.dict(os.environ, {"LANGBASE_API_KEY": "env-api-key"}, clear=True) - def test_initialization_with_env_var(self): - """Test initialization with environment variable.""" - lb = Langbase() - self.assertEqual(lb.api_key, "env-api-key") - - def test_initialization_with_no_api_key(self): - """Test initialization with no API key.""" - with patch.dict(os.environ, {}, clear=True): - with self.assertRaises(ValueError): - Langbase() - - @patch("langbase.request.Request.get") - def test_pipes_list(self, mock_get): - """Test pipes.list method.""" - mock_get.return_value = [{"name": "test-pipe"}] - result = self.lb.pipes.list() - mock_get.assert_called_once_with("/v1/pipes") - self.assertEqual(result, [{"name": "test-pipe"}]) - - @patch("langbase.request.Request.post") - def test_pipes_create(self, mock_post): - """Test pipes.create method.""" - mock_post.return_value = {"name": "new-pipe", "api_key": "pipe-api-key"} - result = self.lb.pipes.create( - name="new-pipe", - description="A test pipe", - model="anthropic:claude-3-sonnet" - ) - mock_post.assert_called_once() - self.assertEqual(result, {"name": "new-pipe", "api_key": "pipe-api-key"}) - - @patch("langbase.request.Request.post") - def test_pipes_update(self, mock_post): - """Test pipes.update method.""" - mock_post.return_value = {"name": "updated-pipe"} - result = self.lb.pipes.update( - name="test-pipe", - temperature=0.7 - ) - mock_post.assert_called_once() - self.assertEqual(result, {"name": "updated-pipe"}) - - @patch("langbase.request.Request.post") - def test_pipes_run(self, mock_post): - """Test pipes.run method.""" - mock_post.return_value = {"completion": "Hello, world!"} - result = self.lb.pipes.run( - name="test-pipe", - messages=[{"role": "user", "content": "Hi"}] - ) - mock_post.assert_called_once() - self.assertEqual(result, {"completion": "Hello, world!"}) - - @patch("langbase.request.Request.post") - def test_pipes_run_with_no_name_or_api_key(self, mock_post): - """Test pipes.run method with no name or API key.""" - with self.assertRaises(ValueError): - self.lb.pipes.run(messages=[{"role": "user", "content": "Hi"}]) - - @patch("langbase.request.Request.post") - def test_memories_create(self, mock_post): - """Test memories.create method.""" - mock_post.return_value = {"name": "test-memory"} - result = self.lb.memories.create( - name="test-memory", - description="A test memory" - ) - mock_post.assert_called_once() - self.assertEqual(result, {"name": "test-memory"}) - - @patch("langbase.request.Request.get") - def test_memories_list(self, mock_get): - """Test memories.list method.""" - mock_get.return_value = [{"name": "test-memory"}] - result = self.lb.memories.list() - mock_get.assert_called_once_with("/v1/memory") - self.assertEqual(result, [{"name": "test-memory"}]) - - @patch("langbase.request.Request.delete") - def test_memories_delete(self, mock_delete): - """Test memories.delete method.""" - 
mock_delete.return_value = {"success": True} - result = self.lb.memories.delete(name="test-memory") - mock_delete.assert_called_once_with("/v1/memory/test-memory") - self.assertEqual(result, {"success": True}) - - @patch("langbase.request.Request.post") - def test_memories_retrieve(self, mock_post): - """Test memories.retrieve method.""" - mock_post.return_value = [{"text": "Test text", "similarity": 0.9}] - result = self.lb.memories.retrieve( - query="test query", - memory=[{"name": "test-memory"}] - ) - mock_post.assert_called_once() - self.assertEqual(result, [{"text": "Test text", "similarity": 0.9}]) - - @patch("langbase.request.Request.get") - def test_memories_documents_list(self, mock_get): - """Test memories.documents.list method.""" - mock_get.return_value = [{"name": "test-doc"}] - result = self.lb.memories.documents.list(memory_name="test-memory") - mock_get.assert_called_once_with("/v1/memory/test-memory/documents") - self.assertEqual(result, [{"name": "test-doc"}]) - - @patch("langbase.request.Request.delete") - def test_memories_documents_delete(self, mock_delete): - """Test memories.documents.delete method.""" - mock_delete.return_value = {"success": True} - result = self.lb.memories.documents.delete( - memory_name="test-memory", - document_name="test-doc" - ) - mock_delete.assert_called_once_with("/v1/memory/test-memory/documents/test-doc") - self.assertEqual(result, {"success": True}) - - @patch("langbase.request.Request.post") - @patch("requests.put") - def test_memories_documents_upload(self, mock_put, mock_post): - """Test memories.documents.upload method.""" - mock_post.return_value = {"signedUrl": "https://upload-url.com"} - mock_put.return_value = MagicMock(ok=True) - - document = b"test document content" - result = self.lb.memories.documents.upload( - memory_name="test-memory", - document_name="test-doc.txt", - document=document, - content_type="text/plain" - ) - - mock_post.assert_called_once() - mock_put.assert_called_once() - self.assertTrue(result.ok) - - @patch("langbase.request.Request.get") - def test_memories_documents_embeddings_retry(self, mock_get): - """Test memories.documents.embeddings.retry method.""" - mock_get.return_value = {"success": True} - result = self.lb.memories.documents.embeddings.retry( - memory_name="test-memory", - document_name="test-doc" - ) - mock_get.assert_called_once_with( - "/v1/memory/test-memory/documents/test-doc/embeddings/retry" - ) - self.assertEqual(result, {"success": True}) - - @patch("langbase.request.Request.post") - def test_tools_web_search(self, mock_post): - """Test tools.web_search method.""" - mock_post.return_value = [{"url": "https://example.com", "content": "Example content"}] - result = self.lb.tools.web_search( - query="test query", - service="exa" - ) - mock_post.assert_called_once() - self.assertEqual(result, [{"url": "https://example.com", "content": "Example content"}]) - - @patch("langbase.request.Request.post") - def test_tools_crawl(self, mock_post): - """Test tools.crawl method.""" - mock_post.return_value = [{"url": "https://example.com", "content": "Example content"}] - result = self.lb.tools.crawl( - url=["https://example.com"] - ) - mock_post.assert_called_once() - self.assertEqual(result, [{"url": "https://example.com", "content": "Example content"}]) - - @patch("langbase.request.Request.post") - def test_threads_create(self, mock_post): - """Test threads.create method.""" - mock_post.return_value = {"id": "thread_123", "object": "thread"} - result = self.lb.threads.create( - 
metadata={"user_id": "123"} - ) - mock_post.assert_called_once() - self.assertEqual(result, {"id": "thread_123", "object": "thread"}) - - @patch("langbase.request.Request.post") - def test_threads_update(self, mock_post): - """Test threads.update method.""" - mock_post.return_value = {"id": "thread_123", "object": "thread"} - result = self.lb.threads.update( - thread_id="thread_123", - metadata={"status": "complete"} - ) - mock_post.assert_called_once() - self.assertEqual(result, {"id": "thread_123", "object": "thread"}) - - @patch("langbase.request.Request.get") - def test_threads_get(self, mock_get): - """Test threads.get method.""" - mock_get.return_value = {"id": "thread_123", "object": "thread"} - result = self.lb.threads.get(thread_id="thread_123") - mock_get.assert_called_once_with("/v1/threads/thread_123") - self.assertEqual(result, {"id": "thread_123", "object": "thread"}) - - @patch("langbase.request.Request.delete") - def test_threads_delete(self, mock_delete): - """Test threads.delete method.""" - mock_delete.return_value = {"success": True} - result = self.lb.threads.delete(thread_id="thread_123") - mock_delete.assert_called_once_with("/v1/threads/thread_123") - self.assertEqual(result, {"success": True}) - - @patch("langbase.request.Request.post") - def test_threads_append(self, mock_post): - """Test threads.append method.""" - mock_post.return_value = [{"id": "msg_123", "content": "Hello"}] - result = self.lb.threads.append( - thread_id="thread_123", - messages=[{"role": "user", "content": "Hello"}] - ) - mock_post.assert_called_once() - self.assertEqual(result, [{"id": "msg_123", "content": "Hello"}]) - - @patch("langbase.request.Request.get") - def test_threads_messages_list(self, mock_get): - """Test threads.messages.list method.""" - mock_get.return_value = [{"id": "msg_123", "content": "Hello"}] - result = self.lb.threads.messages.list(thread_id="thread_123") - mock_get.assert_called_once_with("/v1/threads/thread_123/messages") - self.assertEqual(result, [{"id": "msg_123", "content": "Hello"}]) - - @patch("langbase.request.Request.post") - def test_llm_run(self, mock_post): - """Test llm.run method.""" - mock_post.return_value = {"completion": "Hello, world!"} - result = self.lb.llm.run( - messages=[{"role": "user", "content": "Hi"}], - model="anthropic:claude-3-sonnet", - llm_key="llm-api-key" - ) - mock_post.assert_called_once() - self.assertEqual(result, {"completion": "Hello, world!"}) - - @patch("langbase.request.Request.post") - def test_embed(self, mock_post): - """Test embed method.""" - mock_post.return_value = [[0.1, 0.2, 0.3]] - result = self.lb.embed( - chunks=["Test text"] - ) - mock_post.assert_called_once() - self.assertEqual(result, [[0.1, 0.2, 0.3]]) - - @patch("requests.post") - def test_chunk(self, mock_post): - """Test chunk method.""" - mock_response = MagicMock() - mock_response.ok = True - mock_response.json.return_value = ["Chunk 1", "Chunk 2"] - mock_post.return_value = mock_response - - result = self.lb.chunk( - document=b"Test document", - document_name="test.txt", - content_type="text/plain" - ) - - mock_post.assert_called_once() - self.assertEqual(result, ["Chunk 1", "Chunk 2"]) - - @patch("requests.post") - def test_parse(self, mock_post): - """Test parse method.""" - mock_response = MagicMock() - mock_response.ok = True - mock_response.json.return_value = { - "documentName": "test.txt", - "content": "Test content" - } - mock_post.return_value = mock_response - - result = self.lb.parse( - document=b"Test document", - 
document_name="test.txt", - content_type="text/plain" - ) - - mock_post.assert_called_once() - self.assertEqual(result, {"documentName": "test.txt", "content": "Test content"}) - - @patch("langbase.request.Request.get") - def test_error_handling(self, mock_get): - """Test error handling.""" - # Simulate a 404 error - mock_error = APIError(404, {"message": "Not found"}, "Not found", {}) - mock_get.side_effect = NotFoundError(404, {"message": "Not found"}, "Not found", {}) - - with self.assertRaises(NotFoundError): - self.lb.pipes.list() - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_errors.py b/tests/test_errors.py deleted file mode 100644 index 6608b84..0000000 --- a/tests/test_errors.py +++ /dev/null @@ -1,153 +0,0 @@ -""" -Tests for error handling classes. -""" -import unittest - -from langbase.errors import ( - APIError, APIConnectionError, APIConnectionTimeoutError, - BadRequestError, AuthenticationError, PermissionDeniedError, - NotFoundError, ConflictError, UnprocessableEntityError, - RateLimitError, InternalServerError -) - - -class TestErrors(unittest.TestCase): - """Test error handling classes.""" - - def test_api_error_init(self): - """Test APIError initialization.""" - error = APIError(400, {"message": "Bad request"}, "Bad request", {"X-Request-ID": "123"}) - - self.assertEqual(error.status, 400) - self.assertEqual(error.error, {"message": "Bad request"}) - self.assertEqual(error.request_id, None) # No lb-request-id in headers - self.assertEqual(str(error), "400 Bad request") - - def test_api_error_init_with_request_id(self): - """Test APIError initialization with request ID.""" - error = APIError(400, {"message": "Bad request"}, "Bad request", {"lb-request-id": "123"}) - - self.assertEqual(error.status, 400) - self.assertEqual(error.error, {"message": "Bad request"}) - self.assertEqual(error.request_id, "123") - self.assertEqual(str(error), "400 Bad request") - - def test_api_error_make_message(self): - """Test APIError._make_message.""" - # Message from error.message (string) - msg = APIError._make_message(400, {"message": "Error message"}, None) - self.assertEqual(msg, "400 Error message") - - # Message from error.message (dict) - msg = APIError._make_message(400, {"message": {"detail": "Error"}}, None) - self.assertEqual(msg, '400 {"detail": "Error"}') - - # Message from error (string) - msg = APIError._make_message(400, "Error message", None) - self.assertEqual(msg, "400 Error message") - - # Message from error (dict) - msg = APIError._make_message(400, {"error": "Something went wrong"}, None) - self.assertEqual(msg, '400 {"error": "Something went wrong"}') - - # Message from message parameter - msg = APIError._make_message(400, None, "Error message") - self.assertEqual(msg, "400 Error message") - - # Status only - msg = APIError._make_message(400, None, None) - self.assertEqual(msg, "400 status code (no body)") - - # Message only - msg = APIError._make_message(None, None, "Error message") - self.assertEqual(msg, "Error message") - - # No information - msg = APIError._make_message(None, None, None) - self.assertEqual(msg, "(no status code or body)") - - def test_api_error_generate(self): - """Test APIError.generate.""" - # No status (connection error) - error = APIError.generate(None, None, "Connection error", {}) - self.assertIsInstance(error, APIConnectionError) - - # 400 Bad Request - error = APIError.generate(400, {"error": "Bad request"}, None, {}) - self.assertIsInstance(error, BadRequestError) - - # 401 Authentication Error - error = 
APIError.generate(401, {"error": "Unauthorized"}, None, {}) - self.assertIsInstance(error, AuthenticationError) - - # 403 Permission Denied - error = APIError.generate(403, {"error": "Forbidden"}, None, {}) - self.assertIsInstance(error, PermissionDeniedError) - - # 404 Not Found - error = APIError.generate(404, {"error": "Not found"}, None, {}) - self.assertIsInstance(error, NotFoundError) - - # 409 Conflict - error = APIError.generate(409, {"error": "Conflict"}, None, {}) - self.assertIsInstance(error, ConflictError) - - # 422 Unprocessable Entity - error = APIError.generate(422, {"error": "Invalid data"}, None, {}) - self.assertIsInstance(error, UnprocessableEntityError) - - # 429 Rate Limit - error = APIError.generate(429, {"error": "Too many requests"}, None, {}) - self.assertIsInstance(error, RateLimitError) - - # 500 Internal Server Error - error = APIError.generate(500, {"error": "Server error"}, None, {}) - self.assertIsInstance(error, InternalServerError) - - # Other status code - error = APIError.generate(418, {"error": "I'm a teapot"}, None, {}) - self.assertIsInstance(error, APIError) - self.assertEqual(error.status, 418) - - def test_api_connection_error(self): - """Test APIConnectionError.""" - error = APIConnectionError() - self.assertEqual(str(error), "Connection error.") - self.assertIsNone(error.status) - - error = APIConnectionError("Custom message") - self.assertEqual(str(error), "Custom message") - - cause = ValueError("Underlying error") - error = APIConnectionError(cause=cause) - self.assertEqual(error.__cause__, cause) - - def test_api_connection_timeout_error(self): - """Test APIConnectionTimeoutError.""" - error = APIConnectionTimeoutError() - self.assertEqual(str(error), "Request timed out.") - - error = APIConnectionTimeoutError("Custom timeout message") - self.assertEqual(str(error), "Custom timeout message") - - def test_error_subclasses(self): - """Test error subclasses.""" - # Check that all error subclasses have the expected status code - self.assertEqual(BadRequestError(400, None, None, None).status, 400) - self.assertEqual(AuthenticationError(401, None, None, None).status, 401) - self.assertEqual(PermissionDeniedError(403, None, None, None).status, 403) - self.assertEqual(NotFoundError(404, None, None, None).status, 404) - self.assertEqual(ConflictError(409, None, None, None).status, 409) - self.assertEqual(UnprocessableEntityError(422, None, None, None).status, 422) - self.assertEqual(RateLimitError(429, None, None, None).status, 429) - - # InternalServerError can have any 5xx status - error = InternalServerError(500, None, None, None) - self.assertEqual(error.status, 500) - - error = InternalServerError(503, None, None, None) - self.assertEqual(error.status, 503) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_langbase_client.py b/tests/test_langbase_client.py new file mode 100644 index 0000000..8db5789 --- /dev/null +++ b/tests/test_langbase_client.py @@ -0,0 +1,66 @@ +""" +Tests for Langbase client initialization and configuration. 
+""" + +from langbase import Langbase + + +class TestLangbaseClient: + """Test Langbase client initialization and configuration.""" + + def test_initialization_with_api_key(self): + """Test initialization with API key parameter.""" + client = Langbase(api_key="test-api-key") + assert client.api_key == "test-api-key" + assert client.base_url == "https://api.langbase.com" + assert hasattr(client, "pipes") + assert hasattr(client, "memories") + assert hasattr(client, "tools") + assert hasattr(client, "threads") + + def test_request_instance_creation(self, langbase_client): + """Test that request instance is properly created.""" + assert hasattr(langbase_client, "request") + assert langbase_client.request.api_key == "test-api-key" + assert langbase_client.request.base_url == "https://api.langbase.com" + + def test_nested_class_initialization(self, langbase_client): + """Test that nested classes are properly initialized.""" + # Test pipes + assert hasattr(langbase_client.pipes, "list") + assert hasattr(langbase_client.pipes, "create") + assert hasattr(langbase_client.pipes, "update") + assert hasattr(langbase_client.pipes, "run") + + # Test memories + assert hasattr(langbase_client.memories, "create") + assert hasattr(langbase_client.memories, "list") + assert hasattr(langbase_client.memories, "delete") + assert hasattr(langbase_client.memories, "retrieve") + assert hasattr(langbase_client.memories, "documents") + + # Test memory documents + assert hasattr(langbase_client.memories.documents, "list") + assert hasattr(langbase_client.memories.documents, "delete") + assert hasattr(langbase_client.memories.documents, "upload") + assert hasattr(langbase_client.memories.documents, "embeddings") + + # Test tools + assert hasattr(langbase_client.tools, "crawl") + assert hasattr(langbase_client.tools, "web_search") + + # Test threads + assert hasattr(langbase_client.threads, "create") + assert hasattr(langbase_client.threads, "update") + assert hasattr(langbase_client.threads, "get") + assert hasattr(langbase_client.threads, "delete") + assert hasattr(langbase_client.threads, "append") + assert hasattr(langbase_client.threads, "messages") + + def test_utility_methods_available(self, langbase_client): + """Test that utility methods are available on the client.""" + assert hasattr(langbase_client, "embed") + assert hasattr(langbase_client, "chunker") + assert hasattr(langbase_client, "parser") + assert hasattr(langbase_client, "agent") + assert hasattr(langbase_client.agent, "run") diff --git a/tests/test_memories.py b/tests/test_memories.py new file mode 100644 index 0000000..e6f1e6c --- /dev/null +++ b/tests/test_memories.py @@ -0,0 +1,267 @@ +""" +Tests for the Memories API. 
+""" + +import json + +import responses + +from langbase.constants import ( + BASE_URL, + MEMORY_DETAIL_ENDPOINT, + MEMORY_DOCUMENT_DETAIL_ENDPOINT, + MEMORY_DOCUMENT_EMBEDDINGS_RETRY_ENDPOINT, + MEMORY_DOCUMENTS_ENDPOINT, + MEMORY_DOCUMENTS_UPLOAD_ENDPOINT, + MEMORY_ENDPOINT, + MEMORY_RETRIEVE_ENDPOINT, +) +from langbase.types import ( + MemoryCreateResponse, + MemoryDeleteResponse, + MemoryListDocResponse, + MemoryListResponse, + MemoryRetrieveResponse, + MemoryRetryDocEmbedResponse, +) +from tests.constants import ( + AUTH_AND_JSON_CONTENT_HEADER, + AUTHORIZATION_HEADER, + JSON_CONTENT_TYPE_HEADER, +) +from tests.validation_utils import validate_response_headers + + +class TestMemories: + """Test the Memories API.""" + + @responses.activate + def test_memories_list(self, langbase_client, mock_responses): + """Test memories.list method.""" + responses.add( + responses.GET, + f"{BASE_URL}{MEMORY_ENDPOINT}", + json=mock_responses["memory_list"], + status=200, + ) + + result = langbase_client.memories.list() + + assert result == mock_responses["memory_list"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_memories_create(self, langbase_client, mock_responses): + """Test memories.create method.""" + request_body = { + "name": "new-memory", + "description": "A test memory", + "embedding_model": "openai:text-embedding-ada-002", + } + + responses.add( + responses.POST, + f"{BASE_URL}{MEMORY_ENDPOINT}", + json=mock_responses["memory_create"], + status=201, + ) + + result = langbase_client.memories.create(**request_body) + + assert result == mock_responses["memory_create"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_memories_delete(self, langbase_client, mock_responses): + """Test memories.delete method.""" + memory_name = "test-memory" + + responses.add( + responses.DELETE, + f"{BASE_URL}{MEMORY_DETAIL_ENDPOINT.format(name=memory_name)}", + json=mock_responses["memory_delete"], + status=200, + ) + + result = langbase_client.memories.delete(memory_name) + + assert result == mock_responses["memory_delete"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_memories_retrieve(self, langbase_client, mock_responses): + """Test memories.retrieve method.""" + request_body = { + "query": "test query", + "memory": [{"name": "memory1"}, {"name": "memory2"}], + "topK": 5, + } + + responses.add( + responses.POST, + f"{BASE_URL}{MEMORY_RETRIEVE_ENDPOINT}", + json=mock_responses["memory_retrieve"], + status=200, + ) + + result = langbase_client.memories.retrieve( + query=request_body["query"], + memory=request_body["memory"], + top_k=5, + ) + + assert result == mock_responses["memory_retrieve"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + +class TestMemoryDocuments: + """Test the Memory Documents API.""" + + @responses.activate + def test_documents_list(self, langbase_client, mock_responses): + """Test documents.list method.""" + memory_name = "test-memory" + + responses.add( + responses.GET, + 
f"{BASE_URL}{MEMORY_DOCUMENTS_ENDPOINT.format(memory_name=memory_name)}", + json=mock_responses["memory_docs_list"], + status=200, + ) + + result = langbase_client.memories.documents.list(memory_name) + + assert result == mock_responses["memory_docs_list"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_documents_delete(self, langbase_client, mock_responses): + """Test documents.delete method.""" + memory_name = "test-memory" + document_name = "test-doc.txt" + + responses.add( + responses.DELETE, + f"{BASE_URL}{MEMORY_DOCUMENT_DETAIL_ENDPOINT.format(memory_name=memory_name, document_name=document_name)}", + json=mock_responses["memory_docs_delete"], + status=200, + ) + + result = langbase_client.memories.documents.delete(memory_name, document_name) + + assert result == mock_responses["memory_docs_delete"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_documents_upload_simple( + self, langbase_client, mock_responses, upload_file_content + ): + """Test documents.upload method.""" + memory_name = "test-memory" + document_name = "test-doc.txt" + + # Mock the signed URL request + responses.add( + responses.POST, + f"{BASE_URL}{MEMORY_DOCUMENTS_UPLOAD_ENDPOINT}", + json=mock_responses["memory_docs_upload_signed_url"], + status=200, + ) + + # Mock the file upload to signed URL + responses.add( + responses.PUT, + "https://storage.langbase.com/upload?signature=xyz", + status=200, + ) + + result = langbase_client.memories.documents.upload( + memory_name=memory_name, + document_name=document_name, + document=upload_file_content, + content_type="text/plain", + ) + + assert len(responses.calls) == 2 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert responses.calls[1].request.body == upload_file_content + validate_response_headers( + responses.calls[1].request.headers, + {**AUTHORIZATION_HEADER, "Content-Type": "text/plain"}, + ) + + @responses.activate + def test_documents_upload_with_metadata( + self, langbase_client, mock_responses, upload_file_content + ): + """Test documents.upload method with metadata.""" + memory_name = "test-memory" + document_name = "test-doc.txt" + metadata = {"author": "test", "category": "documentation"} + + # Mock the signed URL request + responses.add( + responses.POST, + f"{BASE_URL}{MEMORY_DOCUMENTS_UPLOAD_ENDPOINT}", + json=mock_responses["memory_docs_upload_signed_url"], + status=200, + ) + + # Mock the file upload to signed URL + responses.add( + responses.PUT, + "https://storage.langbase.com/upload?signature=xyz", + status=200, + ) + + result = langbase_client.memories.documents.upload( + memory_name=memory_name, + document_name=document_name, + document=upload_file_content, + content_type="text/plain", + meta=metadata, + ) + + signed_url_request = responses.calls[0].request + validate_response_headers( + signed_url_request.headers, AUTH_AND_JSON_CONTENT_HEADER + ) + request_json = json.loads(signed_url_request.body) + assert request_json["meta"] == metadata + + @responses.activate + def test_documents_embeddings_retry(self, langbase_client, mock_responses): + """Test documents.embeddings.retry method.""" + memory_name = "test-memory" + document_name = "test-doc.txt" + + responses.add( + responses.GET, + 
f"{BASE_URL}{MEMORY_DOCUMENT_EMBEDDINGS_RETRY_ENDPOINT.format(memory_name=memory_name, document_name=document_name)}", + json=mock_responses["memory_docs_embeddings_retry"], + status=200, + ) + + result = langbase_client.memories.documents.embeddings.retry( + memory_name, document_name + ) + + assert result == mock_responses["memory_docs_embeddings_retry"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) diff --git a/tests/test_pipes.py b/tests/test_pipes.py new file mode 100644 index 0000000..4d9c2a3 --- /dev/null +++ b/tests/test_pipes.py @@ -0,0 +1,263 @@ +""" +Tests for the Pipes API. +""" + +import json + +import pytest +import responses + +from langbase import Langbase +from langbase.constants import BASE_URL, PIPES_ENDPOINT +from tests.constants import ( + AUTH_AND_JSON_CONTENT_HEADER, + AUTHORIZATION_HEADER, + JSON_CONTENT_TYPE_HEADER, +) +from tests.validation_utils import validate_response_headers + + +class TestPipes: + """Test the Pipes API.""" + + @responses.activate + def test_pipes_list(self, langbase_client, mock_responses): + """Test pipes.list method.""" + responses.add( + responses.GET, + f"{BASE_URL}{PIPES_ENDPOINT}", + json=mock_responses["pipe_list"], + status=200, + ) + + result = langbase_client.pipes.list() + + assert result == mock_responses["pipe_list"] + request = responses.calls[0].request + assert len(responses.calls) == 1 + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_pipes_create(self, langbase_client, mock_responses): + """Test pipes.create method.""" + request_body = { + "name": "new-pipe", + "description": "A test pipe", + "model": "anthropic:claude-3-sonnet", + } + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}", + json=mock_responses["pipe_create"], + status=201, + ) + + result = langbase_client.pipes.create(**request_body) + request = responses.calls[0].request + assert result == mock_responses["pipe_create"] + assert len(responses.calls) == 1 + assert json.loads(request.body) == request_body + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_pipes_update(self, langbase_client, mock_responses): + """Test pipes.update method.""" + pipe_name = "test-pipe" + request_body = {"temperature": 0.7, "description": "Updated description"} + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/{pipe_name}", + json={**mock_responses["pipe_create"], **request_body}, + status=200, + ) + + result = langbase_client.pipes.update(name=pipe_name, **request_body) + request = responses.calls[0].request + + assert result == {**mock_responses["pipe_create"], **request_body} + assert len(responses.calls) == 1 + assert json.loads(request.body) == { + "name": pipe_name, + **request_body, + } + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_pipes_run_basic(self, langbase_client, mock_responses): + """Test pipes.run method with basic parameters.""" + messages = [{"role": "user", "content": "Hello"}] + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + json=mock_responses["pipe_run"], + status=200, + ) + + request_body = { + "name": "test-pipe", + "messages": messages, + } + + result = langbase_client.pipes.run(**request_body) + request = responses.calls[0].request + + assert result == mock_responses["pipe_run"] + assert len(responses.calls) == 1 + + 
# Validate body. + assert json.loads(request.body) == request_body + + # Validate headers. + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_pipes_run_with_api_key(self, langbase_client, mock_responses): + """Test pipes.run method with pipe API key.""" + messages = [{"role": "user", "content": "Hello"}] + + request_body = {"messages": messages} + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + json=mock_responses["pipe_run"], + status=200, + ) + + result = langbase_client.pipes.run(api_key="pipe-specific-key", **request_body) + request = responses.calls[0].request + + assert result == mock_responses["pipe_run"] + assert len(responses.calls) == 1 + + assert json.loads(request.body) == { + **request_body, + "api_key": "pipe-specific-key", + } + validate_response_headers( + request.headers, + { + **AUTH_AND_JSON_CONTENT_HEADER, + "Authorization": "Bearer pipe-specific-key", + }, + ) + + @responses.activate + def test_pipes_run_streaming(self, langbase_client, stream_chunks): + """Test pipes.run method with streaming.""" + messages = [{"role": "user", "content": "Hello"}] + + request_body = {"name": "test-pipe", "messages": messages, "stream": True} + + # Create streaming response + stream_content = b"".join(stream_chunks) + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + body=stream_content, + status=200, + headers={ + "Content-Type": "text/event-stream", + }, + ) + + result = langbase_client.pipes.run(**request_body) + request = responses.calls[0].request + + assert hasattr(result["stream"], "__iter__") + assert len(responses.calls) == 1 + + # Validate body + assert json.loads(request.body) == request_body + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_pipes_run_with_llm_key(self, langbase_client, mock_responses): + """Test pipes.run method with LLM key header.""" + messages = [{"role": "user", "content": "Hello"}] + + request_body = {"name": "test-pipe", "messages": messages} + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + json=mock_responses["pipe_run"], + status=200, + ) + + result = langbase_client.pipes.run(llm_key="custom-llm-key", **request_body) + request = responses.calls[0].request + + assert result == mock_responses["pipe_run"] + assert len(responses.calls) == 1 + + # Validate body + assert json.loads(request.body) == request_body + + validate_response_headers( + request.headers, + {**AUTH_AND_JSON_CONTENT_HEADER, "LB-LLM-KEY": "custom-llm-key"}, + ) + + @responses.activate + def test_pipes_run_with_all_parameters(self, langbase_client, mock_responses): + """Test pipes.run method with all possible parameters.""" + request_body = { + "name": "test-pipe", + "messages": [{"role": "user", "content": "Hello"}], + "temperature": 0.7, + "max_tokens": 100, + "top_p": 0.9, + "stream": False, + "variables": {"var1": "value1"}, + "thread_id": "existing_thread", + } + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + json=mock_responses["pipe_run"], + status=200, + ) + + result = langbase_client.pipes.run(**request_body) + request = responses.calls[0].request + + assert result == mock_responses["pipe_run"] + assert len(responses.calls) == 1 + + # Verify all parameters were included in request + assert json.loads(request.body) == request_body + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def 
test_pipes_run_stream_parameter_not_included_when_false( + self, langbase_client, mock_responses + ): + """Test that stream parameter is included in request when explicitly set to False.""" + request_body = { + "name": "test-pipe", + "messages": [{"role": "user", "content": "Hello"}], + "stream": False, + } + + responses.add( + responses.POST, + f"{BASE_URL}{PIPES_ENDPOINT}/run", + json=mock_responses["pipe_run"], + status=200, + ) + + result = langbase_client.pipes.run(**request_body) + request = responses.calls[0].request + + assert result == mock_responses["pipe_run"] + assert len(responses.calls) == 1 + + # Validate body - stream should be included when explicitly set to False + assert json.loads(request.body) == request_body + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) diff --git a/tests/test_request.py b/tests/test_request.py deleted file mode 100644 index 4a9e3c5..0000000 --- a/tests/test_request.py +++ /dev/null @@ -1,245 +0,0 @@ -""" -Tests for the Request class. -""" -import unittest -from unittest.mock import patch, MagicMock - -import requests - -from langbase.errors import ( - APIError, APIConnectionError, BadRequestError, - NotFoundError, AuthenticationError -) -from langbase.request import Request - - -class TestRequest(unittest.TestCase): - """Test the Request class.""" - - def setUp(self): - """Set up test fixtures.""" - self.config = { - "api_key": "test-api-key", - "base_url": "https://api.langbase.com", - "timeout": 30 - } - self.request = Request(self.config) - - def test_initialization(self): - """Test initialization.""" - self.assertEqual(self.request.api_key, "test-api-key") - self.assertEqual(self.request.base_url, "https://api.langbase.com") - self.assertEqual(self.request.timeout, 30) - - def test_build_url(self): - """Test build_url method.""" - url = self.request.build_url("/test") - self.assertEqual(url, "https://api.langbase.com/test") - - def test_build_headers(self): - """Test build_headers method.""" - headers = self.request.build_headers() - self.assertEqual(headers["Content-Type"], "application/json") - self.assertEqual(headers["Authorization"], "Bearer test-api-key") - - # Test with additional headers - headers = self.request.build_headers({"X-Custom": "Value"}) - self.assertEqual(headers["Content-Type"], "application/json") - self.assertEqual(headers["Authorization"], "Bearer test-api-key") - self.assertEqual(headers["X-Custom"], "Value") - - @patch("requests.request") - def test_make_request(self, mock_request): - """Test make_request method.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_request.return_value = mock_response - - response = self.request.make_request( - "https://api.langbase.com/test", - "GET", - {"Authorization": "Bearer test-api-key"} - ) - - mock_request.assert_called_once_with( - method="GET", - url="https://api.langbase.com/test", - headers={"Authorization": "Bearer test-api-key"}, - json=None, - timeout=30, - stream=False - ) - self.assertEqual(response, mock_response) - - @patch("requests.request") - def test_make_request_connection_error(self, mock_request): - """Test make_request method with connection error.""" - mock_request.side_effect = requests.RequestException("Connection error") - - with self.assertRaises(APIConnectionError): - self.request.make_request( - "https://api.langbase.com/test", - "GET", - {"Authorization": "Bearer test-api-key"} - ) - - def test_handle_error_response(self): - """Test handle_error_response method.""" - # Test with JSON response - 
mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.reason = "Not Found" - mock_response.headers = {} - mock_response.json.return_value = {"error": "Resource not found"} - - with self.assertRaises(NotFoundError): - self.request.handle_error_response(mock_response) - - # Test with text response - mock_response = MagicMock() - mock_response.status_code = 400 - mock_response.reason = "Bad Request" - mock_response.headers = {} - mock_response.json.side_effect = ValueError - mock_response.text = "Bad request error" - - with self.assertRaises(BadRequestError): - self.request.handle_error_response(mock_response) - - def test_handle_stream_response(self): - """Test handle_stream_response method.""" - mock_response = MagicMock() - mock_response.iter_lines.return_value = [b"line1", b"line2"] - mock_response.headers = {"lb-thread-id": "thread_123"} - - result = self.request.handle_stream_response(mock_response) - - self.assertEqual(result["thread_id"], "thread_123") - self.assertEqual(list(result["stream"]), [b"line1", b"line2"]) - - def test_handle_run_response_stream(self): - """Test handle_run_response_stream method.""" - mock_response = MagicMock() - mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"] - mock_response.headers = { - "lb-thread-id": "thread_123", - "content-type": "text/event-stream" - } - - # Test without raw_response - result = self.request.handle_run_response_stream(mock_response) - self.assertEqual(result["thread_id"], "thread_123") - self.assertEqual(list(result["stream"]), [b"chunk1", b"chunk2"]) - self.assertNotIn("raw_response", result) - - # Test with raw_response - result = self.request.handle_run_response_stream(mock_response, raw_response=True) - self.assertEqual(result["thread_id"], "thread_123") - self.assertEqual(list(result["stream"]), [b"chunk1", b"chunk2"]) - self.assertIn("raw_response", result) - self.assertEqual( - result["raw_response"]["headers"], - {"lb-thread-id": "thread_123", "content-type": "text/event-stream"} - ) - - def test_handle_run_response(self): - """Test handle_run_response method.""" - mock_response = MagicMock() - mock_response.json.return_value = {"completion": "Hello, world!"} - mock_response.headers = {"lb-thread-id": "thread_123"} - - # Test with thread_id, without raw_response - result = self.request.handle_run_response(mock_response, "thread_123") - self.assertEqual(result["completion"], "Hello, world!") - self.assertEqual(result["thread_id"], "thread_123") - self.assertNotIn("raw_response", result) - - # Test with thread_id and raw_response - result = self.request.handle_run_response(mock_response, "thread_123", True) - self.assertEqual(result["completion"], "Hello, world!") - self.assertEqual(result["thread_id"], "thread_123") - self.assertIn("raw_response", result) - self.assertEqual( - result["raw_response"]["headers"], - {"lb-thread-id": "thread_123"} - ) - - # Test with raw field in response - mock_response.json.return_value = { - "completion": "Hello, world!", - "raw": {"id": "123", "model": "test-model"} - } - result = self.request.handle_run_response(mock_response, "thread_123") - self.assertEqual(result["completion"], "Hello, world!") - self.assertEqual(result["id"], "123") - self.assertEqual(result["model"], "test-model") - self.assertEqual(result["thread_id"], "thread_123") - - @patch.object(Request, "make_request") - @patch.object(Request, "build_url") - @patch.object(Request, "build_headers") - def test_send(self, mock_build_headers, mock_build_url, mock_make_request): - """Test send 
method.""" - mock_build_url.return_value = "https://api.langbase.com/test" - mock_build_headers.return_value = {"Authorization": "Bearer test-api-key"} - - mock_response = MagicMock() - mock_response.ok = True - mock_response.json.return_value = {"result": "success"} - mock_response.headers = {} - mock_make_request.return_value = mock_response - - # Test normal endpoint - result = self.request.send("/test", "GET") - mock_build_url.assert_called_with("/test") - mock_build_headers.assert_called_with(None) - mock_make_request.assert_called_with( - "https://api.langbase.com/test", - "GET", - {"Authorization": "Bearer test-api-key"}, - None, - False - ) - self.assertEqual(result, {"result": "success"}) - - # Test generation endpoint - mock_response.headers = {"lb-thread-id": "thread_123"} - mock_build_url.return_value = "https://api.langbase.com/v1/pipes/run" - result = self.request.send("/v1/pipes/run", "POST", body={"messages": []}) - self.assertEqual(result["thread_id"], "thread_123") - - @patch.object(Request, "send") - def test_post(self, mock_send): - """Test post method.""" - mock_send.return_value = {"result": "success"} - result = self.request.post("/test", {"key": "value"}, {"X-Custom": "Value"}) - mock_send.assert_called_with("/test", "POST", {"X-Custom": "Value"}, {"key": "value"}, False) - self.assertEqual(result, {"result": "success"}) - - @patch.object(Request, "send") - def test_get(self, mock_send): - """Test get method.""" - mock_send.return_value = {"result": "success"} - result = self.request.get("/test", {"X-Custom": "Value"}) - mock_send.assert_called_with("/test", "GET", {"X-Custom": "Value"}) - self.assertEqual(result, {"result": "success"}) - - @patch.object(Request, "send") - def test_put(self, mock_send): - """Test put method.""" - mock_send.return_value = {"result": "success"} - result = self.request.put("/test", {"key": "value"}, {"X-Custom": "Value"}) - mock_send.assert_called_with("/test", "PUT", {"X-Custom": "Value"}, {"key": "value"}) - self.assertEqual(result, {"result": "success"}) - - @patch.object(Request, "send") - def test_delete(self, mock_send): - """Test delete method.""" - mock_send.return_value = {"result": "success"} - result = self.request.delete("/test", {"X-Custom": "Value"}) - mock_send.assert_called_with("/test", "DELETE", {"X-Custom": "Value"}) - self.assertEqual(result, {"result": "success"}) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_threads.py b/tests/test_threads.py new file mode 100644 index 0000000..0eb64a0 --- /dev/null +++ b/tests/test_threads.py @@ -0,0 +1,231 @@ +""" +Tests for the Threads API. 
+""" + +import json + +import pytest +import responses + +from langbase.constants import ( + BASE_URL, + THREAD_DETAIL_ENDPOINT, + THREAD_MESSAGES_ENDPOINT, + THREADS_ENDPOINT, +) +from langbase.errors import NotFoundError +from tests.constants import ( + AUTH_AND_JSON_CONTENT_HEADER, + AUTHORIZATION_HEADER, + JSON_CONTENT_TYPE_HEADER, +) +from tests.validation_utils import validate_response_headers + + +class TestThreads: + """Test the Threads API.""" + + @responses.activate + def test_threads_create_basic(self, langbase_client, mock_responses): + """Test threads.create method with basic parameters.""" + responses.add( + responses.POST, + f"{BASE_URL}{THREADS_ENDPOINT}", + json=mock_responses["threads_create"], + status=200, + ) + + result = langbase_client.threads.create({}) + + assert result == mock_responses["threads_create"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + + @responses.activate + def test_threads_create_with_metadata(self, langbase_client, mock_responses): + """Test threads.create method with metadata.""" + request_body = {"metadata": {"user_id": "123", "session": "abc"}} + + responses.add( + responses.POST, + f"{BASE_URL}{THREADS_ENDPOINT}", + json=mock_responses["threads_create_with_metadata"], + status=200, + ) + + result = langbase_client.threads.create(metadata=request_body["metadata"]) + + assert result == mock_responses["threads_create_with_metadata"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_threads_create_with_thread_id(self, langbase_client, mock_responses): + """Test threads.create method with specific thread ID.""" + thread_id = "custom_thread_456" + + responses.add( + responses.POST, + f"{BASE_URL}{THREADS_ENDPOINT}", + json=mock_responses["threads_create_with_thread_id"], + status=200, + ) + + result = langbase_client.threads.create(thread_id=thread_id) + + assert result == mock_responses["threads_create_with_thread_id"] + + # Verify thread_id was included + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + print("request.body", request.body) + assert json.loads(request.body) == {"threadId": thread_id} + + @responses.activate + def test_threads_create_with_messages(self, langbase_client, mock_responses): + """Test threads.create method with initial messages.""" + request_body = { + "messages": [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + } + + responses.add( + responses.POST, + f"{BASE_URL}{THREADS_ENDPOINT}", + json=mock_responses["threads_create_with_messages"], + status=200, + ) + + result = langbase_client.threads.create(messages=request_body["messages"]) + + assert result == mock_responses["threads_create_with_messages"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_threads_update(self, langbase_client, mock_responses): + """Test threads.update method.""" + request_data = { + "thread_id": "thread_123", + "metadata": {"user_id": "123", "session": "abc"}, + } + + responses.add( + responses.POST, + 
f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=request_data['thread_id'])}", + json=mock_responses["threads_update"], + status=200, + ) + + result = langbase_client.threads.update(**request_data) + + assert result == mock_responses["threads_update"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert ( + request.url + == f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=request_data['thread_id'])}" + ) + assert json.loads(request.body) == {"metadata": request_data["metadata"]} + + @responses.activate + def test_threads_get(self, langbase_client, mock_responses): + """Test threads.get method.""" + thread_id = "thread_123" + + responses.add( + responses.GET, + f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)}", + json=mock_responses["threads_get"], + status=200, + ) + + result = langbase_client.threads.get(thread_id) + + assert result == mock_responses["threads_get"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert ( + request.url + == f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)}" + ) + + @responses.activate + def test_threads_delete(self, langbase_client, mock_responses): + """Test threads.delete method.""" + thread_id = "thread_123" + + responses.add( + responses.DELETE, + f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)}", + json=mock_responses["threads_delete"], + status=200, + ) + + result = langbase_client.threads.delete(thread_id) + + assert result == mock_responses["threads_delete"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert ( + request.url + == f"{BASE_URL}{THREAD_DETAIL_ENDPOINT.format(thread_id=thread_id)}" + ) + + @responses.activate + def test_threads_messages_list(self, langbase_client, mock_responses): + """Test threads.messages.list method.""" + thread_id = "thread_123" + + responses.add( + responses.GET, + f"{BASE_URL}{THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id)}", + json=mock_responses["threads_messages_list"], + status=200, + ) + + result = langbase_client.threads.messages.list(thread_id) + + assert result == mock_responses["threads_messages_list"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert ( + request.url + == f"{BASE_URL}{THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id)}" + ) + + @responses.activate + def test_threads_append(self, langbase_client, mock_responses): + """Test threads.append method.""" + thread_id = "thread_123" + request_body = {"messages": [{"role": "user", "content": "New message"}]} + + responses.add( + responses.POST, + f"{BASE_URL}{THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id)}", + json=mock_responses["threads_append"], + status=200, + ) + + result = langbase_client.threads.append(thread_id, request_body["messages"]) + + assert result == mock_responses["threads_append"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body["messages"] + assert ( + request.url + == f"{BASE_URL}{THREAD_MESSAGES_ENDPOINT.format(thread_id=thread_id)}" + ) diff --git a/tests/test_tools.py b/tests/test_tools.py new 
file mode 100644 index 0000000..fdc32a6 --- /dev/null +++ b/tests/test_tools.py @@ -0,0 +1,94 @@ +""" +Tests for the Tools. +""" + +import json + +import responses + +from langbase.constants import BASE_URL, TOOLS_CRAWL_ENDPOINT, TOOLS_WEB_SEARCH_ENDPOINT +from tests.constants import AUTH_AND_JSON_CONTENT_HEADER +from tests.validation_utils import validate_response_headers + + +class TestTools: + """Test the Tools API.""" + + @responses.activate + def test_tools_web_search_basic(self, langbase_client, mock_responses): + """Test tools.web_search method with basic parameters.""" + responses.add( + responses.POST, + f"{BASE_URL}{TOOLS_WEB_SEARCH_ENDPOINT}", + json=mock_responses["tools_web_search"], + status=200, + ) + + request_body = {"query": "test search", "api_key": "search_api_key"} + + result = langbase_client.tools.web_search(**request_body) + + assert result == mock_responses["tools_web_search"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers( + request.headers, + { + **AUTH_AND_JSON_CONTENT_HEADER, + "LB-WEB-SEARCH-KEY": request_body["api_key"], + }, + ) + assert json.loads(request.body) == {"query": "test search", "service": "exa"} + + @responses.activate + def test_tools_crawl_basic(self, langbase_client, mock_responses): + """Test tools.crawl method with basic parameters.""" + request_body = {"url": ["https://example.com"], "api_key": "crawl_api_key"} + + responses.add( + responses.POST, + f"{BASE_URL}{TOOLS_CRAWL_ENDPOINT}", + json=mock_responses["tools_crawl"], + status=200, + ) + + result = langbase_client.tools.crawl(**request_body) + + assert result == mock_responses["tools_crawl"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers( + request.headers, + {**AUTH_AND_JSON_CONTENT_HEADER, "LB-CRAWL-KEY": request_body["api_key"]}, + ) + assert json.loads(request.body) == {"url": ["https://example.com"]} + + @responses.activate + def test_tools_crawl_multiple_urls(self, langbase_client, mock_responses): + """Test tools.crawl method with multiple URLs.""" + request_body = { + "url": ["https://example.com", "https://test.com", "https://demo.org"], + "api_key": "crawl_api_key", + "max_pages": 1, + } + + responses.add( + responses.POST, + f"{BASE_URL}{TOOLS_CRAWL_ENDPOINT}", + json=mock_responses["tools_crawl"], + status=200, + ) + + result = langbase_client.tools.crawl(**request_body) + + assert result == mock_responses["tools_crawl"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers( + request.headers, + {**AUTH_AND_JSON_CONTENT_HEADER, "LB-CRAWL-KEY": request_body["api_key"]}, + ) + assert json.loads(request.body) == { + "url": request_body["url"], + "maxPages": request_body["max_pages"], + } diff --git a/tests/test_utilities.py b/tests/test_utilities.py new file mode 100644 index 0000000..f652149 --- /dev/null +++ b/tests/test_utilities.py @@ -0,0 +1,253 @@ +""" +Tests for utility methods. 
+""" + +import json + +import responses + +from langbase.constants import ( + AGENT_RUN_ENDPOINT, + BASE_URL, + CHUNKER_ENDPOINT, + EMBED_ENDPOINT, + PARSER_ENDPOINT, +) +from langbase.types import ( + AgentRunResponse, + ChunkResponse, + EmbedResponse, + ParseResponse, + RunResponseStream, +) +from tests.constants import ( + AUTH_AND_JSON_CONTENT_HEADER, + AUTHORIZATION_HEADER, + JSON_CONTENT_TYPE_HEADER, +) +from tests.validation_utils import validate_response_headers + + +class TestUtilities: + """Test utility methods.""" + + @responses.activate + def test_embed_with_model(self, langbase_client, mock_responses): + """Test embed method with specific model.""" + request_body = { + "chunks": ["First chunk", "Second chunk"], + "embeddingModel": "openai:text-embedding-ada-002", + } + + responses.add( + responses.POST, + f"{BASE_URL}{EMBED_ENDPOINT}", + json=mock_responses["embed"], + status=200, + ) + + result = langbase_client.embed( + request_body["chunks"], embedding_model="openai:text-embedding-ada-002" + ) + + assert result == mock_responses["embed"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_chunker_with_parameters(self, langbase_client, mock_responses): + """Test chunker method with custom parameters.""" + request_body = { + "content": "Long document content for chunking test.", + "chunkMaxLength": 500, + "chunkOverlap": 50, + } + + responses.add( + responses.POST, + f"{BASE_URL}{CHUNKER_ENDPOINT}", + json=mock_responses["chunker"], + status=200, + ) + + result = langbase_client.chunker( + content=request_body["content"], + chunk_max_length=request_body["chunkMaxLength"], + chunk_overlap=request_body["chunkOverlap"], + ) + + assert result == mock_responses["chunker"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_parser_with_different_content_types( + self, langbase_client, mock_responses, upload_file_content + ): + """Test parser method with different content types.""" + test_cases = [ + ("document.pdf", "application/pdf"), + ( + "document.docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ), + ("document.txt", "text/plain"), + ] + + for i, (document_name, content_type) in enumerate(test_cases): + responses.add( + responses.POST, + f"{BASE_URL}{PARSER_ENDPOINT}", + json=mock_responses["parser"], + status=200, + ) + + result = langbase_client.parser( + document=upload_file_content, + document_name=document_name, + content_type=content_type, + ) + + assert result == { + "document_name": mock_responses["parser"]["documentName"], + "content": mock_responses["parser"]["content"], + } + # The number of calls increases with each iteration + assert len(responses.calls) == i + 1 + request = responses.calls[i].request + validate_response_headers(request.headers, AUTHORIZATION_HEADER) + + @responses.activate + def test_agent_run_basic(self, langbase_client, mock_responses): + """Test agent.run method with basic parameters.""" + request_body = { + "input": "Hello, agent!", + "model": "anthropic:claude-3-sonnet", + "apiKey": "test-llm-key", + } + + responses.add( + responses.POST, + f"{BASE_URL}{AGENT_RUN_ENDPOINT}", + json=mock_responses["agent.run"], + status=200, + ) + + result = 
langbase_client.agent.run( + input=request_body["input"], + model=request_body["model"], + api_key=request_body["apiKey"], + ) + + assert result == mock_responses["agent.run"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_agent_run_with_messages(self, langbase_client, mock_responses): + """Test agent.run method with message format input.""" + request_body = { + "input": [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ], + "model": "openai:gpt-4", + "apiKey": "openai-key", + } + + responses.add( + responses.POST, + f"{BASE_URL}{AGENT_RUN_ENDPOINT}", + json=mock_responses["agent.run"], + status=200, + ) + + result = langbase_client.agent.run( + input=request_body["input"], + model=request_body["model"], + api_key=request_body["apiKey"], + ) + + assert result == mock_responses["agent.run"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_agent_run_with_all_parameters(self, langbase_client, mock_responses): + """Test agent.run method with all parameters.""" + request_body = { + "input": "Complex query", + "model": "anthropic:claude-3-sonnet", + "apiKey": "test-key", + "instructions": "Be helpful and concise", + "temperature": 0.7, + "max_tokens": 150, + "top_p": 0.9, + "tools": [{"type": "function", "function": {"name": "test"}}], + } + + responses.add( + responses.POST, + f"{BASE_URL}{AGENT_RUN_ENDPOINT}", + json=mock_responses["agent.run"], + status=200, + ) + + result = langbase_client.agent.run( + input=request_body["input"], + model=request_body["model"], + api_key=request_body["apiKey"], + instructions=request_body["instructions"], + temperature=request_body["temperature"], + max_tokens=request_body["max_tokens"], + top_p=request_body["top_p"], + tools=request_body["tools"], + stream=False, + ) + + assert result == mock_responses["agent.run"] + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body + + @responses.activate + def test_agent_run_streaming(self, langbase_client, stream_chunks): + """Test agent.run method with streaming.""" + request_body = { + "input": "Streaming query", + "model": "openai:gpt-4", + "apiKey": "stream-key", + "stream": True, + } + stream_content = b"".join(stream_chunks) + + responses.add( + responses.POST, + f"{BASE_URL}{AGENT_RUN_ENDPOINT}", + body=stream_content, + status=200, + headers={"Content-Type": "text/event-stream"}, + ) + + result = langbase_client.agent.run( + input=request_body["input"], + model=request_body["model"], + api_key=request_body["apiKey"], + stream=True, + ) + + assert "stream" in result + assert hasattr(result["stream"], "__iter__") + assert len(responses.calls) == 1 + request = responses.calls[0].request + validate_response_headers(request.headers, AUTH_AND_JSON_CONTENT_HEADER) + assert json.loads(request.body) == request_body diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index 689bd0e..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Tests for utility functions. 
-""" -import os -import unittest -from io import BytesIO -from unittest.mock import patch, mock_open - -from langbase.utils import ( - convert_document_to_request_files, - prepare_headers, - format_thread_id, - clean_null_values -) - - -class TestUtils(unittest.TestCase): - """Test utility functions.""" - - def test_convert_document_to_request_files_bytes(self): - """Test convert_document_to_request_files with bytes.""" - document = b"Test document content" - result = convert_document_to_request_files( - document, "test.txt", "text/plain" - ) - - self.assertIn("document", result) - self.assertIn("documentName", result) - self.assertEqual(result["document"][0], "test.txt") - self.assertEqual(result["document"][1], b"Test document content") - self.assertEqual(result["document"][2], "text/plain") - self.assertEqual(result["documentName"], (None, "test.txt")) - - def test_convert_document_to_request_files_bytesio(self): - """Test convert_document_to_request_files with BytesIO.""" - document = BytesIO(b"Test document content") - result = convert_document_to_request_files( - document, "test.txt", "text/plain" - ) - - self.assertIn("document", result) - self.assertIn("documentName", result) - self.assertEqual(result["document"][0], "test.txt") - self.assertEqual(result["document"][1], b"Test document content") - self.assertEqual(result["document"][2], "text/plain") - self.assertEqual(result["documentName"], (None, "test.txt")) - - # Check that the file pointer was reset - self.assertEqual(document.tell(), 0) - - @patch("builtins.open", new_callable=mock_open, read_data=b"Test document content") - @patch("os.path.isfile", return_value=True) - def test_convert_document_to_request_files_filepath(self, mock_isfile, mock_file_open): - """Test convert_document_to_request_files with file path.""" - result = convert_document_to_request_files( - "test.txt", "test.txt", "text/plain" - ) - - mock_isfile.assert_called_once_with("test.txt") - mock_file_open.assert_called_once_with("test.txt", "rb") - - self.assertIn("document", result) - self.assertIn("documentName", result) - self.assertEqual(result["document"][0], "test.txt") - self.assertEqual(result["document"][1], b"Test document content") - self.assertEqual(result["document"][2], "text/plain") - self.assertEqual(result["documentName"], (None, "test.txt")) - - def test_convert_document_to_request_files_invalid_type(self): - """Test convert_document_to_request_files with invalid type.""" - with self.assertRaises(ValueError): - convert_document_to_request_files( - 123, "test.txt", "text/plain" - ) - - def test_prepare_headers(self): - """Test prepare_headers.""" - # Basic test - headers = prepare_headers("test-api-key") - self.assertEqual(headers["Content-Type"], "application/json") - self.assertEqual(headers["Authorization"], "Bearer test-api-key") - - # With additional headers - headers = prepare_headers("test-api-key", {"X-Custom": "Value"}) - self.assertEqual(headers["Content-Type"], "application/json") - self.assertEqual(headers["Authorization"], "Bearer test-api-key") - self.assertEqual(headers["X-Custom"], "Value") - - def test_format_thread_id(self): - """Test format_thread_id.""" - # Already formatted - self.assertEqual(format_thread_id("thread_123"), "thread_123") - - # Not formatted - self.assertEqual(format_thread_id("123"), "thread_123") - - # With whitespace - self.assertEqual(format_thread_id(" 123 "), "thread_123") - - def test_clean_null_values(self): - """Test clean_null_values.""" - data = { - "name": "test", - "description": None, - 
"value": 123, - "options": None - } - - result = clean_null_values(data) - - self.assertIn("name", result) - self.assertIn("value", result) - self.assertNotIn("description", result) - self.assertNotIn("options", result) - self.assertEqual(result["name"], "test") - self.assertEqual(result["value"], 123) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_workflow.py b/tests/test_workflow.py new file mode 100644 index 0000000..7f5f2de --- /dev/null +++ b/tests/test_workflow.py @@ -0,0 +1,399 @@ +""" +Tests for the Workflow execution engine. +""" + +import asyncio +import time + +import pytest + +from langbase.errors import APIError +from langbase.workflow import ( + RetryConfig, + StepConfig, + TimeoutError, + Workflow, + WorkflowContext, +) + + +class TestWorkflow: + """Test the Workflow execution engine.""" + + def test_workflow_initialization(self): + """Test workflow initialization with and without debug mode.""" + # Test default initialization + workflow = Workflow() + assert workflow._debug is False + assert workflow.context == {"outputs": {}} + + # Test with debug enabled + debug_workflow = Workflow(debug=True) + assert debug_workflow._debug is True + assert debug_workflow.context == {"outputs": {}} + + @pytest.mark.asyncio + async def test_basic_step_execution(self): + """Test basic step execution without retries or timeout.""" + workflow = Workflow() + + async def simple_task(): + return "success" + + config: StepConfig = {"id": "test_step", "run": simple_task} + + result = await workflow.step(config) + + assert result == "success" + assert workflow.context["outputs"]["test_step"] == "success" + + @pytest.mark.asyncio + async def test_step_with_timeout_success(self): + """Test step execution with timeout that completes in time.""" + workflow = Workflow() + + async def quick_task(): + await asyncio.sleep(0.01) # 10ms + return "completed" + + config: StepConfig = { + "id": "quick_step", + "timeout": 100, # 100ms timeout + "run": quick_task, + } + + result = await workflow.step(config) + + assert result == "completed" + assert workflow.context["outputs"]["quick_step"] == "completed" + + @pytest.mark.asyncio + async def test_step_with_timeout_failure(self): + """Test step execution that times out.""" + workflow = Workflow() + + async def slow_task(): + await asyncio.sleep(0.2) # 200ms + return "should_not_complete" + + config: StepConfig = { + "id": "slow_step", + "timeout": 50, # 50ms timeout + "run": slow_task, + } + + with pytest.raises(TimeoutError) as exc_info: + await workflow.step(config) + + assert exc_info.value.step_id == "slow_step" + assert exc_info.value.timeout == 50 + assert "slow_step" in str(exc_info.value) + assert "50ms" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_step_with_retries_success_on_retry(self): + """Test step that fails initially but succeeds on retry.""" + workflow = Workflow() + + call_count = 0 + + async def flaky_task(): + nonlocal call_count + call_count += 1 + if call_count < 3: + msg = "Temporary failure" + raise APIError(msg) + return "success_on_retry" + + config: StepConfig = { + "id": "flaky_step", + "retries": {"limit": 3, "delay": 10, "backoff": "fixed"}, # 10ms delay + "run": flaky_task, + } + + result = await workflow.step(config) + + assert result == "success_on_retry" + assert call_count == 3 + assert workflow.context["outputs"]["flaky_step"] == "success_on_retry" + + @pytest.mark.asyncio + async def test_step_with_retries_failure_after_all_attempts(self): + """Test step that fails after all retry 
attempts.""" + workflow = Workflow() + + async def always_fail_task(): + msg = "Persistent failure" + raise APIError(msg) + + config: StepConfig = { + "id": "failing_step", + "retries": {"limit": 2, "delay": 10, "backoff": "fixed"}, + "run": always_fail_task, + } + + with pytest.raises(APIError) as exc_info: + await workflow.step(config) + + assert "Persistent failure" in str(exc_info.value) + + def test_exponential_backoff_calculation(self): + """Test exponential backoff delay calculation.""" + workflow = Workflow() + + # Test exponential backoff: base_delay * (2 ** (attempt - 1)) + assert workflow._calculate_delay(100, 1, "exponential") == 100 # 100 * 2^0 + assert workflow._calculate_delay(100, 2, "exponential") == 200 # 100 * 2^1 + assert workflow._calculate_delay(100, 3, "exponential") == 400 # 100 * 2^2 + assert workflow._calculate_delay(100, 4, "exponential") == 800 # 100 * 2^3 + + def test_linear_backoff_calculation(self): + """Test linear backoff delay calculation.""" + workflow = Workflow() + + # Test linear backoff: base_delay * attempt + assert workflow._calculate_delay(100, 1, "linear") == 100 # 100 * 1 + assert workflow._calculate_delay(100, 2, "linear") == 200 # 100 * 2 + assert workflow._calculate_delay(100, 3, "linear") == 300 # 100 * 3 + assert workflow._calculate_delay(50, 4, "linear") == 200 # 50 * 4 + + def test_fixed_backoff_calculation(self): + """Test fixed backoff delay calculation.""" + workflow = Workflow() + + # Test fixed backoff: always base_delay + assert workflow._calculate_delay(100, 1, "fixed") == 100 + assert workflow._calculate_delay(100, 2, "fixed") == 100 + assert workflow._calculate_delay(100, 3, "fixed") == 100 + assert workflow._calculate_delay(100, 10, "fixed") == 100 + + @pytest.mark.asyncio + async def test_multiple_steps_context_accumulation(self): + """Test that multiple steps accumulate results in context.""" + workflow = Workflow() + + async def step1(): + return "result1" + + async def step2(): + return "result2" + + async def step3(): + return {"data": "result3"} + + # Execute multiple steps + result1 = await workflow.step({"id": "step1", "run": step1}) + result2 = await workflow.step({"id": "step2", "run": step2}) + result3 = await workflow.step({"id": "step3", "run": step3}) + + assert result1 == "result1" + assert result2 == "result2" + assert result3 == {"data": "result3"} + + # Check context accumulation + context = workflow.context + assert context["outputs"]["step1"] == "result1" + assert context["outputs"]["step2"] == "result2" + assert context["outputs"]["step3"] == {"data": "result3"} + assert len(context["outputs"]) == 3 + + @pytest.mark.asyncio + async def test_debug_mode_output(self, capsys): + """Test debug mode logging output using pytest's capsys fixture.""" + workflow = Workflow(debug=True) + + async def test_task(): + await asyncio.sleep(0.01) + return "debug_result" + + config: StepConfig = {"id": "debug_step", "timeout": 1000, "run": test_task} + + result = await workflow.step(config) + + # Capture the printed output + captured = capsys.readouterr() + output = captured.out + + assert result == "debug_result" + assert "🔄 Starting step: debug_step" in output + assert "⏳ Timeout: 1000ms" in output + assert "⏱️ Step debug_step:" in output + assert "📤 Output: debug_result" in output + assert "✅ Completed step: debug_step" in output + + @pytest.mark.asyncio + async def test_debug_mode_retry_output(self, capsys): + """Test debug mode output during retries using pytest's capsys fixture.""" + workflow = Workflow(debug=True) + + 
call_count = 0 + + async def retry_task(): + nonlocal call_count + call_count += 1 + if call_count < 2: + msg = "Debug retry test" + raise APIError(msg) + return "retry_success" + + config: StepConfig = { + "id": "retry_debug", + "retries": {"limit": 2, "delay": 10, "backoff": "fixed"}, + "run": retry_task, + } + + result = await workflow.step(config) + + # Capture the printed output + captured = capsys.readouterr() + output = captured.out + + assert result == "retry_success" + assert "🔄 Retries:" in output + assert "⚠️ Attempt 1 failed, retrying in 10ms..." in output + assert "Error: Unknown Error (Debug retry test)" in output + + @pytest.mark.asyncio + async def test_step_with_complex_return_type(self): + """Test step with complex return types (dict, list, etc.).""" + workflow = Workflow() + + async def complex_task(): + return { + "status": "success", + "data": [1, 2, 3], + "metadata": {"timestamp": "2023-01-01"}, + } + + config: StepConfig = {"id": "complex_step", "run": complex_task} + + result = await workflow.step(config) + + expected = { + "status": "success", + "data": [1, 2, 3], + "metadata": {"timestamp": "2023-01-01"}, + } + + assert result == expected + assert workflow.context["outputs"]["complex_step"] == expected + + @pytest.mark.asyncio + async def test_step_error_without_retries(self): + """Test step that fails without retry configuration.""" + workflow = Workflow() + + async def failing_task(): + msg = "Test error without retries" + raise ValueError(msg) + + config: StepConfig = {"id": "no_retry_step", "run": failing_task} + + with pytest.raises(ValueError) as exc_info: + await workflow.step(config) + + assert "Test error without retries" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_concurrent_step_execution(self): + """Test that workflow steps can be executed concurrently.""" + workflow1 = Workflow() + workflow2 = Workflow() + + async def task1(): + await asyncio.sleep(0.02) + return "task1_result" + + async def task2(): + await asyncio.sleep(0.02) + return "task2_result" + + config1: StepConfig = {"id": "concurrent1", "run": task1} + config2: StepConfig = {"id": "concurrent2", "run": task2} + + # Execute concurrently + start_time = time.time() + results = await asyncio.gather(workflow1.step(config1), workflow2.step(config2)) + end_time = time.time() + + # Should complete in roughly the time of one task (not two) + execution_time = end_time - start_time + assert execution_time < 0.05 # Less than 50ms for both tasks + + assert results == ["task1_result", "task2_result"] + assert workflow1.context["outputs"]["concurrent1"] == "task1_result" + assert workflow2.context["outputs"]["concurrent2"] == "task2_result" + + +class TestTimeoutError: + """Test the TimeoutError exception class.""" + + def test_timeout_error_creation(self): + """Test TimeoutError creation with step_id and timeout.""" + error = TimeoutError("test_step", 5000) + + assert error.step_id == "test_step" + assert error.timeout == 5000 + assert 'Step "test_step" timed out after 5000ms' in str(error) + + def test_timeout_error_inheritance(self): + """Test that TimeoutError inherits from APIError.""" + error = TimeoutError("step", 1000) + + assert isinstance(error, APIError) + assert isinstance(error, Exception) + + +class TestWorkflowTypes: + """Test the TypedDict definitions for workflow types.""" + + def test_workflow_context_structure(self): + """Test WorkflowContext type structure.""" + context: WorkflowContext = {"outputs": {"step1": "result1", "step2": 42}} + + assert "outputs" in 
context + assert isinstance(context["outputs"], dict) + assert context["outputs"]["step1"] == "result1" + assert context["outputs"]["step2"] == 42 + + def test_retry_config_structure(self): + """Test RetryConfig type structure.""" + retry_config: RetryConfig = { + "limit": 3, + "delay": 1000, + "backoff": "exponential", + } + + assert retry_config["limit"] == 3 + assert retry_config["delay"] == 1000 + assert retry_config["backoff"] == "exponential" + + # Test other backoff types + linear_config: RetryConfig = {"limit": 2, "delay": 500, "backoff": "linear"} + assert linear_config["backoff"] == "linear" + + fixed_config: RetryConfig = {"limit": 1, "delay": 100, "backoff": "fixed"} + assert fixed_config["backoff"] == "fixed" + + def test_step_config_structure(self): + """Test StepConfig type structure.""" + + async def dummy_task(): + return "test" + + # Minimal step config + minimal_config: StepConfig = {"id": "test_step", "run": dummy_task} + assert minimal_config["id"] == "test_step" + assert callable(minimal_config["run"]) + + # Full step config + full_config: StepConfig = { + "id": "full_step", + "timeout": 5000, + "retries": {"limit": 3, "delay": 1000, "backoff": "exponential"}, + "run": dummy_task, + } + assert full_config["id"] == "full_step" + assert full_config["timeout"] == 5000 + assert full_config["retries"]["limit"] == 3 + assert callable(full_config["run"]) diff --git a/tests/validation_utils.py b/tests/validation_utils.py new file mode 100644 index 0000000..6ab2c04 --- /dev/null +++ b/tests/validation_utils.py @@ -0,0 +1,46 @@ +import types +from typing import Any, Dict, Literal, Type, Union, get_args, get_origin + + +def validate_response_headers( + headers: Dict[str, Any], expected_headers: Dict[str, Any] +): + """Validates that the response headers contain the expected headers.""" + for key, value in expected_headers.items(): + assert key in headers + assert headers[key] == value + + +def validate_response_body(body: Dict[str, Any], response_type: Type): + """Validates that the response body conforms to the given type.""" + if not hasattr(response_type, "__annotations__"): + origin = get_origin(response_type) + if origin: + assert isinstance(body, origin) + elif response_type is not Any: + assert isinstance(body, response_type) + return + + for key, value_type in response_type.__annotations__.items(): + if key in body and body[key] is not None: + origin = get_origin(value_type) + args = get_args(value_type) + + if origin is Literal: + assert ( + body[key] in args + ), f"Field '{key}' has value '{body[key]}' which is not in Literal args {args}" + elif origin is Union or origin is types.UnionType: + # For now, we just pass on Union to avoid complexity. + pass + # Check if it's a TypedDict + elif hasattr(value_type, "__annotations__"): + validate_response_body(body[key], value_type) + elif origin: # This handles list, dict, etc. + assert isinstance( + body[key], origin + ), f"Field '{key}' has wrong type. Expected {origin}, got {type(body[key])}" + elif value_type is not Any: + assert isinstance( + body[key], value_type + ), f"Field '{key}' has wrong type. Expected {value_type}, got {type(body[key])}"