Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions docs/user-guide/draft-validation.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ data = pb.load_dataset(dataset="global_sales", tbl_type="polars")
# Generate a validation plan
pb.DraftValidation(
data=data,
model="anthropic:claude-3-7-sonnet-latest",
model="anthropic:claude-sonnet-4-5",
api_key="your_api_key_here" # Replace with your actual API key
)
```
Expand Down Expand Up @@ -159,7 +159,7 @@ api_key = os.getenv("ANTHROPIC_API_KEY")

draft_validation = pb.DraftValidation(
data=data,
model="anthropic:claude-3-7-sonnet-latest",
model="anthropic:claude-sonnet-4-5",
api_key=api_key
)
```
Expand All @@ -179,7 +179,7 @@ If your API keys have standard names (like `ANTHROPIC_API_KEY` or `OPENAI_API_KE
# No API key needed if stored in .env with standard names
draft_validation = pb.DraftValidation(
data=data,
model="anthropic:claude-3-7-sonnet-latest"
model="anthropic:claude-sonnet-4-5"
)
```

Expand All @@ -191,7 +191,7 @@ Here's an example of a validation plan that might be generated by `DraftValidati
```python
pb.DraftValidation(
    pb.load_dataset(dataset="nycflights", tbl_type="duckdb"),
model="anthropic:claude-3-7-sonnet-latest"
model="anthropic:claude-sonnet-4-5"
)
```

Expand Down Expand Up @@ -269,7 +269,7 @@ When using `DraftValidation`, you specify the model in the format `"provider:mod

```python
# Using Anthropic's Claude model
pb.DraftValidation(data=data, model="anthropic:claude-3-7-sonnet-latest")
pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")

# Using OpenAI's GPT model
pb.DraftValidation(data=data, model="openai:gpt-4-turbo")
Expand All @@ -285,7 +285,7 @@ pb.DraftValidation(data=data, model="bedrock:anthropic.claude-3-sonnet-20240229-

Different models have different capabilities when it comes to generating validation plans:

- Anthropic Claude 3.7 Sonnet generally provides the most comprehensive and accurate validation
- Anthropic Claude Sonnet 4.5 generally provides the most comprehensive and accurate validation
plans
- OpenAI GPT-4 models also perform well
- Local models through Ollama can be useful for private data but they currently have reduced
Expand Down
1 change: 1 addition & 0 deletions pointblank/_interrogation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1895,6 +1895,7 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
provider=llm_provider,
model=llm_model,
api_key=None, # Will be loaded from environment variables
verify_ssl=True, # Default to verifying SSL certificates
)

# Set up batch configuration
Expand Down
31 changes: 28 additions & 3 deletions pointblank/_utils_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,22 @@ class _LLMConfig:
provider
LLM provider name (e.g., 'anthropic', 'openai', 'ollama', 'bedrock').
model
Model name (e.g., 'claude-3-sonnet-20240229', 'gpt-4').
Model name (e.g., 'claude-sonnet-4-5', 'gpt-4').
api_key
API key for the provider. If None, will be read from environment.
verify_ssl
Whether to verify SSL certificates when making requests. Defaults to True.
"""

provider: str
model: str
api_key: Optional[str] = None
verify_ssl: bool = True


def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str] = None):
def _create_chat_instance(
provider: str, model_name: str, api_key: Optional[str] = None, verify_ssl: bool = True
):
"""
Create a chatlas chat instance for the specified provider.

Expand All @@ -50,6 +55,8 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
The model name for the provider.
api_key
Optional API key. If None, will be read from environment.
verify_ssl
Whether to verify SSL certificates when making requests. Defaults to True.

Returns
-------
Expand Down Expand Up @@ -89,6 +96,17 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
{"index": 2, "result": true}
]"""

# Create httpx client with SSL verification settings
try:
import httpx # noqa
except ImportError: # pragma: no cover
raise ImportError( # pragma: no cover
"The `httpx` package is required for SSL configuration. "
"Please install it using `pip install httpx`."
)

http_client = httpx.AsyncClient(verify=verify_ssl)

# Create provider-specific chat instance
if provider == "anthropic": # pragma: no cover
# Check that the anthropic package is installed
Expand All @@ -106,6 +124,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
model=model_name,
api_key=api_key,
system_prompt=system_prompt,
kwargs={"http_client": http_client},
)

elif provider == "openai": # pragma: no cover
Expand All @@ -124,6 +143,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
model=model_name,
api_key=api_key,
system_prompt=system_prompt,
kwargs={"http_client": http_client},
)

elif provider == "ollama": # pragma: no cover
Expand All @@ -141,6 +161,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
chat = ChatOllama(
model=model_name,
system_prompt=system_prompt,
kwargs={"http_client": http_client},
)

elif provider == "bedrock": # pragma: no cover
Expand All @@ -149,6 +170,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
chat = ChatBedrockAnthropic(
model=model_name,
system_prompt=system_prompt,
kwargs={"http_client": http_client},
)

else:
Expand Down Expand Up @@ -722,7 +744,10 @@ def __init__(self, llm_config: _LLMConfig):
"""
self.llm_config = llm_config
self.chat = _create_chat_instance(
provider=llm_config.provider, model_name=llm_config.model, api_key=llm_config.api_key
provider=llm_config.provider,
model_name=llm_config.model,
api_key=llm_config.api_key,
verify_ssl=llm_config.verify_ssl,
)

def validate_batches(
Expand Down
2 changes: 1 addition & 1 deletion pointblank/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def assistant(
----------
model
The model to be used. This should be in the form of `provider:model` (e.g.,
`"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
`"ollama"`, and `"bedrock"`.
data
An optional data table to focus on during discussion with the PbA, which could be a
Expand Down
50 changes: 41 additions & 9 deletions pointblank/data/api-docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ Definition of a schema object.
`Schema` object is used in a validation workflow.


DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None) -> None
DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None, verify_ssl: 'bool' = True) -> None

Draft a validation plan for a given table using an LLM.

Expand All @@ -1180,10 +1180,15 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
The data to be used for drafting a validation plan.
model
The model to be used. This should be in the form of `provider:model` (e.g.,
`"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
`"ollama"`, and `"bedrock"`.
api_key
The API key to be used for the model.
verify_ssl
Whether to verify SSL certificates when making requests to the LLM provider. Set to `False`
to disable SSL verification (e.g., when behind a corporate firewall with self-signed
certificates). Defaults to `True`. Use with caution as disabling SSL verification can pose
security risks.

Returns
-------
Expand Down Expand Up @@ -1225,6 +1230,33 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
There's no need to have the `python-dotenv` package installed when using `.env` files in this
way.

Notes on SSL Certificate Verification
--------------------------------------
By default, SSL certificate verification is enabled for all requests to LLM providers. However,
in certain network environments (such as corporate networks with self-signed certificates or
firewall proxies), you may encounter SSL certificate verification errors.

To disable SSL verification, set the `verify_ssl` parameter to `False`:

```python
import pointblank as pb

data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

# Disable SSL verification for networks with self-signed certificates
pb.DraftValidation(
data=data,
model="anthropic:claude-sonnet-4-5",
verify_ssl=False
)
```

:::{.callout-warning}
Disabling SSL verification (through `verify_ssl=False`) can expose your API keys and data to
man-in-the-middle attacks. Only use this option in trusted network environments and when
absolutely necessary.
:::

Notes on Data Sent to the Model Provider
----------------------------------------
The data sent to the model provider is a JSON summary of the table. This data summary is
Expand All @@ -1251,7 +1283,7 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
Let's look at how the `DraftValidation` class can be used to draft a validation plan for a
table. The table to be used is `"nycflights"`, which is available here via the
[`load_dataset()`](`pointblank.load_dataset`) function. The model to be used is
`"anthropic:claude-3-5-sonnet-latest"` (which performs very well compared to other LLMs). The
`"anthropic:claude-sonnet-4-5"` (which performs very well compared to other LLMs). The
example assumes that the API key is stored in an `.env` file as `ANTHROPIC_API_KEY`.

```python
Expand All @@ -1261,7 +1293,7 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

# Draft a validation plan for the "nycflights" table
pb.DraftValidation(data=data, model="anthropic:claude-3-5-sonnet-latest")
pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")
```

The output will be a drafted validation plan for the `"nycflights"` table and this will appear
Expand Down Expand Up @@ -5853,10 +5885,10 @@ prompt(self, prompt: 'str', model: 'str', columns_subset: 'str | list[str] | Non
so try to include only the columns necessary for the validation.
model
The model to be used. This should be in the form of `provider:model` (e.g.,
`"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`,
`"openai"`, `"ollama"`, and `"bedrock"`. The model name should be the specific model to
be used from the provider. Model names are subject to change so consult the provider's
documentation for the most up-to-date model names.
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
`"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
the provider. Model names are subject to change so consult the provider's documentation
for the most up-to-date model names.
batch_size
Number of rows to process in each batch. Larger batches are more efficient but may hit
API limits. Default is `1000`.
Expand Down Expand Up @@ -9927,7 +9959,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
----------
model
The model to be used. This should be in the form of `provider:model` (e.g.,
`"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
`"ollama"`, and `"bedrock"`.
data
An optional data table to focus on during discussion with the PbA, which could be a
Expand Down
55 changes: 52 additions & 3 deletions pointblank/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,15 @@ class DraftValidation:
The data to be used for drafting a validation plan.
model
The model to be used. This should be in the form of `provider:model` (e.g.,
`"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
`"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
`"ollama"`, and `"bedrock"`.
api_key
The API key to be used for the model.
verify_ssl
Whether to verify SSL certificates when making requests to the LLM provider. Set to `False`
to disable SSL verification (e.g., when behind a corporate firewall with self-signed
certificates). Defaults to `True`. Use with caution as disabling SSL verification can pose
security risks.

Returns
-------
Expand Down Expand Up @@ -83,6 +88,33 @@ class DraftValidation:
There's no need to have the `python-dotenv` package installed when using `.env` files in this
way.

Notes on SSL Certificate Verification
--------------------------------------
By default, SSL certificate verification is enabled for all requests to LLM providers. However,
in certain network environments (such as corporate networks with self-signed certificates or
firewall proxies), you may encounter SSL certificate verification errors.

To disable SSL verification, set the `verify_ssl` parameter to `False`:

```python
import pointblank as pb

data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

# Disable SSL verification for networks with self-signed certificates
pb.DraftValidation(
data=data,
model="anthropic:claude-sonnet-4-5",
verify_ssl=False
)
```

:::{.callout-warning}
Disabling SSL verification (through `verify_ssl=False`) can expose your API keys and data to
man-in-the-middle attacks. Only use this option in trusted network environments and when
absolutely necessary.
:::

Notes on Data Sent to the Model Provider
----------------------------------------
The data sent to the model provider is a JSON summary of the table. This data summary is
Expand All @@ -109,7 +141,7 @@ class DraftValidation:
Let's look at how the `DraftValidation` class can be used to draft a validation plan for a
table. The table to be used is `"nycflights"`, which is available here via the
[`load_dataset()`](`pointblank.load_dataset`) function. The model to be used is
`"anthropic:claude-3-5-sonnet-latest"` (which performs very well compared to other LLMs). The
`"anthropic:claude-sonnet-4-5"` (which performs very well compared to other LLMs). The
example assumes that the API key is stored in an `.env` file as `ANTHROPIC_API_KEY`.

```python
Expand All @@ -119,7 +151,7 @@ class DraftValidation:
data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

# Draft a validation plan for the "nycflights" table
pb.DraftValidation(data=data, model="anthropic:claude-3-5-sonnet-latest")
pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")
```

The output will be a drafted validation plan for the `"nycflights"` table and this will appear
Expand Down Expand Up @@ -194,6 +226,7 @@ class DraftValidation:
data: FrameT | Any
model: str
api_key: str | None = None
verify_ssl: bool = True
response: str = field(init=False)

def __post_init__(self):
Expand Down Expand Up @@ -280,6 +313,18 @@ def __post_init__(self):
" per line)"
)

# Create httpx client with SSL verification settings
# This will be passed to the LLM provider's chat client
try:
import httpx # noqa
except ImportError: # pragma: no cover
raise ImportError( # pragma: no cover
"The `httpx` package is required for SSL configuration. "
"Please install it using `pip install httpx`."
)

http_client = httpx.AsyncClient(verify=self.verify_ssl)

if provider == "anthropic": # pragma: no cover
# Check that the anthropic package is installed
try:
Expand All @@ -296,6 +341,7 @@ def __post_init__(self):
model=model_name,
system_prompt="You are a terse assistant and a Python expert.",
api_key=self.api_key,
kwargs={"http_client": http_client},
)

if provider == "openai": # pragma: no cover
Expand All @@ -314,6 +360,7 @@ def __post_init__(self):
model=model_name,
system_prompt="You are a terse assistant and a Python expert.",
api_key=self.api_key,
kwargs={"http_client": http_client},
)

if provider == "ollama": # pragma: no cover
Expand All @@ -331,6 +378,7 @@ def __post_init__(self):
chat = ChatOllama( # pragma: no cover
model=model_name,
system_prompt="You are a terse assistant and a Python expert.",
kwargs={"http_client": http_client},
)

if provider == "bedrock": # pragma: no cover
Expand All @@ -339,6 +387,7 @@ def __post_init__(self):
chat = ChatBedrockAnthropic( # pragma: no cover
model=model_name,
system_prompt="You are a terse assistant and a Python expert.",
kwargs={"http_client": http_client},
)

self.response = str(chat.chat(prompt, stream=False, echo="none")) # pragma: no cover
Expand Down
Loading
Loading