Commit ad9b01a

Merge pull request #302 from posit-dev/feat-ignore-ssl-option-draft-validation
feat: add option to ignore SSL in `DraftValidation`
2 parents (968bba4 + 3d7692f) · commit ad9b01a

File tree: 10 files changed (+147, -33 lines changed)

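The headline change is a new `verify_ssl` option on `DraftValidation`. A minimal usage sketch, adapted from the docstring examples added in this commit (the dataset and model name are simply the ones used in those examples):

```python
import pointblank as pb

data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

# Disable SSL certificate verification, e.g. behind a corporate proxy with
# self-signed certificates; this weakens transport security, so use sparingly
pb.DraftValidation(
    data=data,
    model="anthropic:claude-sonnet-4-5",
    verify_ssl=False,
)
```

The default remains `verify_ssl=True`, so existing callers are unaffected.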

docs/user-guide/draft-validation.qmd

Lines changed: 6 additions & 6 deletions
````diff
@@ -66,7 +66,7 @@ data = pb.load_dataset(dataset="global_sales", tbl_type="polars")
 # Generate a validation plan
 pb.DraftValidation(
     data=data,
-    model="anthropic:claude-3-7-sonnet-latest",
+    model="anthropic:claude-sonnet-4-5",
     api_key="your_api_key_here"  # Replace with your actual API key
 )
 ```
@@ -159,7 +159,7 @@ api_key = os.getenv("ANTHROPIC_API_KEY")

 draft_validation = pb.DraftValidation(
     data=data,
-    model="anthropic:claude-3-7-sonnet-latest",
+    model="anthropic:claude-sonnet-4-5",
     api_key=api_key
 )
 ```
@@ -179,7 +179,7 @@ If your API keys have standard names (like `ANTHROPIC_API_KEY` or `OPENAI_API_KE
 # No API key needed if stored in .env with standard names
 draft_validation = pb.DraftValidation(
     data=data,
-    model="anthropic:claude-3-7-sonnet-latest"
+    model="anthropic:claude-sonnet-4-5"
 )
 ```

@@ -191,7 +191,7 @@ Here's an example of a validation plan that might be generated by `DraftValidati
 ```python
 pb.DraftValidation(
     pb.load_dataset(dataset="nycflights", tbl_type="duckdb",
-    model="anthropic:claude-3-7-sonnet-latest"
+    model="anthropic:claude-sonnet-4-5"
 )
 ```

@@ -269,7 +269,7 @@ When using `DraftValidation`, you specify the model in the format `"provider:mod

 ```python
 # Using Anthropic's Claude model
-pb.DraftValidation(data=data, model="anthropic:claude-3-7-sonnet-latest")
+pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")

 # Using OpenAI's GPT model
 pb.DraftValidation(data=data, model="openai:gpt-4-turbo")
@@ -285,7 +285,7 @@ pb.DraftValidation(data=data, model="bedrock:anthropic.claude-3-sonnet-20240229-

 Different models have different capabilities when it comes to generating validation plans:

-- Anthropic Claude 3.7 Sonnet generally provides the most comprehensive and accurate validation
+- Anthropic Claude Sonnet 4.5 generally provides the most comprehensive and accurate validation
   plans
 - OpenAI GPT-4 models also perform well
 - Local models through Ollama can be useful for private data but they currently have reduced
````

pointblank/_interrogation.py

Lines changed: 1 addition & 0 deletions
````diff
@@ -1895,6 +1895,7 @@ def interrogate_prompt(tbl: FrameT, columns_subset: list[str] | None, ai_config:
         provider=llm_provider,
         model=llm_model,
         api_key=None,  # Will be loaded from environment variables
+        verify_ssl=True,  # Default to verifying SSL certificates
     )

     # Set up batch configuration
````
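
For context, the call above builds the `_LLMConfig` that later reaches `_create_chat_instance()` in `pointblank/_utils_ai.py` below. A minimal sketch of the updated config object, with field names taken from this diff and illustrative values:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class _LLMConfig:
    provider: str
    model: str
    api_key: Optional[str] = None
    verify_ssl: bool = True  # field added by this PR


# interrogate_prompt() keeps the default: certificates are verified
config = _LLMConfig(
    provider="anthropic",       # illustrative provider
    model="claude-sonnet-4-5",  # illustrative model name
    api_key=None,               # loaded from environment variables
    verify_ssl=True,
)
```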

pointblank/_utils_ai.py

Lines changed: 28 additions & 3 deletions
````diff
@@ -28,17 +28,22 @@ class _LLMConfig:
     provider
         LLM provider name (e.g., 'anthropic', 'openai', 'ollama', 'bedrock').
     model
-        Model name (e.g., 'claude-3-sonnet-20240229', 'gpt-4').
+        Model name (e.g., 'claude-sonnet-4-5', 'gpt-4').
     api_key
         API key for the provider. If None, will be read from environment.
+    verify_ssl
+        Whether to verify SSL certificates when making requests. Defaults to True.
     """

     provider: str
     model: str
     api_key: Optional[str] = None
+    verify_ssl: bool = True


-def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str] = None):
+def _create_chat_instance(
+    provider: str, model_name: str, api_key: Optional[str] = None, verify_ssl: bool = True
+):
     """
     Create a chatlas chat instance for the specified provider.

@@ -50,6 +55,8 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
         The model name for the provider.
     api_key
         Optional API key. If None, will be read from environment.
+    verify_ssl
+        Whether to verify SSL certificates when making requests. Defaults to True.

     Returns
     -------
@@ -89,6 +96,17 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
         {"index": 2, "result": true}
     ]"""

+    # Create httpx client with SSL verification settings
+    try:
+        import httpx  # noqa
+    except ImportError:  # pragma: no cover
+        raise ImportError(  # pragma: no cover
+            "The `httpx` package is required for SSL configuration. "
+            "Please install it using `pip install httpx`."
+        )
+
+    http_client = httpx.AsyncClient(verify=verify_ssl)
+
     # Create provider-specific chat instance
     if provider == "anthropic":  # pragma: no cover
         # Check that the anthropic package is installed
@@ -106,6 +124,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
             model=model_name,
             api_key=api_key,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )

     elif provider == "openai":  # pragma: no cover
@@ -124,6 +143,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
             model=model_name,
             api_key=api_key,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )

     elif provider == "ollama":  # pragma: no cover
@@ -141,6 +161,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
         chat = ChatOllama(
             model=model_name,
             system_prompt=system_prompt,
+            kwargs={"http_client": http_client},
         )

     elif provider == "bedrock":  # pragma: no cover
@@ -149,6 +170,7 @@ def _create_chat_instance(provider: str, model_name: str, api_key: Optional[str]
         chat = ChatBedrockAnthropic(
            model=model_name,
            system_prompt=system_prompt,
+           kwargs={"http_client": http_client},
        )

     else:
@@ -722,7 +744,10 @@ def __init__(self, llm_config: _LLMConfig):
         """
         self.llm_config = llm_config
         self.chat = _create_chat_instance(
-            provider=llm_config.provider, model_name=llm_config.model, api_key=llm_config.api_key
+            provider=llm_config.provider,
+            model_name=llm_config.model,
+            api_key=llm_config.api_key,
+            verify_ssl=llm_config.verify_ssl,
         )

     def validate_batches(
````
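
Taken together, these hunks thread `verify_ssl` from `_LLMConfig` into `_create_chat_instance()`, where it configures an `httpx.AsyncClient` that chatlas forwards to the provider SDK through its `kwargs` argument. A minimal sketch of that pattern for the Anthropic case, assuming the `chatlas`, `httpx`, and `anthropic` packages are installed:

```python
import httpx
from chatlas import ChatAnthropic

# Mirror verify_ssl=False: this client skips certificate verification,
# trading transport security for compatibility with self-signed certs
http_client = httpx.AsyncClient(verify=False)

# chatlas passes `kwargs` through to the underlying Anthropic client,
# so the custom httpx client handles the HTTPS requests to the API
chat = ChatAnthropic(
    model="claude-sonnet-4-5",
    system_prompt="You are a terse assistant.",
    kwargs={"http_client": http_client},
)
```

The same `kwargs={"http_client": ...}` pattern is applied to the OpenAI, Ollama, and Bedrock branches, and again in `pointblank/draft.py` further down.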

pointblank/assistant.py

Lines changed: 1 addition & 1 deletion
````diff
@@ -55,7 +55,7 @@ def assistant(
     ----------
     model
         The model to be used. This should be in the form of `provider:model` (e.g.,
-        `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
+        `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
         `"ollama"`, and `"bedrock"`.
     data
         An optional data table to focus on during discussion with the PbA, which could be a
````

pointblank/data/api-docs.txt

Lines changed: 41 additions & 9 deletions
````diff
@@ -1157,7 +1157,7 @@ Definition of a schema object.
 `Schema` object is used in a validation workflow.


-DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None) -> None
+DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None, verify_ssl: 'bool' = True) -> None

 Draft a validation plan for a given table using an LLM.

@@ -1180,10 +1180,15 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
     The data to be used for drafting a validation plan.
 model
     The model to be used. This should be in the form of `provider:model` (e.g.,
-    `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
+    `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
     `"ollama"`, and `"bedrock"`.
 api_key
     The API key to be used for the model.
+verify_ssl
+    Whether to verify SSL certificates when making requests to the LLM provider. Set to `False`
+    to disable SSL verification (e.g., when behind a corporate firewall with self-signed
+    certificates). Defaults to `True`. Use with caution as disabling SSL verification can pose
+    security risks.

 Returns
 -------
@@ -1225,6 +1230,33 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
 There's no need to have the `python-dotenv` package installed when using `.env` files in this
 way.

+Notes on SSL Certificate Verification
+--------------------------------------
+By default, SSL certificate verification is enabled for all requests to LLM providers. However,
+in certain network environments (such as corporate networks with self-signed certificates or
+firewall proxies), you may encounter SSL certificate verification errors.
+
+To disable SSL verification, set the `verify_ssl` parameter to `False`:
+
+```python
+import pointblank as pb
+
+data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")
+
+# Disable SSL verification for networks with self-signed certificates
+pb.DraftValidation(
+    data=data,
+    model="anthropic:claude-sonnet-4-5",
+    verify_ssl=False
+)
+```
+
+:::{.callout-warning}
+Disabling SSL verification (through `verify_ssl=False`) can expose your API keys and data to
+man-in-the-middle attacks. Only use this option in trusted network environments and when
+absolutely necessary.
+:::
+
 Notes on Data Sent to the Model Provider
 ----------------------------------------
 The data sent to the model provider is a JSON summary of the table. This data summary is
@@ -1251,7 +1283,7 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
 Let's look at how the `DraftValidation` class can be used to draft a validation plan for a
 table. The table to be used is `"nycflights"`, which is available here via the
 [`load_dataset()`](`pointblank.load_dataset`) function. The model to be used is
-`"anthropic:claude-3-5-sonnet-latest"` (which performs very well compared to other LLMs). The
+`"anthropic:claude-sonnet-4-5"` (which performs very well compared to other LLMs). The
 example assumes that the API key is stored in an `.env` file as `ANTHROPIC_API_KEY`.

 ```python
@@ -1261,7 +1293,7 @@ DraftValidation(data: 'FrameT | Any', model: 'str', api_key: 'str | None' = None
 data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

 # Draft a validation plan for the "nycflights" table
-pb.DraftValidation(data=data, model="anthropic:claude-3-5-sonnet-latest")
+pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")
 ```

 The output will be a drafted validation plan for the `"nycflights"` table and this will appear
@@ -5853,10 +5885,10 @@ prompt(self, prompt: 'str', model: 'str', columns_subset: 'str | list[str] | Non
     so try to include only the columns necessary for the validation.
 model
     The model to be used. This should be in the form of `provider:model` (e.g.,
-    `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`,
-    `"openai"`, `"ollama"`, and `"bedrock"`. The model name should be the specific model to
-    be used from the provider. Model names are subject to change so consult the provider's
-    documentation for the most up-to-date model names.
+    `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
+    `"ollama"`, and `"bedrock"`. The model name should be the specific model to be used from
+    the provider. Model names are subject to change so consult the provider's documentation
+    for the most up-to-date model names.
 batch_size
     Number of rows to process in each batch. Larger batches are more efficient but may hit
     API limits. Default is `1000`.
@@ -9927,7 +9959,7 @@ assistant(model: 'str', data: 'FrameT | Any | None' = None, tbl_name: 'str | Non
 ----------
 model
     The model to be used. This should be in the form of `provider:model` (e.g.,
-    `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
+    `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
     `"ollama"`, and `"bedrock"`.
 data
     An optional data table to focus on during discussion with the PbA, which could be a
````

pointblank/draft.py

Lines changed: 52 additions & 3 deletions
````diff
@@ -38,10 +38,15 @@ class DraftValidation:
         The data to be used for drafting a validation plan.
     model
         The model to be used. This should be in the form of `provider:model` (e.g.,
-        `"anthropic:claude-3-5-sonnet-latest"`). Supported providers are `"anthropic"`, `"openai"`,
+        `"anthropic:claude-sonnet-4-5"`). Supported providers are `"anthropic"`, `"openai"`,
         `"ollama"`, and `"bedrock"`.
     api_key
         The API key to be used for the model.
+    verify_ssl
+        Whether to verify SSL certificates when making requests to the LLM provider. Set to `False`
+        to disable SSL verification (e.g., when behind a corporate firewall with self-signed
+        certificates). Defaults to `True`. Use with caution as disabling SSL verification can pose
+        security risks.

     Returns
     -------
@@ -83,6 +88,33 @@ class DraftValidation:
     There's no need to have the `python-dotenv` package installed when using `.env` files in this
     way.

+    Notes on SSL Certificate Verification
+    --------------------------------------
+    By default, SSL certificate verification is enabled for all requests to LLM providers. However,
+    in certain network environments (such as corporate networks with self-signed certificates or
+    firewall proxies), you may encounter SSL certificate verification errors.
+
+    To disable SSL verification, set the `verify_ssl` parameter to `False`:
+
+    ```python
+    import pointblank as pb
+
+    data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")
+
+    # Disable SSL verification for networks with self-signed certificates
+    pb.DraftValidation(
+        data=data,
+        model="anthropic:claude-sonnet-4-5",
+        verify_ssl=False
+    )
+    ```
+
+    :::{.callout-warning}
+    Disabling SSL verification (through `verify_ssl=False`) can expose your API keys and data to
+    man-in-the-middle attacks. Only use this option in trusted network environments and when
+    absolutely necessary.
+    :::
+
     Notes on Data Sent to the Model Provider
     ----------------------------------------
     The data sent to the model provider is a JSON summary of the table. This data summary is
@@ -109,7 +141,7 @@ class DraftValidation:
     Let's look at how the `DraftValidation` class can be used to draft a validation plan for a
     table. The table to be used is `"nycflights"`, which is available here via the
     [`load_dataset()`](`pointblank.load_dataset`) function. The model to be used is
-    `"anthropic:claude-3-5-sonnet-latest"` (which performs very well compared to other LLMs). The
+    `"anthropic:claude-sonnet-4-5"` (which performs very well compared to other LLMs). The
     example assumes that the API key is stored in an `.env` file as `ANTHROPIC_API_KEY`.

     ```python
@@ -119,7 +151,7 @@ class DraftValidation:
     data = pb.load_dataset(dataset="nycflights", tbl_type="duckdb")

     # Draft a validation plan for the "nycflights" table
-    pb.DraftValidation(data=data, model="anthropic:claude-3-5-sonnet-latest")
+    pb.DraftValidation(data=data, model="anthropic:claude-sonnet-4-5")
     ```

     The output will be a drafted validation plan for the `"nycflights"` table and this will appear
@@ -194,6 +226,7 @@ class DraftValidation:
     data: FrameT | Any
     model: str
     api_key: str | None = None
+    verify_ssl: bool = True
     response: str = field(init=False)

     def __post_init__(self):
@@ -280,6 +313,18 @@ def __post_init__(self):
             " per line)"
         )

+        # Create httpx client with SSL verification settings
+        # This will be passed to the LLM provider's chat client
+        try:
+            import httpx  # noqa
+        except ImportError:  # pragma: no cover
+            raise ImportError(  # pragma: no cover
+                "The `httpx` package is required for SSL configuration. "
+                "Please install it using `pip install httpx`."
+            )
+
+        http_client = httpx.AsyncClient(verify=self.verify_ssl)
+
         if provider == "anthropic":  # pragma: no cover
             # Check that the anthropic package is installed
             try:
@@ -296,6 +341,7 @@ def __post_init__(self):
                 model=model_name,
                 system_prompt="You are a terse assistant and a Python expert.",
                 api_key=self.api_key,
+                kwargs={"http_client": http_client},
             )

         if provider == "openai":  # pragma: no cover
@@ -314,6 +360,7 @@ def __post_init__(self):
                 model=model_name,
                 system_prompt="You are a terse assistant and a Python expert.",
                 api_key=self.api_key,
+                kwargs={"http_client": http_client},
             )

         if provider == "ollama":  # pragma: no cover
@@ -331,6 +378,7 @@ def __post_init__(self):
             chat = ChatOllama(  # pragma: no cover
                 model=model_name,
                 system_prompt="You are a terse assistant and a Python expert.",
+                kwargs={"http_client": http_client},
             )

         if provider == "bedrock":  # pragma: no cover
@@ -339,6 +387,7 @@ def __post_init__(self):
             chat = ChatBedrockAnthropic(  # pragma: no cover
                 model=model_name,
                 system_prompt="You are a terse assistant and a Python expert.",
+                kwargs={"http_client": http_client},
             )

         self.response = str(chat.chat(prompt, stream=False, echo="none"))  # pragma: no cover
````
