Skip to content

Commit 175de3a

Browse files
committed
Added unstructured data validation tests for Snowflake, Databricks and BigQuery
1 parent ea6215c commit 175de3a

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{% test validate_unstructured_data(model, column_name, expectation_prompt, llm_model_name='claude-3-5-sonnet') %}
    {#-
        Generic test that asks an LLM whether each value of `column_name` in `model`
        meets a natural-language expectation.

        Args:
            model: relation under test.
            column_name: column whose values are sent to the LLM.
            expectation_prompt: the expectation, in plain language; it is embedded in the
                instruction text that precedes every column value.
            llm_model_name: model identifier forwarded to the adapter-specific macro.

        Outside of an enabled Elementary test run, an empty-result query is rendered instead,
        because a test must always produce an SQL query.

        NOTE(review): llm_model_name is always forwarded explicitly, so any default declared
        on an adapter-specific implementation is never used when called from this test.
        Confirm that this is intended.
    -#}
    {{ config(tags = ['elementary-tests']) }}
    {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
        {#- Fail at compile time when the tested node cannot be resolved to a relation. -#}
        {% set model_relation = elementary.get_model_relation_for_test(model, context["model"]) %}
        {% if not model_relation %}
            {{ exceptions.raise_compiler_error("Unsupported model: " ~ model ~ " (this might happen if you override 'ref' or 'source')") }}
        {% endif %}

        {# Prompt to supply to the LLM; the column value is appended to it by the adapter-specific macro #}
        {% set prompt_template = "You are an unstructured data validator that should reply with string true if the expectation is met or the string false otherwise. You got the following expectation: " ~ expectation_prompt ~ ". Your only role is to determine if the following text meets this expectation: " %}

        {{ elementary.generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) }}

    {%- else %}

        {#- test must run an sql query -#}
        {{ elementary.no_results_query() }}

    {%- endif %}
{% endtest %}
23+
24+
25+
{% macro generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {#-
        Adapter dispatcher: resolves the implementation of this macro for the active
        adapter within the 'elementary' namespace and returns whatever it renders.
        All four arguments are passed through unchanged.
    -#}
    {% set adapter_impl = adapter.dispatch('generate_unstructured_data_validation', 'elementary') %}
    {{ return(adapter_impl(model, column_name, prompt_template, llm_model_name)) }}
{% endmacro %}
28+
29+
{% macro default__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {#-
        Fallback implementation used when no adapter-specific macro exists:
        raises a compiler error naming the current target type.
    -#}
    {% set unsupported_msg = "Unstructured data validation is not supported for target: " ~ target.type %}
    {{ exceptions.raise_compiler_error(unsupported_msg) }}
{% endmacro %}
32+
33+
{% macro snowflake__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {#-
        Snowflake implementation: sends every value of `column_name` in `model` to
        snowflake.cortex.complete and selects the answers that contain "false".

        Args:
            model: relation to read from.
            column_name: column whose value (cast to text) is appended to the prompt.
            prompt_template: instruction text placed in front of each column value.
            llm_model_name: model name passed as the first argument of cortex.complete.
    -#}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and single
        quotes are backslash-escaped; otherwise an apostrophe in the expectation would
        terminate the literal and break the query. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") %}
    with unstructured_data_validation as (
        select
            snowflake.cortex.complete(
                '{{ llm_model_name }}',
                concat('{{ escaped_prompt }}', {{ column_name }}::text)
            ) as result
        from {{ model }}
    )

    select result
    from unstructured_data_validation
    {#- Case-insensitive match so that answers such as "False" are caught as well. #}
    where lower(result) like '%false%'
{% endmacro %}
47+
48+
{% macro databricks__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name='databricks-meta-llama-3-3-70b-instruct') %}
    {#-
        Databricks implementation: sends every value of `column_name` in `model` to
        ai_query and selects the answers that contain "false".

        Args:
            model: relation to read from.
            column_name: column whose value (cast to string) is appended to the prompt.
            prompt_template: instruction text placed in front of each column value.
            llm_model_name: name passed as the first argument of ai_query; the default
                applies only when the caller omits the argument.
    -#}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and single
        quotes are backslash-escaped; otherwise an apostrophe in the expectation would
        terminate the literal and break the query. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") %}
    with unstructured_data_validation as (
        select
            ai_query(
                '{{ llm_model_name }}',
                concat('{{ escaped_prompt }}', cast({{ column_name }} as string))
            ) as result
        from {{ model }}
    )

    select result
    from unstructured_data_validation
    {#- Case-insensitive match so that answers such as "False" are caught as well. #}
    where lower(result) like '%false%'
{% endmacro %}
62+
63+
64+
{% macro bigquery__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name='flash15_model') %}
    {#-
        BigQuery implementation: builds one prompt per row of `model`, runs them through
        ML.GENERATE_TEXT and selects the answers that contain "false".

        Args:
            model: relation to read from; its schema is also where the remote model
                `llm_model_name` is looked up.
            column_name: column whose value (cast to string) is appended to the prompt.
            prompt_template: instruction text placed in front of each column value.
            llm_model_name: name of the BigQuery ML model; the default applies only when
                the caller omits the argument.
    -#}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and single
        quotes are backslash-escaped; otherwise an apostrophe in the expectation would
        terminate the literal and break the query. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") %}
    with unstructured_data_validation as (
        {#- The generated text must be aliased as `result`: the outer query filters on that name. #}
        select ml_generate_text_llm_result as result
        from
            ML.GENERATE_TEXT(
                model `{{ model.schema }}.{{ llm_model_name }}`,
                (
                    select
                        concat(
                            '{{ escaped_prompt }}',
                            cast({{ column_name }} as string)
                        ) as prompt
                    from {{ model }}
                ),
                struct(true as flatten_json_output)
            )
    )

    select result
    from unstructured_data_validation
    {#- Case-insensitive match so that answers such as "False" are caught as well. #}
    where lower(result) like '%false%'
{% endmacro %}

0 commit comments

Comments
 (0)