{#
    Generic dbt test: asks a warehouse-hosted LLM whether each value of
    `column_name` meets a natural-language expectation. The adapter-specific
    query returns the LLM replies that contain "false", so any returned row
    is a test failure.

    Args:
        model: the model under test (supplied by dbt).
        column_name: the column holding the text to validate (supplied by dbt).
        expectation_prompt: natural-language description of what the text must satisfy.
        llm_model_name: LLM identifier forwarded to the adapter-specific macro.
            When omitted, a default is picked according to `target.type`.

    When not executing, or when this is not an elementary test run, the test
    renders `elementary.no_results_query()` instead.
#}
{% test validate_unstructured_data(model, column_name, expectation_prompt, llm_model_name=none) %}
    {{ config(tags=['elementary-tests']) }}
    {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
        {% set model_relation = elementary.get_model_relation_for_test(model, context["model"]) %}
        {% if not model_relation %}
            {{ exceptions.raise_compiler_error("Unsupported model: " ~ model ~ " (this might happen if you override 'ref' or 'source')") }}
        {% endif %}

        {#- The model name is always forwarded explicitly to the adapter macro, so
            the default has to be resolved here, per warehouse, rather than relying
            on the adapter macros' own parameter defaults. -#}
        {% if not llm_model_name %}
            {% set default_llm_model_names = {
                'snowflake': 'claude-3-5-sonnet',
                'databricks': 'databricks-meta-llama-3-3-70b-instruct',
                'bigquery': 'flash15_model'
            } %}
            {% set llm_model_name = default_llm_model_names.get(target.type, 'claude-3-5-sonnet') %}
        {% endif %}

        {# Prompt to supply to the LLM; the text under validation is appended by the adapter macro #}
        {% set prompt_template = "You are an unstructured data validator that should reply with string true if the expectation is met or the string false otherwise. You got the following expectation: " ~ expectation_prompt ~ ". Your only role is to determine if the following text meets this expectation: " %}

        {{ elementary.generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) }}

    {%- else %}

        {#- test must run an sql query -#}
        {{ elementary.no_results_query() }}

    {%- endif %}
{% endtest %}
23+
24+
{#
    Entry point for building the validation query: resolves the implementation
    matching the active adapter (in the `elementary` namespace) and returns
    whatever that implementation produces for the given arguments.
#}
{% macro generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {%- set adapter_specific_macro = adapter.dispatch('generate_unstructured_data_validation', 'elementary') -%}
    {{ return(adapter_specific_macro(model, column_name, prompt_template, llm_model_name)) }}
{% endmacro %}
28+
{#
    Fallback used when no adapter-specific implementation exists: aborts
    compilation with an error that names the current target type.
#}
{% macro default__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {%- set unsupported_target_message = "Unstructured data validation is not supported for target: " ~ target.type -%}
    {{ exceptions.raise_compiler_error(unsupported_target_message) }}
{% endmacro %}
32+
{#
    Snowflake implementation: sends `prompt_template` followed by the text of
    `column_name` to `snowflake.cortex.complete` for every row of `model`, and
    selects the replies that contain "false" (case-insensitively).

    Args:
        model: relation to read rows from.
        column_name: column whose value is appended to the prompt (cast to text).
        prompt_template: instruction text placed before the column value.
        llm_model_name: model identifier passed to `snowflake.cortex.complete`.
#}
{% macro snowflake__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name) %}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and
        single quotes in it must be escaped or the statement breaks. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") -%}
    with unstructured_data_validation as (
        select
            snowflake.cortex.complete(
                '{{ llm_model_name }}',
                concat('{{ escaped_prompt }}', {{ column_name }}::text)
            ) as result
        from {{ model }}
    )

    select result
    from unstructured_data_validation
    -- lower() so that replies such as "False" or "FALSE" are caught as well
    where lower(result) like '%false%'
{% endmacro %}
47+
{#
    Databricks implementation: sends `prompt_template` followed by the text of
    `column_name` to `ai_query` for every row of `model`, and selects the
    replies that contain "false" (case-insensitively).

    Args:
        model: relation to read rows from.
        column_name: column whose value is appended to the prompt (cast to string).
        prompt_template: instruction text placed before the column value.
        llm_model_name: first argument passed to `ai_query`; the default shown in
            the signature applies only when the caller omits this argument.
#}
{% macro databricks__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name='databricks-meta-llama-3-3-70b-instruct') %}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and
        single quotes in it must be escaped or the statement breaks. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") -%}
    with unstructured_data_validation as (
        select
            ai_query(
                '{{ llm_model_name }}',
                concat('{{ escaped_prompt }}', cast({{ column_name }} as string))
            ) as result
        from {{ model }}
    )

    select result
    from unstructured_data_validation
    -- lower() so that replies such as "False" or "FALSE" are caught as well
    where lower(result) like '%false%'
{% endmacro %}
62+
63+
{#
    BigQuery implementation: builds one prompt per row of `model`
    (`prompt_template` followed by the text of `column_name`), runs them through
    `ML.GENERATE_TEXT`, and selects the replies that contain "false"
    (case-insensitively).

    Args:
        model: relation to read rows from; its schema is also where the LLM
            model object is looked up.
        column_name: column whose value is appended to the prompt (cast to string).
        prompt_template: instruction text placed before the column value.
        llm_model_name: name of the model object inside `model.schema`; the
            default shown in the signature applies only when the caller omits
            this argument.
#}
{% macro bigquery__generate_unstructured_data_validation(model, column_name, prompt_template, llm_model_name='flash15_model') %}
    {#- The prompt is embedded in a single-quoted SQL literal, so backslashes and
        single quotes in it must be escaped or the statement breaks. -#}
    {%- set escaped_prompt = prompt_template | replace("\\", "\\\\") | replace("'", "\\'") -%}
    with unstructured_data_validation as (
        -- The alias must be `result`: the outer query filters on that column name.
        select ml_generate_text_llm_result as result
        from
            ML.GENERATE_TEXT(
                model `{{ model.schema }}.{{ llm_model_name }}`,
                (
                    select
                        concat(
                            '{{ escaped_prompt }}',
                            cast({{ column_name }} as string)
                        ) as prompt
                    from {{ model }}
                ),
                struct(true as flatten_json_output)
            )
    )

    select result
    from unstructured_data_validation
    -- lower() so that replies such as "False" or "FALSE" are caught as well
    where lower(result) like '%false%'
{% endmacro %}
0 commit comments