diff --git a/sdks/python/apache_beam/yaml/examples/transforms/sql/sql_calcite_connection_properties.yaml b/sdks/python/apache_beam/yaml/examples/transforms/sql/sql_calcite_connection_properties.yaml new file mode 100644 index 000000000000..ea75af64eb2d --- /dev/null +++ b/sdks/python/apache_beam/yaml/examples/transforms/sql/sql_calcite_connection_properties.yaml @@ -0,0 +1,51 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This example demonstrates using calcite_connection_properties to enable +# dialect-specific SQL functions. By setting "fun" to "postgresql", you can +# use PostgreSQL-specific functions like STRING_TO_ARRAY in your SQL queries. +# +# The calcite_connection_properties option is passed to the underlying Calcite +# SQL engine to configure its behavior. Common properties include: +# - "fun": SQL function library to use ("standard", "postgresql", "bigquery", etc.) +# - "lex": Lexical policy for identifiers +# +# See Apache Calcite documentation for all available connection properties. +pipeline: + transforms: + - type: Create + name: CreateSampleData + config: + elements: + - {id: 1, tags: "java python go"} + - {id: 2, tags: "rust cpp"} + - {id: 3, tags: "javascript typescript"} + - type: Sql + name: TransformWithPostgresFunction + input: CreateSampleData + config: + query: "SELECT id, STRING_TO_ARRAY(tags, ' ') as tag_list FROM PCOLLECTION" + - type: LogForTesting + input: TransformWithPostgresFunction + +options: + calcite_connection_properties: {"fun": "postgresql"} + +# Expected: +# Row(id=1, tag_list=['java', 'python', 'go']) +# Row(id=2, tag_list=['rust', 'cpp']) +# Row(id=3, tag_list=['javascript', 'typescript']) diff --git a/sdks/python/apache_beam/yaml/tests/sql.yaml b/sdks/python/apache_beam/yaml/tests/sql.yaml index 0040a2790c54..1f0ddeb1cd1d 100644 --- a/sdks/python/apache_beam/yaml/tests/sql.yaml +++ b/sdks/python/apache_beam/yaml/tests/sql.yaml @@ -93,3 +93,26 @@ pipelines: - type: PyTransform config: constructor: apache_beam.transforms.util.LogElements + + # Test calcite_connection_properties option with PostgreSQL functions + - pipeline: + type: chain + transforms: + - type: Create + name: CreateData + config: + elements: + - {id: 1, text: "hello world"} + - {id: 2, text: "foo bar baz"} + - type: Sql + name: SqlWithPostgresFunction + config: + query: "SELECT id, INITCAP(text) as title_text FROM PCOLLECTION" + - type: AssertEqual + config: + elements: + - {id: 1, title_text: "Hello World"} + - {id: 2, title_text: "Foo Bar Baz"} + options: + calcite_connection_properties: + fun: "postgresql"