# SQL transform: how to provide calcite_connection_properties
#
# This example shows how to pass Calcite connection properties (for example,
# to enable PostgreSQL-specific SQL functions such as STRING_TO_ARRAY) to a
# Beam YAML pipeline.
#
# The properties are pipeline options, so they go under the top-level
# `options:` key, as a sibling of `pipeline:`, not inside the Sql transform's
# `config:`. A plain YAML mapping is the preferred form. Some environments may
# expect a JSON-formatted string instead; that form is shown in the comment at
# the end of this file.

pipeline:
  transforms:
    # A small in-memory input.
    - name: Source
      type: Create
      config:
        elements:
          - {a: "x", b: 1}
          - {a: "x", b: 2}
          - {a: "x", b: 3}
          - {a: "y", b: 10}
    # STRING_TO_ARRAY is the dialect-specific function this example relies on;
    # it is expected to resolve only when `fun: postgresql` is set below.
    # NOTE(review): the query has no FROM clause, so it does not read the rows
    # produced by `Source`; confirm that is intended.
    - name: Transform
      type: Sql
      config:
        query: "SELECT STRING_TO_ARRAY('abc def g', ' ') as col_name"
      input: Source
    # Log the query result so the example's output can be inspected.
    - name: Sink
      type: LogForTesting
      input: Transform
      config:
        level: INFO

# Preferred: pass the connection properties as a YAML mapping.
options:
  calcite_connection_properties:
    fun: postgresql

# Alternative: pass the properties as a JSON string (useful if your runner or
# expansion service expects stringified JSON). Note the quoting: the whole
# JSON document is wrapped in single quotes, e.g.
# calcite_connection_properties: '{"fun": "postgresql"}'
# Extra markdown appended after a transform's description, keyed by the
# lower-cased transform name. Code samples are indented four spaces so that
# Markdown renders them as code blocks, with `:::yaml` selecting the
# highlighter, which is the same layout transform_docs uses for its Usage
# section.
_TRANSFORM_NOTES = {
    'sql': (
        '**Note on Calcite connection properties**: Some SQL functions and '
        'dialect-specific behavior are controlled by Calcite connection '
        'properties. In Beam YAML pipelines you can provide these under the '
        'top-level `options:` key. For example (preferred as YAML mapping):'
        '\n\n'
        '    :::yaml\n\n'
        '    options:\n'
        '      calcite_connection_properties:\n'
        '        fun: postgresql\n\n'
        'If your environment expects a JSON string, you can also provide the '
        'properties as a JSON-formatted string (note the quoting):\n\n'
        '    :::yaml\n\n'
        '    options:\n'
        "      calcite_connection_properties: '{\"fun\": \"postgresql\"}'\n"),
}


def _with_transform_notes(transform_base, extra_docs):
  """Returns extra_docs followed by any note registered for the transform.

  The lookup is case-insensitive. When both caller-supplied docs and a
  registered note exist they are separated by a blank line; when neither
  exists the result is the empty string.
  """
  base = extra_docs or ''
  note = _TRANSFORM_NOTES.get(transform_base.lower())
  if note is None:
    return base
  return f'{base}\n\n{note}' if base else note


def transform_docs(transform_base, transforms, providers, extra_docs=''):
  """Renders the markdown documentation section for one transform.

  The section consists of a `## <transform_base>` heading, a description,
  any extra documentation, a `### Configuration` section and a `### Usage`
  section containing a YAML example.

  Args:
    transform_base: Name used for the heading, for the usage example, and
      (case-insensitively) to look up transform-specific notes.
    transforms: The transform names documented together in this section.
    providers: Mapping from transform name to the providers implementing it.
      Each provider must offer `description(t)` and `config_schema(t)`.
    extra_docs: Optional markdown placed after the description, ahead of any
      transform-specific note.

  Returns:
    The section as a single newline-joined markdown string.
  """
  def longest_rendering(render):
    # Each part is rendered for every (transform, provider) pair and the
    # module's `longest` helper selects which rendering to keep.
    return longest(
        lambda t: longest(lambda p: render(p, t), providers[t]), transforms)

  description = longest_rendering(
      lambda p, t: add_transform_links(t, p.description(t), providers.keys()))
  configuration = longest_rendering(
      lambda p, t: config_docs(p.config_schema(t)))
  usage = longest_rendering(lambda p, t: pretty_example(p, t, transform_base))

  return '\n'.join([
      f'## {transform_base}',
      '',
      # A trailing `::` in a description introduces a code sample; turn it
      # into an indented, YAML-highlighted markdown code block.
      description.replace('::\n', '\n\n    :::yaml\n'),
      '',
      _with_transform_notes(transform_base, extra_docs),
      '',
      '### Configuration',
      '',
      configuration,
      '',
      '### Usage',
      '',
      '    :::yaml',
      '',
      # Indented to match the `:::yaml` marker so it stays in the code block.
      indent(usage, 4),
  ])