Skip to content

Commit a4ed976

Browse files
derrickawMnkyGns
authored and committed
Yaml: Add yaml syntax UT (GoogleCloudPlatform#2872)
* add jinjava library * add dependency for ut * clean up formatting * add yaml syntax ut * fix pom conflict * add python unit test for yaml to replace java based * remove java yaml ut * remove old dependencies * remove jinjava * remove comment * fix comment about test case generation
1 parent abfbb0e commit a4ed976

File tree

5 files changed

+159
-5
lines changed

5 files changed

+159
-5
lines changed

pom.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
<maven-checkstyle-plugin.version>3.2.1</maven-checkstyle-plugin.version>
3939
<maven-compiler-plugin.version>3.11.0</maven-compiler-plugin.version>
4040
<maven-dependency-plugin.version>3.8.1</maven-dependency-plugin.version>
41+
<exec-maven-plugin.version>3.6.2</exec-maven-plugin.version>
4142
<maven-enforcer-plugin.version>3.5.0</maven-enforcer-plugin.version>
4243
<extra.enforcer.rules.version>1.10.0</extra.enforcer.rules.version>
4344
<maven-jar-plugin.version>3.3.0</maven-jar-plugin.version>
@@ -442,7 +443,7 @@
442443
<plugin>
443444
<groupId>org.codehaus.mojo</groupId>
444445
<artifactId>exec-maven-plugin</artifactId>
445-
<version>3.6.2</version>
446+
<version>${exec-maven-plugin.version}</version>
446447
<executions>
447448
<execution>
448449
<id>clean-surefire-xml</id>

yaml/pom.xml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,44 @@
8181
</resources>
8282

8383
<plugins>
84+
<plugin>
85+
<groupId>org.codehaus.mojo</groupId>
86+
<artifactId>exec-maven-plugin</artifactId>
87+
<version>${exec-maven-plugin.version}</version>
88+
<executions>
89+
<execution>
90+
<id>pip-install</id>
91+
<phase>test-compile</phase>
92+
<goals>
93+
<goal>exec</goal>
94+
</goals>
95+
<configuration>
96+
<executable>pip</executable>
97+
<arguments>
98+
<argument>install</argument>
99+
<argument>-r</argument>
100+
<argument>src/test/python/requirements-test.txt</argument>
101+
</arguments>
102+
</configuration>
103+
</execution>
104+
<execution>
105+
<id>python-test</id>
106+
<phase>test</phase>
107+
<goals>
108+
<goal>exec</goal>
109+
</goals>
110+
<configuration>
111+
<executable>python</executable>
112+
<workingDirectory>${project.basedir}</workingDirectory>
113+
<arguments>
114+
<argument>-m</argument>
115+
<argument>unittest</argument>
116+
<argument>src/test/python/yaml_syntax_test.py</argument>
117+
</arguments>
118+
</configuration>
119+
</execution>
120+
</executions>
121+
</plugin>
84122
</plugins>
85123
</build>
86124

yaml/src/main/yaml/KafkaToBigQuery.yaml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,23 @@ template:
3434
type: text
3535

3636
- name: "schema"
37-
help: "Kafka schema. A schema is required if data format is JSON, AVRO or PROTO."
37+
help: >
38+
Kafka schema. A schema is required if data format is JSON, AVRO or
39+
PROTO.
3840
required: true
3941
type: text
4042

4143
- name: "numStorageWriteApiStreams"
42-
help: "Number of streams defines the parallelism of the BigQueryIO’s Write."
44+
help: >
45+
Number of streams defines the parallelism of the BigQueryIO’s Write.
4346
required: false
4447
default: 1
4548
type: integer
4649

4750
- name: "storageWriteApiTriggeringFrequencySec"
48-
help: "Triggering frequency will determine how soon the data will be visible for querying in BigQuery."
51+
help: >
52+
Triggering frequency will determine how soon the data will be visible
53+
for querying in BigQuery.
4954
required: false
5055
default: 5
5156
type: integer
@@ -87,4 +92,4 @@ pipeline:
8792
num_streams: {{ numStorageWriteApiStreams | default(1) }}
8893

8994
options:
90-
streaming: true
95+
streaming: true
yaml/src/test/python/requirements-test.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
apache-beam[gcp,test]
2+
jinja2
yaml/src/test/python/yaml_syntax_test.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#
2+
# Copyright (C) 2025 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
# use this file except in compliance with the License. You may obtain a copy of
6+
# the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
# License for the specific language governing permissions and limitations under
14+
# the License.
15+
#
16+
17+
import logging
18+
import os
19+
import unittest
20+
21+
import apache_beam as beam
22+
import yaml
23+
from apache_beam.yaml import yaml_transform
24+
from jinja2 import Environment, FileSystemLoader, meta
25+
26+
# Logging is set up as an import-time side effect so that it is already in
# place when the generated tests start running. The format carries the
# timestamp, logger name and level so that records are easier to attribute.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.DEBUG)

# Explicitly set the 'apache_beam' logger to DEBUG as well.
logging.getLogger('apache_beam').setLevel(logging.DEBUG)
32+
33+
34+
def create_test_method(template_name, yaml_dir):
    """Create a test method that validates a single YAML template.

    This factory returns a function meant to be attached to the
    YamlSyntaxTest class. When run, the function renders the template with a
    placeholder value for every undeclared Jinja variable, loads the rendered
    text as YAML and validates the result against the Beam 'generic' schema.

    Args:
        template_name (str): The filename of the YAML template to be tested.
        yaml_dir (str): The directory where the YAML templates are located.

    Returns:
        function: A test method that can be attached to a unittest.TestCase
        class. Its docstring names the template it validates.
    """

    def test_method(self):
        # Log calls pass their values as lazy %-style arguments so that the
        # (potentially large) rendered YAML is only formatted into a message
        # when a handler actually emits the record.
        self._logger.info("Validating %s", template_name)
        env = Environment(loader=FileSystemLoader(yaml_dir), autoescape=False)
        template_source = env.loader.get_source(env, template_name)[0]

        # Find all undeclared variables in the template.
        undeclared_vars = meta.find_undeclared_variables(
            env.parse(template_source))

        # Use placeholder values for Jinja variables for validation purposes.
        context = dict.fromkeys(undeclared_vars, 'placeholder')
        rendered_yaml = env.get_template(template_name).render(context)
        self._logger.debug(
            "Rendered YAML for %s:\n%s...", template_name, rendered_yaml)

        self._logger.debug(
            "Loading YAML into Beam pipeline_spec: %s", template_name)
        pipeline_spec = yaml.load(
            rendered_yaml, Loader=yaml_transform.SafeLineLoader)

        # Validate the pipeline spec against the generic schema without trying
        # to expand the transforms, which avoids the need for expansion
        # services.
        yaml_transform.validate_against_schema(pipeline_spec, 'generic')
        self._logger.info(
            "Successfully validated YAML syntax for: %s", template_name)

    # Give every generated test its own description; unittest shows the first
    # docstring line in verbose output.
    test_method.__doc__ = f"Validate the YAML syntax of {template_name}."
    return test_method
75+
76+
77+
class YamlSyntaxTest(unittest.TestCase):
    """Test suite that validates the syntax of Beam YAML templates.

    The class body declares no test methods of its own. The `_create_tests`
    function, which runs at module-load time, attaches one test method to
    this class for each .yaml file found in the `src/main/yaml` directory.
    """

    # Logger used by the generated test methods through `self._logger`.
    _logger = logging.getLogger(__name__)
85+
86+
87+
def _create_tests():
    """Discover all YAML templates and create one test method for each.

    Scans the `../../main/yaml` directory (relative to this file) for `.yaml`
    templates and attaches a generated test method per template to the
    `YamlSyntaxTest` class, so each template is reported as its own test case.

    Test names are derived from the template name: the trailing `.yaml` is
    dropped and every character that is not alphanumeric or an underscore is
    replaced by `_`. If two templates map to the same name, later ones get a
    numeric suffix so that no template is silently left untested.

    If the template directory does not exist, a warning is logged and no
    tests are generated.
    """
    yaml_dir = os.path.normpath(
        os.path.join(
            os.path.dirname(os.path.abspath(__file__)), '../../main/yaml'))
    if not os.path.isdir(yaml_dir):
        # Keep the best-effort behavior (no crash at import time), but make
        # the "zero tests generated" situation visible in the logs.
        logging.getLogger(__name__).warning(
            "YAML template directory not found, no syntax tests generated: %s",
            yaml_dir)
        return

    suffix = '.yaml'
    env = Environment(loader=FileSystemLoader(yaml_dir))
    used_names = set()
    for template_name in env.list_templates(
            filter_func=lambda name: name.endswith(suffix)):
        # Strip only the trailing extension, then map every remaining
        # non-identifier character (e.g. '-', '/', '.') to an underscore.
        stem = template_name[:-len(suffix)]
        base_name = 'test_' + ''.join(
            ch if ch.isalnum() or ch == '_' else '_' for ch in stem)

        # Disambiguate collisions such as 'a-b.yaml' vs 'a_b.yaml'.
        test_name = base_name
        counter = 2
        while test_name in used_names:
            test_name = f"{base_name}_{counter}"
            counter += 1
        used_names.add(test_name)

        test_method = create_test_method(template_name, yaml_dir)
        # Make the function's own name match the attribute it is stored under.
        test_method.__name__ = test_name
        setattr(YamlSyntaxTest, test_name, test_method)
104+
105+
# Attach the generated test methods to YamlSyntaxTest at import time, so they
# already exist on the class when a test runner inspects it.
_create_tests()


# Allow running this file directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)