@@ -418,7 +418,8 @@ def _wordcount_test_preprocessor(
418418 env .input_file ('kinglear.txt' , '\n ' .join (lines )))
419419
420420
421- @YamlExamplesTestSuite .register_test_preprocessor ('test_kafka_yaml' )
421+ @YamlExamplesTestSuite .register_test_preprocessor (
422+ ['test_kafka_yaml' , 'test_kafka_to_iceberg_yaml' ])
422423def _kafka_test_preprocessor (
423424 test_spec : dict , expected : List [str ], env : TestEnvironment ):
424425
@@ -448,7 +449,15 @@ def _kafka_test_preprocessor(
448449 'test_pubsub_topic_to_bigquery_yaml' ,
449450 'test_pubsub_subscription_to_bigquery_yaml' ,
450451 'test_jdbc_to_bigquery_yaml' ,
451- 'test_spanner_to_avro_yaml'
452+ 'test_spanner_to_avro_yaml' ,
453+ 'test_gcs_text_to_bigquery_yaml' ,
454+ 'test_sqlserver_to_bigquery_yaml' ,
455+ 'test_postgres_to_bigquery_yaml' ,
456+ 'test_kafka_to_iceberg_yaml' ,
457+ 'test_pubsub_to_iceberg_yaml' ,
458+ 'test_oracle_to_bigquery_yaml' ,
459+ 'test_mysql_to_bigquery_yaml' ,
460+ 'test_spanner_to_bigquery_yaml'
452461])
453462def _io_write_test_preprocessor (
454463 test_spec : dict , expected : List [str ], env : TestEnvironment ):
@@ -482,8 +491,11 @@ def _io_write_test_preprocessor(
482491 return test_spec
483492
484493
485- @YamlExamplesTestSuite .register_test_preprocessor (
486- ['test_simple_filter_yaml' , 'test_simple_filter_and_combine_yaml' ])
494+ @YamlExamplesTestSuite .register_test_preprocessor ([
495+ 'test_simple_filter_yaml' ,
496+ 'test_simple_filter_and_combine_yaml' ,
497+ 'test_gcs_text_to_bigquery_yaml'
498+ ])
487499def _file_io_read_test_preprocessor (
488500 test_spec : dict , expected : List [str ], env : TestEnvironment ):
489501 """
@@ -560,7 +572,8 @@ def _iceberg_io_read_test_preprocessor(
560572@YamlExamplesTestSuite .register_test_preprocessor ([
561573 'test_spanner_read_yaml' ,
562574 'test_enrich_spanner_with_bigquery_yaml' ,
563- "test_spanner_to_avro_yaml"
575+ 'test_spanner_to_avro_yaml' ,
576+ 'test_spanner_to_bigquery_yaml'
564577])
565578def _spanner_io_read_test_preprocessor (
566579 test_spec : dict , expected : List [str ], env : TestEnvironment ):
@@ -642,13 +655,13 @@ def _enrichment_test_preprocessor(
642655
643656@YamlExamplesTestSuite .register_test_preprocessor ([
644657 'test_pubsub_topic_to_bigquery_yaml' ,
645- 'test_pubsub_subscription_to_bigquery_yaml'
658+ 'test_pubsub_subscription_to_bigquery_yaml' ,
659+ 'test_pubsub_to_iceberg_yaml'
646660])
647661def _pubsub_io_read_test_preprocessor (
648662 test_spec : dict , expected : List [str ], env : TestEnvironment ):
649663 """
650664 Preprocessor for tests that involve reading from Pub/Sub.
651-
652665 This preprocessor replaces any ReadFromPubSub transform with a Create
653666 transform that reads from a predefined in-memory list of messages.
654667 This allows the test to verify the pipeline's correctness without relying
@@ -668,27 +681,91 @@ def _pubsub_io_read_test_preprocessor(
668681def _jdbc_io_read_test_preprocessor (
669682 test_spec : dict , expected : List [str ], env : TestEnvironment ):
670683 """
671- Preprocessor for tests that involve reading from JDBC.
684+ Preprocessor for tests that involve reading from generic Jdbc.
685+ url syntax: 'jdbc:<database-type>://<host>:<port>/<database>'
686+ """
687+ return _db_io_read_test_processor (
688+ test_spec , lambda url : url .split ('/' )[- 1 ], 'Jdbc' )
672689
673- This preprocessor replaces any ReadFromJdbc transform with a Create
674- transform that reads from a predefined in-memory list of records.
675- This allows the test to verify the pipeline's correctness without
676- relying on an active JDBC connection.
690+
691+ @YamlExamplesTestSuite .register_test_preprocessor ([
692+ 'test_sqlserver_to_bigquery_yaml' ,
693+ ])
694+ def __sqlserver_io_read_test_preprocessor (
695+ test_spec : dict , expected : List [str ], env : TestEnvironment ):
696+ """
697+ Preprocessor for tests that involve reading from SqlServer.
698+ url syntax: 'jdbc:sqlserver://<host>:<port>;databaseName=<database>;
699+ user=<user>;password=<password>;encrypt=false;trustServerCertificate=true'
700+ """
701+ return _db_io_read_test_processor (
702+ test_spec , lambda url : url .split (';' )[1 ].split ('=' )[- 1 ], 'SqlServer' )
703+
704+
705+ @YamlExamplesTestSuite .register_test_preprocessor ([
706+ 'test_postgres_to_bigquery_yaml' ,
707+ ])
708+ def __postgres_io_read_test_preprocessor (
709+ test_spec : dict , expected : List [str ], env : TestEnvironment ):
710+ """
711+ Preprocessor for tests that involve reading from Postgres.
712+ url syntax: 'jdbc:postgresql://<host>:<port>/<database>?user=<user>&
713+ password=<password>'
714+ """
715+ return _db_io_read_test_processor (
716+ test_spec , lambda url : url .split ('/' )[3 ].split ('?' )[0 ], 'Postgres' )
717+
718+
719+ @YamlExamplesTestSuite .register_test_preprocessor ([
720+ 'test_oracle_to_bigquery_yaml' ,
721+ ])
722+ def __oracle_io_read_test_preprocessor (
723+ test_spec : dict , expected : List [str ], env : TestEnvironment ):
724+ """
725+ Preprocessor for tests that involve reading from Oracle.
726+ url syntax: 'jdbc:oracle:thin:system/oracle@<host>:<port>/<database>'
727+ """
728+ return _db_io_read_test_processor (
729+ test_spec , lambda url : url .split ('/' )[2 ], 'Oracle' )
730+
731+
732+ @YamlExamplesTestSuite .register_test_preprocessor ([
733+ 'test_mysql_to_bigquery_yaml' ,
734+ ])
735+ def __mysql_io_read_test_preprocessor (
736+ test_spec : dict , expected : List [str ], env : TestEnvironment ):
737+ """
738+ Preprocessor for tests that involve reading from MySql.
739+ url syntax: 'jdbc:mysql://<host>:<port>/<database>?user=<user>&
740+ password=<password>'
741+ """
742+ return _db_io_read_test_processor (
743+ test_spec , lambda url : url .split ('/' )[3 ].split ('?' )[0 ], 'MySql' )
744+
745+
746+ def _db_io_read_test_processor (
747+ test_spec : dict , database_url_fn : Callable , database_type : str ):
748+ """
749+ This preprocessor replaces any ReadFrom<database> transform with a Create
750+ transform that reads from a predefined in-memory list of records. This allows
751+ the test to verify the pipeline's correctness without relying on an active
752+ database.
677753 """
678754 if pipeline := test_spec .get ('pipeline' , None ):
679755 for transform in pipeline .get ('transforms' , []):
680- if transform .get ('type' , '' ).startswith ('ReadFromJdbc' ):
756+ transform_name = f"ReadFrom{ database_type } "
757+ if transform .get ('type' , '' ).startswith (transform_name ):
681758 config = transform ['config' ]
682759 url = config ['url' ]
683- database = url . split ( '/' )[ - 1 ]
760+ database = database_url_fn ( url )
684761 if (table := config .get ('table' , None )) is None :
685762 table = config .get ('query' , '' ).split ('FROM' )[- 1 ].strip ()
686763 transform ['type' ] = 'Create'
687764 transform ['config' ] = {
688765 k : v
689766 for k , v in config .items () if k .startswith ('__' )
690767 }
691- elements = INPUT_TABLES [("Jdbc" , database , table )]
768+ elements = INPUT_TABLES [(database_type , database , table )]
692769 if config .get ('query' , None ):
693770 config ['query' ].replace ('select ' ,
694771 'SELECT ' ).replace (' from ' , ' FROM ' )
@@ -705,17 +782,24 @@ def _jdbc_io_read_test_preprocessor(
705782 return test_spec
706783
707784
708- INPUT_FILES = {'products.csv' : input_data .products_csv ()}
785+ INPUT_FILES = {
786+ 'products.csv' : input_data .products_csv (),
787+ 'kinglear.txt' : input_data .text_data ()
788+ }
789+
709790INPUT_TABLES = {
710- ('shipment-test' , 'shipment' , 'shipments' ): input_data .
711- spanner_shipments_data (),
791+ ('shipment-test' , 'shipment' , 'shipments' ): input_data .shipments_data (),
712792 ('orders-test' , 'order-database' , 'orders' ): input_data .
713793 spanner_orders_data (),
714794 ('db' , 'users' , 'NY' ): input_data .iceberg_dynamic_destinations_users_data (),
715795 ('BigTable' , 'beam-test' , 'bigtable-enrichment-test' ): input_data .
716796 bigtable_data (),
717797 ('BigQuery' , 'ALL_TEST' , 'customers' ): input_data .bigquery_data (),
718- ('Jdbc' , 'shipment' , 'shipments' ): input_data .jdbc_shipments_data ()
798+ ('Jdbc' , 'shipment' , 'shipments' ): input_data .shipments_data (),
799+ ('SqlServer' , 'shipment' , 'shipments' ): input_data .shipments_data (),
800+ ('Postgres' , 'shipment' , 'shipments' ): input_data .shipments_data (),
801+ ('Oracle' , 'shipment' , 'shipments' ): input_data .shipments_data (),
802+ ('MySql' , 'shipment' , 'shipments' ): input_data .shipments_data ()
719803}
720804YAML_DOCS_DIR = os .path .join (os .path .dirname (__file__ ))
721805
0 commit comments