1616
1717package io .cdap .plugin .gcp .bigquery .connector ;
1818
19+ import java .util .regex .Pattern ;
1920import javax .annotation .Nullable ;
2021
2122/**
@@ -33,7 +34,11 @@ public class BigQueryPath {
// Dataset component of the path; parsePath assigns it from parts[0] before validating it.
3334 private String dataset ;
// Table component of the path; parsePath assigns it from parts[1] only when exactly two parts are present.
3435 private String table ;
// Upper bound that the dataset and table validators compare name.length() against.
3536 private static final int NAME_MAX_LENGTH = 1024 ;
36- private static final String VALID_NAME_REGEX = "[\\ w]+" ;
37+ // Valid BigQuery dataset names can contain only letters, numbers, and underscores.
38+ // See here: https://cloud.google.com/bigquery/docs/datasets#dataset-naming
39+ private static final Pattern VALID_DATASET_NAME_REGEX = Pattern .compile ("[\\ w]+" );
40+ // Valid BigQuery table names are defined here: https://cloud.google.com/bigquery/docs/tables#table_naming
41+ private static final Pattern VALID_TABLE_NAME_REGEX = Pattern .compile ("[\\ p{L}\\ p{M}\\ p{N}\\ p{Pc}\\ p{Pd}\\ p{Zs}]+" );
3742
3843 public BigQueryPath (String path ) {
3944 parsePath (path );
@@ -66,31 +71,50 @@ private void parsePath(String path) {
6671 }
6772
6873 dataset = parts [0 ];
69- validateName ( "Dataset" , dataset );
74+ validateDatasetName ( dataset );
7075
7176 if (parts .length == 2 ) {
7277 table = parts [1 ];
73- validateName ( "Table" , table );
78+ validateTableName ( table );
7479 }
7580 }
7681
7782
7883 /**
79- * The dataset and table name must contain only letters, numbers, and underscores.
80- * And it must be 1024 characters or fewer.
84+ * The dataset name can contain only letters, numbers, and underscores, and must be 1024 characters or fewer .
85+ * See here: https://cloud.google.com/bigquery/docs/datasets#dataset-naming
8186 */
82- private void validateName ( String property , String name ) {
87+ private void validateDatasetName ( String name ) {
8388 if (name .isEmpty ()) {
84- throw new IllegalArgumentException (
85- String .format ("%s should not be empty." , property ));
89+ throw new IllegalArgumentException ("Dataset should not be empty." );
8690 }
8791 if (name .length () > NAME_MAX_LENGTH ) {
8892 throw new IllegalArgumentException (
89- String .format ("%s is invalid, it should contain at most %d characters." , property , NAME_MAX_LENGTH ));
93+ String .format ("Dataset is invalid, it should contain at most %d characters." , NAME_MAX_LENGTH ));
94+ }
95+ if (!VALID_DATASET_NAME_REGEX .matcher (name ).matches ()) {
96+ throw new IllegalArgumentException ("Dataset is invalid, it should contain only letters, numbers, " +
97+ "and underscores." );
98+ }
99+ }
100+
101+ /**
102+ * Table name can contain only Unicode characters in category L (letter), M (mark), N (number),
103+ * Pc (connector, including underscore), Pd (dash), Zs (space).
104+ * It also must be 1024 characters or fewer.
105+ * See here: https://cloud.google.com/bigquery/docs/tables#table_naming
106+ */
107+ private void validateTableName (String name ) {
108+ if (name .isEmpty ()) {
109+ throw new IllegalArgumentException ("Table should not be empty." );
90110 }
91- if (! name .matches ( VALID_NAME_REGEX ) ) {
111+ if (name .length () > NAME_MAX_LENGTH ) {
92112 throw new IllegalArgumentException (
93- String .format ("%s is invalid, it should contain only letters, numbers, and underscores." , property ));
113+ String .format ("Table is invalid, it should contain at most %d characters." , NAME_MAX_LENGTH ));
114+ }
115+ if (!VALID_TABLE_NAME_REGEX .matcher (name ).matches ()) {
116+ throw new IllegalArgumentException ("Table is invalid, it should only contain Unicode characters in category L " +
117+ "(letter), M (mark), N (number), Pc (connector, including underscore), Pd (dash), Zs (space)." );
94118 }
95119 }
96120
0 commit comments