Skip to content

Commit 4afdf33

Browse files
authored
Merge pull request #1090 from data-integrations/fix/bigquery-tablename
Updated BigQuery table name validation to work with all valid characters
2 parents 5181f91 + 73e890f commit 4afdf33

File tree

3 files changed

+60
-14
lines changed

3 files changed

+60
-14
lines changed

src/main/java/io/cdap/plugin/gcp/bigquery/connector/BigQueryPath.java

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package io.cdap.plugin.gcp.bigquery.connector;
1818

19+
import java.util.regex.Pattern;
1920
import javax.annotation.Nullable;
2021

2122
/**
@@ -33,7 +34,11 @@ public class BigQueryPath {
3334
private String dataset;
3435
private String table;
3536
private static final int NAME_MAX_LENGTH = 1024;
36-
private static final String VALID_NAME_REGEX = "[\\w]+";
37+
// Valid BigQuery dataset names can contain only letters, numbers, and underscores.
38+
// See here: https://cloud.google.com/bigquery/docs/datasets#dataset-naming
39+
private static final Pattern VALID_DATASET_NAME_REGEX = Pattern.compile("[\\w]+");
40+
// Valid BigQuery table names are defined here: https://cloud.google.com/bigquery/docs/tables#table_naming
41+
private static final Pattern VALID_TABLE_NAME_REGEX = Pattern.compile("[\\p{L}\\p{M}\\p{N}\\p{Pc}\\p{Pd}\\p{Zs}]+");
3742

3843
public BigQueryPath(String path) {
3944
parsePath(path);
@@ -66,31 +71,50 @@ private void parsePath(String path) {
6671
}
6772

6873
dataset = parts[0];
69-
validateName("Dataset" , dataset);
74+
validateDatasetName(dataset);
7075

7176
if (parts.length == 2) {
7277
table = parts[1];
73-
validateName("Table", table);
78+
validateTableName(table);
7479
}
7580
}
7681

7782

7883
/**
79-
* The dataset and table name must contain only letters, numbers, and underscores.
80-
* And it must be 1024 characters or fewer.
84+
* The dataset name can contain only letters, numbers, and underscores, and must be 1024 characters or fewer.
85+
* See here: https://cloud.google.com/bigquery/docs/datasets#dataset-naming
8186
*/
82-
private void validateName(String property, String name) {
87+
private void validateDatasetName(String name) {
8388
if (name.isEmpty()) {
84-
throw new IllegalArgumentException(
85-
String.format("%s should not be empty.", property));
89+
throw new IllegalArgumentException("Dataset should not be empty.");
8690
}
8791
if (name.length() > NAME_MAX_LENGTH) {
8892
throw new IllegalArgumentException(
89-
String.format("%s is invalid, it should contain at most %d characters.", property, NAME_MAX_LENGTH));
93+
String.format("Dataset is invalid, it should contain at most %d characters.", NAME_MAX_LENGTH));
94+
}
95+
if (!VALID_DATASET_NAME_REGEX.matcher(name).matches()) {
96+
throw new IllegalArgumentException("Dataset is invalid, it should contain only letters, numbers, " +
97+
"and underscores.");
98+
}
99+
}
100+
101+
/**
102+
* Table name can contain only Unicode characters in category L (letter), M (mark), N (number),
103+
* Pc (connector, including underscore), Pd (dash), Zs (space).
104+
* It also must be 1024 characters or fewer.
105+
* See here: https://cloud.google.com/bigquery/docs/tables#table_naming
106+
*/
107+
private void validateTableName(String name) {
108+
if (name.isEmpty()) {
109+
throw new IllegalArgumentException("Table should not be empty.");
90110
}
91-
if (!name.matches(VALID_NAME_REGEX)) {
111+
if (name.length() > NAME_MAX_LENGTH) {
92112
throw new IllegalArgumentException(
93-
String.format("%s is invalid, it should contain only letters, numbers, and underscores.", property));
113+
String.format("Table is invalid, it should contain at most %d characters.", NAME_MAX_LENGTH));
114+
}
115+
if (!VALID_TABLE_NAME_REGEX.matcher(name).matches()) {
116+
throw new IllegalArgumentException("Table is invalid, it should only contain Unicode characters in category L " +
117+
"(letter), M (mark), N (number), Pc (connector, including underscore), Pd (dash), Zs (space).");
94118
}
95119
}
96120

src/main/java/io/cdap/plugin/gcp/spanner/connector/SpannerPath.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
package io.cdap.plugin.gcp.spanner.connector;
1818

19+
import java.util.regex.Pattern;
1920
import javax.annotation.Nullable;
2021

2122
/**
@@ -26,7 +27,7 @@ public class SpannerPath {
2627
private String database;
2728
private String table;
2829
private static final int NAME_MAX_LENGTH = 1024;
29-
private static final String VALID_NAME_REGEX = "[\\w-]+";
30+
private static final Pattern VALID_NAME_REGEX = Pattern.compile("[\\w-]+");
3031

3132
public SpannerPath(String path) {
3233
parsePath(path);
@@ -88,7 +89,7 @@ private void validateName(String property, String name) {
8889
throw new IllegalArgumentException(
8990
String.format("%s is invalid, it should contain at most %d characters.", property, NAME_MAX_LENGTH));
9091
}
91-
if (!name.matches(VALID_NAME_REGEX)) {
92+
if (!VALID_NAME_REGEX.matcher(name).matches()) {
9293
throw new IllegalArgumentException(
9394
String.format("%s is invalid, it should contain only letters, numbers, and underscores.", property));
9495
}

src/test/java/io/cdap/plugin/gcp/bigquery/connector/BigQueryPathTest.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,26 @@ public void testValidPath() {
5353
path = new BigQueryPath("/dataset/table/");
5454
Assert.assertEquals("dataset", path.getDataset());
5555
Assert.assertEquals("table", path.getTable());
56+
57+
//table path with space
58+
path = new BigQueryPath("/dataset/table 01");
59+
Assert.assertEquals("dataset", path.getDataset());
60+
Assert.assertEquals("table 01", path.getTable());
61+
62+
//table path with non-ASCII letters and combining marks (Devanagari)
63+
path = new BigQueryPath("/dataset/ग्राहक");
64+
Assert.assertEquals("dataset", path.getDataset());
65+
Assert.assertEquals("ग्राहक", path.getTable());
66+
67+
//table path with leading digits, an underscore, and non-ASCII letters (Japanese)
68+
path = new BigQueryPath("/dataset/00_お客様");
69+
Assert.assertEquals("dataset", path.getDataset());
70+
Assert.assertEquals("00_お客様", path.getTable());
71+
72+
//table path with an accent and a dash
73+
path = new BigQueryPath("/dataset/étudiant-01");
74+
Assert.assertEquals("dataset", path.getDataset());
75+
Assert.assertEquals("étudiant-01", path.getTable());
5676
}
5777

5878

@@ -87,7 +107,8 @@ public void testInvalidPath() {
87107
() -> new BigQueryPath("/b/" + Strings.repeat('a', 1025)));
88108

89109
//table name contains an invalid character ('%')
90-
Assert.assertThrows("Dataset is invalid, it should contain only letters, numbers, and underscores.",
110+
Assert.assertThrows("Dataset is invalid, it should only contain Unicode characters in category " +
111+
"L (letter), M (mark), N (number), Pc (connector, including underscore), Pd (dash), Zs (space).",
91112
IllegalArgumentException.class, () -> new BigQueryPath("/a/%"));
92113
}
93114
}

0 commit comments

Comments
 (0)