Skip to content

Commit 3b10ee1

Browse files
authored
Merge branch 'main' into feat-374142081-add-source-column-match
2 parents b7eabe1 + 289446d commit 3b10ee1

File tree

5 files changed

+77
-0
lines changed

5 files changed

+77
-0
lines changed

google/cloud/bigquery/external_config.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,27 @@ def source_column_match(self, value: Optional[SourceColumnMatch]):
508508
)
509509
self._properties["sourceColumnMatch"] = value.value if value else None
510510

511+
@property
512+
def null_markers(self) -> Optional[Iterable[str]]:
513+
"""Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.
514+
515+
.. note::
516+
null_marker and null_markers can't be set at the same time.
517+
If null_marker is set, null_markers must not be set.
517+
If null_markers is set, null_marker must not be set.
518+
If both null_marker and null_markers are set at the same time, a user error is thrown.
519+
Any string listed in null_markers, including the empty string, is interpreted as SQL NULL.
521+
This applies to all column types.
522+
523+
See:
524+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
525+
"""
526+
return self._properties.get("nullMarkers")
527+
528+
@null_markers.setter
529+
def null_markers(self, value: Optional[Iterable[str]]):
530+
self._properties["nullMarkers"] = value
531+
511532
def to_api_repr(self) -> dict:
512533
"""Build an API representation of this object.
513534

google/cloud/bigquery/job/load.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,27 @@ def null_marker(self):
387387
def null_marker(self, value):
388388
self._set_sub_prop("nullMarker", value)
389389

390+
@property
391+
def null_markers(self) -> Optional[List[str]]:
392+
"""Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file.
393+
394+
.. note::
395+
null_marker and null_markers can't be set at the same time.
396+
If null_marker is set, null_markers must not be set.
396+
If null_markers is set, null_marker must not be set.
397+
If both null_marker and null_markers are set at the same time, a user error is thrown.
398+
Any string listed in null_markers, including the empty string, is interpreted as SQL NULL.
400+
This applies to all column types.
401+
402+
See:
403+
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers
404+
"""
405+
return self._get_sub_prop("nullMarkers")
406+
407+
@null_markers.setter
408+
def null_markers(self, value: Optional[List[str]]):
409+
self._set_sub_prop("nullMarkers", value)
410+
390411
@property
391412
def preserve_ascii_control_characters(self):
392413
"""Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV.
@@ -888,6 +909,13 @@ def null_marker(self):
888909
"""
889910
return self.configuration.null_marker
890911

912+
@property
913+
def null_markers(self):
914+
"""See
915+
:attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`.
916+
"""
917+
return self.configuration.null_markers
918+
891919
@property
892920
def quote_character(self):
893921
"""See

tests/unit/job/test_load.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def _setUpConstants(self):
4444
self.TIME_ZONE = "UTC"
4545
self.TIME_FORMAT = "%H:%M:%S"
4646
self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ"
47+
self.NULL_MARKERS = ["", "NA"]
4748

4849
def _make_resource(self, started=False, ended=False):
4950
resource = super(TestLoadJob, self)._make_resource(started, ended)
@@ -55,6 +56,7 @@ def _make_resource(self, started=False, ended=False):
5556
config["timeZone"] = self.TIME_ZONE
5657
config["timeFormat"] = self.TIME_FORMAT
5758
config["timestampFormat"] = self.TIMESTAMP_FORMAT
59+
config["nullMarkers"] = self.NULL_MARKERS
5860

5961
config["destinationTable"] = {
6062
"projectId": self.PROJECT,
@@ -143,6 +145,10 @@ def _verifyResourceProperties(self, job, resource):
143145
self.assertEqual(job.null_marker, config["nullMarker"])
144146
else:
145147
self.assertIsNone(job.null_marker)
148+
if "nullMarkers" in config:
149+
self.assertEqual(job.null_markers, config["nullMarkers"])
150+
else:
151+
self.assertIsNone(job.null_markers)
146152
if "quote" in config:
147153
self.assertEqual(job.quote_character, config["quote"])
148154
else:
@@ -223,6 +229,7 @@ def test_ctor(self):
223229
self.assertIsNone(job.ignore_unknown_values)
224230
self.assertIsNone(job.max_bad_records)
225231
self.assertIsNone(job.null_marker)
232+
self.assertIsNone(job.null_markers)
226233
self.assertIsNone(job.quote_character)
227234
self.assertIsNone(job.skip_leading_rows)
228235
self.assertIsNone(job.source_format)

tests/unit/job/test_load_config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,22 @@ def test_null_marker_setter(self):
469469
config.null_marker = null_marker
470470
self.assertEqual(config._properties["load"]["nullMarker"], null_marker)
471471

472+
def test_null_markers_missing(self):
473+
config = self._get_target_class()()
474+
self.assertIsNone(config.null_markers)
475+
476+
def test_null_markers_hit(self):
477+
null_markers = ["", "NA"]
478+
config = self._get_target_class()()
479+
config._properties["load"]["nullMarkers"] = null_markers
480+
self.assertEqual(config.null_markers, null_markers)
481+
482+
def test_null_markers_setter(self):
483+
null_markers = ["", "NA"]
484+
config = self._get_target_class()()
485+
config.null_markers = null_markers
486+
self.assertEqual(config._properties["load"]["nullMarkers"], null_markers)
487+
472488
def test_preserve_ascii_control_characters_missing(self):
473489
config = self._get_target_class()()
474490
self.assertIsNone(config.preserve_ascii_control_characters)

tests/unit/test_external_config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ def test_from_api_repr_csv(self):
280280
"encoding": "encoding",
281281
"preserveAsciiControlCharacters": False,
282282
"sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
283+
"nullMarkers": ["", "NA"],
283284
},
284285
},
285286
)
@@ -300,6 +301,7 @@ def test_from_api_repr_csv(self):
300301
ec.options.source_column_match,
301302
self.SOURCE_COLUMN_MATCH,
302303
)
304+
self.assertEqual(ec.options.null_markers, ["", "NA"])
303305

304306
got_resource = ec.to_api_repr()
305307

@@ -322,6 +324,8 @@ def test_to_api_repr_csv(self):
322324
options.allow_jagged_rows = False
323325
options.preserve_ascii_control_characters = False
324326
options.source_column_match = self.SOURCE_COLUMN_MATCH
327+
options.null_markers = ["", "NA"]
328+
325329
ec.csv_options = options
326330

327331
exp_resource = {
@@ -335,6 +339,7 @@ def test_to_api_repr_csv(self):
335339
"encoding": "encoding",
336340
"preserveAsciiControlCharacters": False,
337341
"sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
342+
"nullMarkers": ["", "NA"],
338343
},
339344
}
340345

0 commit comments

Comments
 (0)