Skip to content

Commit 289446d

Browse files
feat: Add null_markers property to LoadJobConfig and CSVOptions (#2239)
* feat: Add null_markers property to LoadJobConfig and CSVOptions
* feat: adds null_markers as a load and external_config option

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent 69a2c2b commit 289446d

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

google/cloud/bigquery/external_config.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,27 @@ def skip_leading_rows(self):
474474
def skip_leading_rows(self, value):
475475
self._properties["skipLeadingRows"] = str(value)
476476

477+
@property
478+
def null_markers(self) -> Optional[Iterable[str]]:
479+
"""Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.
480+
481+
.. note::
482+
null_marker and null_markers can't be set at the same time.
483+
If null_marker is set, null_markers must not be set.
484+
If null_markers is set, null_marker must not be set.
485+
If both null_marker and null_markers are set at the same time, a user error is thrown.
486+
Any string listed in null_markers, including the empty string, is interpreted as SQL NULL.
487+
This applies to all column types.
488+
489+
See:
490+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
491+
"""
492+
return self._properties.get("nullMarkers")
493+
494+
@null_markers.setter
495+
def null_markers(self, value: Optional[Iterable[str]]):
496+
self._properties["nullMarkers"] = value
497+
477498
def to_api_repr(self) -> dict:
478499
"""Build an API representation of this object.
479500

google/cloud/bigquery/job/load.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,27 @@ def null_marker(self):
386386
def null_marker(self, value):
387387
self._set_sub_prop("nullMarker", value)
388388

389+
@property
390+
def null_markers(self) -> Optional[List[str]]:
391+
"""Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file.
392+
393+
.. note::
394+
null_marker and null_markers can't be set at the same time.
395+
If null_marker is set, null_markers must not be set.
396+
If null_markers is set, null_marker must not be set.
397+
If both null_marker and null_markers are set at the same time, a user error is thrown.
398+
Any string listed in null_markers, including the empty string, is interpreted as SQL NULL.
399+
This applies to all column types.
400+
401+
See:
402+
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers
403+
"""
404+
return self._get_sub_prop("nullMarkers")
405+
406+
@null_markers.setter
407+
def null_markers(self, value: Optional[List[str]]):
408+
self._set_sub_prop("nullMarkers", value)
409+
389410
@property
390411
def preserve_ascii_control_characters(self):
391412
"""Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV.
@@ -854,6 +875,13 @@ def null_marker(self):
854875
"""
855876
return self.configuration.null_marker
856877

878+
@property
879+
def null_markers(self):
880+
"""See
881+
:attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`.
882+
"""
883+
return self.configuration.null_markers
884+
857885
@property
858886
def quote_character(self):
859887
"""See

tests/unit/job/test_load.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def _setUpConstants(self):
4242
self.TIME_ZONE = "UTC"
4343
self.TIME_FORMAT = "%H:%M:%S"
4444
self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ"
45+
self.NULL_MARKERS = ["", "NA"]
4546

4647
def _make_resource(self, started=False, ended=False):
4748
resource = super(TestLoadJob, self)._make_resource(started, ended)
@@ -52,6 +53,7 @@ def _make_resource(self, started=False, ended=False):
5253
config["timeZone"] = self.TIME_ZONE
5354
config["timeFormat"] = self.TIME_FORMAT
5455
config["timestampFormat"] = self.TIMESTAMP_FORMAT
56+
config["nullMarkers"] = self.NULL_MARKERS
5557

5658
config["destinationTable"] = {
5759
"projectId": self.PROJECT,
@@ -140,6 +142,10 @@ def _verifyResourceProperties(self, job, resource):
140142
self.assertEqual(job.null_marker, config["nullMarker"])
141143
else:
142144
self.assertIsNone(job.null_marker)
145+
if "nullMarkers" in config:
146+
self.assertEqual(job.null_markers, config["nullMarkers"])
147+
else:
148+
self.assertIsNone(job.null_markers)
143149
if "quote" in config:
144150
self.assertEqual(job.quote_character, config["quote"])
145151
else:
@@ -211,6 +217,7 @@ def test_ctor(self):
211217
self.assertIsNone(job.ignore_unknown_values)
212218
self.assertIsNone(job.max_bad_records)
213219
self.assertIsNone(job.null_marker)
220+
self.assertIsNone(job.null_markers)
214221
self.assertIsNone(job.quote_character)
215222
self.assertIsNone(job.skip_leading_rows)
216223
self.assertIsNone(job.source_format)

tests/unit/job/test_load_config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,22 @@ def test_null_marker_setter(self):
469469
config.null_marker = null_marker
470470
self.assertEqual(config._properties["load"]["nullMarker"], null_marker)
471471

472+
def test_null_markers_missing(self):
473+
config = self._get_target_class()()
474+
self.assertIsNone(config.null_markers)
475+
476+
def test_null_markers_hit(self):
477+
null_markers = ["", "NA"]
478+
config = self._get_target_class()()
479+
config._properties["load"]["nullMarkers"] = null_markers
480+
self.assertEqual(config.null_markers, null_markers)
481+
482+
def test_null_markers_setter(self):
483+
null_markers = ["", "NA"]
484+
config = self._get_target_class()()
485+
config.null_markers = null_markers
486+
self.assertEqual(config._properties["load"]["nullMarkers"], null_markers)
487+
472488
def test_preserve_ascii_control_characters_missing(self):
473489
config = self._get_target_class()()
474490
self.assertIsNone(config.preserve_ascii_control_characters)

tests/unit/test_external_config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ def test_from_api_repr_csv(self):
277277
"allowJaggedRows": False,
278278
"encoding": "encoding",
279279
"preserveAsciiControlCharacters": False,
280+
"nullMarkers": ["", "NA"],
280281
},
281282
},
282283
)
@@ -293,6 +294,7 @@ def test_from_api_repr_csv(self):
293294
self.assertEqual(ec.options.allow_jagged_rows, False)
294295
self.assertEqual(ec.options.encoding, "encoding")
295296
self.assertEqual(ec.options.preserve_ascii_control_characters, False)
297+
self.assertEqual(ec.options.null_markers, ["", "NA"])
296298

297299
got_resource = ec.to_api_repr()
298300

@@ -314,6 +316,7 @@ def test_to_api_repr_csv(self):
314316
options.skip_leading_rows = 123
315317
options.allow_jagged_rows = False
316318
options.preserve_ascii_control_characters = False
319+
options.null_markers = ["", "NA"]
317320
ec.csv_options = options
318321

319322
exp_resource = {
@@ -326,6 +329,7 @@ def test_to_api_repr_csv(self):
326329
"allowJaggedRows": False,
327330
"encoding": "encoding",
328331
"preserveAsciiControlCharacters": False,
332+
"nullMarkers": ["", "NA"],
329333
},
330334
}
331335

0 commit comments

Comments
 (0)