google
diff --git a/tfrutil/accessor.py b/tfrutil/accessor.py
Lines changed: 8 additions & 9 deletions
diff --git a/tfrutil/accessor_test.py b/tfrutil/accessor_test.py
Lines changed: 8 additions & 5 deletions
diff --git a/tfrutil/beam_image.py b/tfrutil/beam_image.py
Lines changed: 8 additions & 8 deletions
diff --git a/tfrutil/beam_image_test.py b/tfrutil/beam_image_test.py
Lines changed: 11 additions & 11 deletions
diff --git a/tfrutil/beam_pipeline.py b/tfrutil/beam_pipeline.py
Lines changed: 21 additions & 21 deletions
diff --git a/tfrutil/beam_pipeline_test.py b/tfrutil/beam_pipeline_test.py
Lines changed: 16 additions & 16 deletions
@@ -26,7 +26,7 @@
 from tfrutil import client
 
 
-@pd.api.extensions.register_dataframe_accessor("tensorflow")
+@pd.api.extensions.register_dataframe_accessor('tensorflow')
 class TFRUtilAccessor:
   """DataFrame Accessor class for TFRUtil."""
 
@@ -37,13 +37,13 @@ def __init__(self, pandas_obj):
   def to_tfr(
       self,
       output_dir: str,
-      runner: str = "DirectRunner",
+      runner: str = 'DirectRunner',
       project: Optional[str] = None,
       region: Optional[str] = None,
       tfrutil_path: Optional[str] = None,
       dataflow_options: Union[Dict[str, Any], None] = None,
-      job_label: str = "to-tfr",
-      compression: Optional[str] = "gzip",
+      job_label: str = 'to-tfr',
+      compression: Optional[str] = 'gzip',
       num_shards: int = 0):
     """TFRUtil Pandas Accessor.
 
@@ -54,9 +54,9 @@ def to_tfr(
       import tfrutil
 
       df.tfrutil.to_tfr(
-          output_dir="gcs://foo/bar/train",
-          runner="DirectRunner",
-          compression="gzip",
+          output_dir='gcs://foo/bar/train',
+          runner='DirectRunner',
+          compression='gzip',
           num_shards=10)
 
     Args:
@@ -67,10 +67,9 @@ def to_tfr(
       tfrutil_path: Path to tfrutil source (Required if DataFlowRunner).
       dataflow_options: Optional dictionary containing DataFlow options.
       job_label: User supplied description for the beam job name.
-      compression: Can be "gzip" or None for no compression.
+      compression: Can be 'gzip' or None for no compression.
       num_shards: Number of shards to divide the TFRecords into. Default is
           0 = no sharding.
-
     """
     client.create_tfrecords(
         self._df,
 
@@ -16,11 +16,12 @@
 
 """Tests for pandas accessor."""
 
+import os
 import unittest
 
 # pylint: disable=unused-import
-from tfrutil import accessor
-from tfrutil import constants
+
+import tfrutil
 from tfrutil import test_utils
 
 
@@ -29,12 +30,14 @@ class DataFrameAccessor(unittest.TestCase):
 
   def setUp(self):
     self.test_df = test_utils.get_test_df()
+    self.output_dir = '/tmp/train'
+    os.makedirs(self.output_dir, exist_ok=True)
 
   def test_accessor(self):
     """Tests pandas accessor."""
 
-    self.assertIsNone(self.test_df.tensorflow.to_tfr(runner="DirectRunner",
-                                                     output_dir="/tmp/train"))
+    self.assertIsNone(self.test_df.tensorflow.to_tfr(
+        runner='DirectRunner', output_dir=self.output_dir))
 
-if __name__ == "__main__":
+if __name__ == '__main__':
   unittest.main()
@@ -61,10 +61,10 @@ def load(image_uri):
   """Loads an image."""
 
   try:
-    with tf.io.gfile.GFile(image_uri, "rb") as f:
+    with tf.io.gfile.GFile(image_uri, 'rb') as f:
       return Image.open(f)
   except tf.python.framework.errors_impl.NotFoundError:
-    raise OSError("File {} was not found.".format(image_uri))
+    raise OSError('File {} was not found.'.format(image_uri))
 
 
 # pylint: disable=abstract-method
@@ -93,14 +93,14 @@ def process(
     try:
       image_uri = element[self.image_key]
       image = load(image_uri)
-      d["image"] = encode(image)
-      d["image_width"], d["image_height"] = image.size
-      d["image_channels"] = mode_to_channel(image.mode)
+      d['image'] = encode(image)
+      d['image_width'], d['image_height'] = image.size
+      d['image_channels'] = mode_to_channel(image.mode)
 
-    #pylint: disable=broad-except
+    # pylint: disable=broad-except
     except Exception as e:
-      logging.warning("Could not load image: %s", image_uri)
-      logging.error("Exception was: %s", str(e))
+      logging.warning('Could not load image: %s', image_uri)
+      logging.error('Exception was: %s', str(e))
 
     element.update(d)
     yield element
@@ -37,17 +37,17 @@ class BeamImageTests(unittest.TestCase):
   def setUp(self):
     self.pipeline = test_utils.get_test_pipeline()
     self.df = test_utils.get_test_df()
-    self.image_file = "tfrutil/test_data/images/cat/cat-640x853-1.jpg"
+    self.image_file = 'tfrutil/test_data/images/cat/cat-640x853-1.jpg'
 
   def test_load(self):
     """Tests the image loading function."""
     img = beam_image.load(self.image_file)
     self.assertIsInstance(img, PIL.JpegImagePlugin.JpegImageFile)
 
   def test_file_not_found_load(self):
-    """Test loading an image that doesn"t exist."""
+    """Test loading an image that doesn't exist."""
     with self.assertRaises(OSError):
-      _ = beam_image.load("tfrutil/test_data/images/cat/food.jpg")
+      _ = beam_image.load('tfrutil/test_data/images/cat/food.jpg')
 
   def test_mode_to_channel(self):
     """Tests `mode_to_channel`."""
@@ -91,11 +91,11 @@ def test_extract_image_dofn(self):
 
       data = (
           p
-          | "ReadFromDataFrame" >> beam.Create(self.df.values.tolist())
-          | "FlattenDataFrame" >> beam.Map(
-              lambda x: ",".join([str(item) for item in x]))
-          | "DecodeCSV" >> beam.Map(converter.decode)
-          | "ExtractImage" >> beam.ParDo(extract_images_fn)
+          | 'ReadFromDataFrame' >> beam.Create(self.df.values.tolist())
+          | 'FlattenDataFrame' >> beam.Map(
+              lambda x: ','.join([str(item) for item in x]))
+          | 'DecodeCSV' >> beam.Map(converter.decode)
+          | 'ExtractImage' >> beam.ParDo(extract_images_fn)
       )
 
       def key_matcher(expected_keys):
@@ -108,10 +108,10 @@ def _equal(actual):
             actual_keys = set(element.keys())
             if actual_keys != expected_keys_:
               raise util.BeamAssertException(
-                  "PCollection key match failed. Actual ({}) vs. expected ({})"
+                  'PCollection key match failed. Actual ({}) vs. expected ({})'
                   .format(actual_keys, expected_keys_))
         return _equal
 
-      expected_keys = ["image_uri", "label", "split", "image",
-                       "image_height", "image_width", "image_channels"]
+      expected_keys = ['image_uri', 'label', 'split', 'image',
+                       'image_height', 'image_width', 'image_channels']
       util.assert_that(data, key_matcher(expected_keys))
@@ -45,10 +45,10 @@ def _get_job_name(job_label: str = None) -> str:
       insure uniqueness.
   """
 
-  job_name = "tfrutil-" + common.get_timestamp()
+  job_name = 'tfrutil-' + common.get_timestamp()
   if job_label:
-    job_label = job_label.replace("_", "-")
-    job_name += "-" + job_label
+    job_label = job_label.replace('_', '-')
+    job_name += '-' + job_label
 
   return job_name
 
@@ -71,13 +71,13 @@ def _get_pipeline_options(
   """Returns Beam pipeline options."""
 
   options_dict = {
-      "runner": runner,
-      "staging_location": os.path.join(job_dir, "staging"),
-      "temp_location": os.path.join(job_dir, "tmp"),
-      "job_name": job_name,
-      "teardown_policy": "TEARDOWN_ALWAYS",
-      "save_main_session": True,
-      "pipeline_type_check": False,
+      'runner': runner,
+      'staging_location': os.path.join(job_dir, 'staging'),
+      'temp_location': os.path.join(job_dir, 'tmp'),
+      'job_name': job_name,
+      'teardown_policy': 'TEARDOWN_ALWAYS',
+      'save_main_session': True,
+      'pipeline_type_check': False,
   }
 
   if project:
@@ -97,7 +97,7 @@ def _partition_fn(
     unused_num_partitions: int = -1) -> int:
   """Returns index used to partition an element from a PCollection."""
   del unused_num_partitions
-  dataset_type = element[constants.SPLIT_KEY].decode("utf-8")
+  dataset_type = element[constants.SPLIT_KEY].decode('utf-8')
   try:
     index = constants.SPLIT_VALUES.index(dataset_type)
   except ValueError as e:
@@ -198,7 +198,7 @@ def build_pipeline(
 
   #with beam.Pipeline(runner, options=options) as p:
   p = beam.Pipeline(options=options)
-  with tft_beam.Context(temp_dir=os.path.join(job_dir, "tft_tmp")):
+  with tft_beam.Context(temp_dir=os.path.join(job_dir, 'tft_tmp')):
 
     converter = tft.coders.CsvCoder(constants.IMAGE_CSV_COLUMNS,
                                     constants.IMAGE_CSV_METADATA.schema)
@@ -210,11 +210,11 @@ def build_pipeline(
     # extract_images_fn.
     image_csv_data = (
         p
-        | "ReadFromDataFrame" >> beam.Create(df.values.tolist())
-        | "ToCSVRows" >> beam.Map(
-            lambda x: ",".join([str(item) for item in x]))
-        | "DecodeCSV" >> beam.Map(converter.decode)
-        | "ReadImage" >> beam.ParDo(extract_images_fn)
+        | 'ReadFromDataFrame' >> beam.Create(df.values.tolist())
+        | 'ToCSVRows' >> beam.Map(
+            lambda x: ','.join([str(item) for item in x]))
+        | 'DecodeCSV' >> beam.Map(converter.decode)
+        | 'ReadImage' >> beam.ParDo(extract_images_fn)
     )
 
     # Split dataset into train and validation.
@@ -259,22 +259,22 @@ def build_pipeline(
     _ = (
         transformed_train_data
         | 'EncodeTrainData' >> beam.Map(transformed_data_coder.encode)
-        | 'WriteTrainData' >> tfr_writer(prefix="train"))
+        | 'WriteTrainData' >> tfr_writer(prefix='train'))
 
     _ = (
         transformed_val_data
         | 'EncodeValData' >> beam.Map(transformed_data_coder.encode)
-        | 'WriteValData' >> tfr_writer(prefix="val"))
+        | 'WriteValData' >> tfr_writer(prefix='val'))
 
     _ = (
         transformed_test_data
         | 'EncodeTestData' >> beam.Map(transformed_data_coder.encode)
-        | 'WriteTestData' >> tfr_writer(prefix="test"))
+        | 'WriteTestData' >> tfr_writer(prefix='test'))
 
     _ = (
         discard_data
         | 'DiscardDataWriter' >> beam.io.WriteToText(
-            os.path.join(job_dir, "discarded-data")))
+            os.path.join(job_dir, 'discarded-data')))
 
     # Output transform function and metadata
     _ = (transform_fn | 'WriteTransformFn' >> tft_beam.WriteTransformFn(
 
@@ -25,29 +25,29 @@
 from tfrutil import beam_pipeline
 
 
-#pylint: disable=protected-access
+# pylint: disable=protected-access
 
 class BeamPipelineTests(unittest.TestCase):
   """Tests for beam_image.py"""
 
   def test_processing_fn_with_int_label(self):
-    "Test preprocessing fn with integer label."
+    'Test preprocessing fn with integer label.'
     element = {
-        "split": "TRAIN",
-        "image_uri": "gs://foo/bar.jpg",
-        "label": 1}
+        'split': 'TRAIN',
+        'image_uri': 'gs://foo/bar.jpg',
+        'label': 1}
     result = beam_pipeline._preprocessing_fn(element, integer_label=True)
     self.assertEqual(element, result)
 
-  @mock.patch("tfrutil.beam_pipeline.tft")
+  @mock.patch('tfrutil.beam_pipeline.tft')
   def test_processing_fn_with_string_label(self, mock_transform):
-    "Test preprocessing fn with string label."
+    'Test preprocessing fn with string label.'
     mock_transform.compute_and_apply_vocabulary.return_value = tf.constant(
         0, dtype=tf.int64)
     element = {
-        "split": "TRAIN",
-        "image_uri": "gs://foo/bar.jpg",
-        "label": tf.constant("cat", dtype=tf.string)}
+        'split': 'TRAIN',
+        'image_uri': 'gs://foo/bar.jpg',
+        'label': tf.constant('cat', dtype=tf.string)}
     result = beam_pipeline._preprocessing_fn(element, integer_label=False)
     result['label'] = result['label'].numpy()
     self.assertEqual(0, result['label'])
@@ -65,14 +65,14 @@ def test_partition_fn(self):
     """Test the partition function."""
 
     test_data = {
-        "split": "update_me",
-        "image_uri": "gs://foo/bar0.jpg",
-        "label": 1}
+        'split': 'update_me',
+        'image_uri': 'gs://foo/bar0.jpg',
+        'label': 1}
 
-    for i, part in enumerate(["TRAIN", "VALIDATION", "TEST", "FOO"]):
-      test_data['split'] = part.encode("utf-8")
+    for i, part in enumerate(['TRAIN', 'VALIDATION', 'TEST', 'FOO']):
+      test_data['split'] = part.encode('utf-8')
       index = beam_pipeline._partition_fn(test_data)
 
       self.assertEqual(
           index, i,
-          "{} should be index {} but was index {}".format(part, i, index))
+          '{} should be index {} but was index {}'.format(part, i, index))