@@ -136,40 +136,3 @@ def __len__(self) -> int:
136136
def __bool__(self) -> bool:
    """Return True when at least one step has been recorded."""
    if self._steps:
        return True
    return False
139-
140-
# IPC write options shared by all encoders in this module.
# Do not use multiple threads to encode record batches, as parallelism
# should be managed by beam.
_ARROW_CODER_IPC_OPTIONS = pa.ipc.IpcWriteOptions(use_threads=False)
144-
145-
# TODO(b/190756453): Make this into the upstream
# (preference: Arrow, Beam, tfx_bsl).
class _ArrowRecordBatchCoder(beam.coders.Coder):
  """Custom Beam coder for a single Arrow `pa.RecordBatch`.

  Each element is serialized with the Arrow IPC stream format (schema
  followed by exactly one record batch). `decode` enforces the
  one-batch-per-payload invariant.
  """

  def encode(self, value: pa.RecordBatch) -> bytes:
    """Serializes `value` as Arrow IPC stream bytes.

    Args:
      value: The record batch to encode.

    Returns:
      The IPC stream payload (schema message + one batch) as bytes.
    """
    sink = pa.BufferOutputStream()
    # Use the writer as a context manager so it is always closed (writing
    # the end-of-stream marker) even if write_batch raises, instead of
    # leaking an unterminated stream writer.
    with pa.ipc.new_stream(
        sink, value.schema, options=_ARROW_CODER_IPC_OPTIONS) as writer:
      writer.write_batch(value)
    return sink.getvalue().to_pybytes()

  def decode(self, encoded: bytes) -> pa.RecordBatch:
    """Deserializes bytes produced by `encode` into one record batch.

    Args:
      encoded: An Arrow IPC stream payload.

    Returns:
      The single record batch contained in the stream.

    Raises:
      ValueError: If the stream contains more than one record batch.
    """
    reader = pa.ipc.open_stream(encoded)
    result = reader.read_next_batch()
    # EAFP: a second read must hit end-of-stream; anything else means the
    # payload violated the one-batch invariant.
    try:
      reader.read_next_batch()
    except StopIteration:
      pass
    else:
      raise ValueError("Expected only one RecordBatch in the stream.")
    return result

  def to_type_hint(self):
    """Type hint Beam uses to associate this coder with its element type."""
    return pa.RecordBatch
172-
173-
174- beam .coders .typecoders .registry .register_coder (pa .RecordBatch ,
175- _ArrowRecordBatchCoder )
0 commit comments