diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index b2fedb1746d4..ddab941f9278 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -46,7 +46,6 @@ import fastavro import numpy as np -import regex import apache_beam from apache_beam import coders @@ -70,6 +69,7 @@ # Protect against environments where bigquery library is not available. try: + import regex from apitools.base.py.exceptions import HttpError from apitools.base.py.exceptions import HttpForbiddenError from apitools.base.py.transfer import Upload diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py index 1f1e315fea09..10058351938e 100644 --- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py +++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py @@ -32,14 +32,17 @@ from typing import Tuple from typing import Union -import pydot - import apache_beam as beam from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.runners.interactive import interactive_environment as ie from apache_beam.runners.interactive import pipeline_instrument as inst from apache_beam.runners.interactive.display import pipeline_graph_renderer +try: + import pydot +except ImportError: + pass + # pylint does not understand context # pylint:disable=dangerous-default-value diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 2d8f32051973..832651a477dd 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -25,12 +25,15 @@ from typing import Any from typing import Optional -import jsonschema - import apache_beam as beam from apache_beam.portability.api import schema_pb2 from apache_beam.typehints import schemas +try: + import jsonschema +except ImportError: + pass + JSON_ATOMIC_TYPES_TO_BEAM = { 'boolean': schema_pb2.BOOLEAN, 'integer': schema_pb2.INT64, diff --git a/sdks/python/apache_beam/yaml/main_test.py b/sdks/python/apache_beam/yaml/main_test.py index d233e0e2d73c..43b8caa1853b 100644 --- a/sdks/python/apache_beam/yaml/main_test.py +++ b/sdks/python/apache_beam/yaml/main_test.py @@ -24,6 +24,11 @@ from apache_beam.yaml import main +try: + import jsonschema +except ImportError: + jsonschema = None + TEST_PIPELINE = ''' pipeline: type: chain @@ -79,6 +84,7 @@ ''' +@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed") class MainTest(unittest.TestCase): def test_pipeline_spec_from_file(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py index a19dfd694a85..1e13038512cd 100644 --- a/sdks/python/apache_beam/yaml/yaml_io_test.py +++ b/sdks/python/apache_beam/yaml/yaml_io_test.py @@ -32,6 +32,11 @@ from apache_beam.typehints import schemas as schema_utils from apache_beam.yaml.yaml_transform import YamlTransform +try: + import jsonschema +except ImportError: + jsonschema = None + class FakeReadFromPubSub: def __init__( @@ -82,6 +87,7 @@ def __call__(self, topic, *, with_attributes, id_label, timestamp_attribute): return AssertThat(equal_to(self._messages)) +@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed") class YamlPubSubTest(unittest.TestCase): def test_simple_read(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( diff --git a/sdks/python/apache_beam/yaml/yaml_mapping_test.py b/sdks/python/apache_beam/yaml/yaml_mapping_test.py index cc2fe4639abc..169c86d7b87b 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping_test.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping_test.py @@ -30,6 +30,11 @@ from apache_beam.yaml import yaml_mapping from apache_beam.yaml.yaml_transform import YamlTransform +try: + import jsonschema +except ImportError: + jsonschema = None + DATA = [ beam.Row(label='11a', conductor=11, rank=0), beam.Row(label='37a', conductor=37, rank=1), @@ -37,6 +42,7 @@ ] +@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed") class YamlMappingTest(unittest.TestCase): def test_basic(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py index 2ba49a1fab82..d5950fb9efaf 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py @@ -29,6 +29,11 @@ from apache_beam.yaml import yaml_provider from apache_beam.yaml.yaml_transform import YamlTransform +try: + import jsonschema +except ImportError: + jsonschema = None + class CreateTimestamped(beam.PTransform): _yaml_requires_inputs = False @@ -83,6 +88,7 @@ def raise_on_big(row): } +@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed") class YamlTransformE2ETest(unittest.TestCase): def test_composite(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( diff --git a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py index 14bd758ebae5..59b1619b6512 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py @@ -55,6 +55,7 @@ def new_pipeline(): pickle_library='cloudpickle')) +@unittest.skipIf(jsonschema is None, "Yaml dependencies not installed") class MainTest(unittest.TestCase): def assertYaml(self, expected, result): result = SafeLineLoader.strip_metadata(result) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 9ed2a124e94d..534324b83c18 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -379,7 +379,6 @@ def get_portability_package_data(): install_requires=[ 'crcmod>=1.7,<2.0', 'cryptography>=39.0.0,<48.0.0', - 'orjson>=3.9.7,<4', 'fastavro>=0.23.6,<2', 'fasteners>=0.3,<1.0', # TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc @@ -387,7 +386,6 @@ def get_portability_package_data(): 'grpcio>=1.67.0; python_version >= "3.13"', 'hdfs>=2.1.0,<3.0.0', 'httplib2>=0.8,<0.23.0', - 'jsonschema>=4.0.0,<5.0.0', 'jsonpickle>=3.0.0,<4.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. @@ -407,11 +405,9 @@ def get_portability_package_data(): # 3. Exclude protobuf 4 versions that leak memory, see: # https://github.com/apache/beam/issues/28246 'protobuf>=3.20.3,<7.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long - 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', 'redis>=5.0.0,<6', - 'regex>=2020.6.8', 'requests>=2.32.4,<3.0.0', 'sortedcontainers>=2.4.0', 'typing-extensions>=3.7.0', @@ -509,7 +505,9 @@ def get_portability_package_data(): # --extra-index-url or --index-url in requirements.txt in # Dataflow, which allows installing python packages from private # Python repositories in GAR. - 'keyrings.google-artifactregistry-auth' + 'keyrings.google-artifactregistry-auth', + 'orjson>=3.9.7,<4', + 'regex>=2020.6.8', ], 'interactive': [ 'facets-overview>=1.1.0,<2', @@ -520,6 +518,7 @@ def get_portability_package_data(): # Skip version 6.1.13 due to # https://github.com/jupyter/jupyter_client/issues/637 'jupyter-client>=6.1.11,!=6.1.13,<8.2.1', + 'pydot>=1.2.0,<2', 'timeloop>=1.0.2,<2', 'nbformat>=5.0.5,<6', 'nbconvert>=6.2.0,<8', @@ -577,6 +576,7 @@ def get_portability_package_data(): 'virtualenv-clone>=0.5,<1.0', # https://github.com/PiotrDabkowski/Js2Py/issues/317 'js2py>=0.74,<1; python_version<"3.12"', + 'jsonschema>=4.0.0,<5.0.0', ] + dataframe_dependency, # Keep the following dependencies in line with what we test against # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 8ea95ad8fc8b..921833d9f4b5 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,7 +33,7 @@ pip_pre = True # allow apps that support color to use it. passenv=TERM,CLOUDSDK_CONFIG,DOCKER_*,TESTCONTAINERS_*,TC_*,ALLOYDB_PASSWORD # Set [] options for pip installation of apache-beam tarball. -extras = test,dataframe +extras = test,dataframe,yaml # Don't warn that these commands aren't installed. allowlist_externals = false