diff --git a/CHANGES.md b/CHANGES.md index 2e75be8a79c0..ee13aa620306 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -76,7 +76,15 @@ ## Breaking Changes -* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). +* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). +* Yapf version upgraded to 0.43.0 for formatting (Python) ([#34801](https://github.com/apache/beam/pull/34801/)). +* Extra packages moved out of `install_requires` to individual extra sections as part of [#35297](https://github.com/apache/beam/pull/35297) + - `jsonschema` has been moved to existing `yaml` extra + - `hdfs` has been moved to `hdfs` extra + - `pydot` has been moved to existing `interactive` extra + - `pymongo` has been moved to `mongodb` extra + - `redis` has been moved to `redis` extra + ## Deprecations diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 7867c4895c4e..074f388016e5 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -3087,7 +3087,7 @@ class BeamModulePlugin implements Plugin { dependsOn ':sdks:python:sdist' doLast { def distTarBall = "${pythonRootDir}/build/apache-beam.tar.gz" - def packages = "gcp,test,aws,azure,dataframe" + def packages = "gcp,test,aws,azure,dataframe,yaml" def extra = project.findProperty('beamPythonExtra') if (extra) { packages += ",${extra}" diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py index bf4c4c0380e5..3dc866907a40 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py +++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py @@ -31,6 +31,10 @@ from typing import Tuple from typing import Union +from IPython.core.magic import Magics +from 
IPython.core.magic import line_cell_magic +from IPython.core.magic import magics_class + import apache_beam as beam from apache_beam.pvalue import PValue from apache_beam.runners.interactive import interactive_environment as ie @@ -54,9 +58,6 @@ from apache_beam.testing.test_stream_service import TestStreamServiceController from apache_beam.transforms.sql import SqlTransform from apache_beam.typehints.native_type_compatibility import match_is_named_tuple -from IPython.core.magic import Magics -from IPython.core.magic import line_cell_magic -from IPython.core.magic import magics_class _LOGGER = logging.getLogger(__name__) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 6b17faec713b..aac4f1101036 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -25,8 +25,6 @@ from typing import Any from typing import Optional -import jsonschema - import apache_beam as beam from apache_beam.portability.api import schema_pb2 from apache_beam.typehints import schemas @@ -210,6 +208,7 @@ def json_parser( if json_schema is None: validate_fn = None else: + import jsonschema cls = jsonschema.validators.validator_for(json_schema) cls.check_schema(json_schema) validate_fn = _PicklableFromConstructor( @@ -315,6 +314,7 @@ def row_validator(beam_schema: schema_pb2.Schema, if not json_schema: return lambda x: None + import jsonschema # Validate that this compiles, but avoid pickling the validator itself. 
_ = jsonschema.validators.validator_for(json_schema)(json_schema) _validate_compatible(beam_schema_to_json_schema(beam_schema), json_schema) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 630bb7188ed5..74fa5cbcc05d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -369,16 +369,13 @@ def get_portability_package_data(): # TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc 'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,<1.66.0; python_version <= "3.12"', # pylint: disable=line-too-long 'grpcio>=1.67.0; python_version >= "3.13"', - 'hdfs>=2.1.0,<3.0.0', 'httplib2>=0.8,<0.23.0', - 'jsonschema>=4.0.0,<5.0.0', 'jsonpickle>=3.0.0,<4.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. 'numpy>=1.14.3,<2.3.0', # Update pyproject.toml as well. 'objsize>=0.6.1,<0.8.0', 'packaging>=22.0', - 'pymongo>=3.8.0,<5.0.0', 'proto-plus>=1.7.1,<2', # 1. Use a tighter upper bound in protobuf dependency to make sure # the minor version at job submission @@ -391,10 +388,8 @@ def get_portability_package_data(): # 3. Exclude protobuf 4 versions that leak memory, see: # https://github.com/apache/beam/issues/28246 'protobuf>=3.20.3,<6.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long - 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', - 'redis>=5.0.0,<6', 'regex>=2020.6.8', 'requests>=2.24.0,<3.0.0', 'sortedcontainers>=2.4.0', @@ -489,6 +484,7 @@ def get_portability_package_data(): # Skip version 6.1.13 due to # https://github.com/jupyter/jupyter_client/issues/637 'jupyter-client>=6.1.11,!=6.1.13,<8.2.1', + 'pydot>=1.2.0,<2', 'timeloop>=1.0.2,<2', 'nbformat>=5.0.5,<6', 'nbconvert>=6.2.0,<8', @@ -560,12 +556,22 @@ def get_portability_package_data(): # `--update` / `-U` flag to replace the dask release brought in # by distributed. 
], + 'hdfs': [ + 'hdfs>=2.1.0,<3.0.0', + ], + 'mongodb': [ + 'pymongo>=3.8.0,<5.0.0' + ], + 'redis': [ + 'redis>=5.0.0,<6' + ], 'yaml': [ 'docstring-parser>=0.15,<1.0', 'jinja2>=3.0,<3.2', 'virtualenv-clone>=0.5,<1.0', # https://github.com/PiotrDabkowski/Js2Py/issues/317 'js2py>=0.74,<1; python_version<"3.12"', + 'jsonschema>=4.0.0,<5.0.0', ] + dataframe_dependency, # Keep the following dependencies in line with what we test against # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 5131769509d9..8f56cb0e7f47 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,7 +33,7 @@ pip_pre = True # allow apps that support color to use it. passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. -extras = test,dataframe +extras = test,dataframe,mongodb,redis,hdfs,interactive,yaml # Don't warn that these commands aren't installed. allowlist_externals = false @@ -68,6 +68,7 @@ commands_post = commands = false {envname} is misconfigured [testenv:py{39,310,311,312,313}] +extras = test,interactive,yaml,hdfs,mongodb commands_pre = python --version pip --version @@ -85,11 +86,13 @@ commands_pre = pip --version # pip check bash {toxinidir}/scripts/run_tox_cleanup.sh +extras = test,interactive,yaml,hdfs,mongodb commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{39,310,311,312,313}-win] +extras = test,gcp,interactive,dataframe,aws,azure,mongodb,redis,hdfs commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" @@ -98,7 +101,7 @@ list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze [testenv:py{39,310,311,312,313}-cloud] ; extras = test,gcp,interactive,dataframe,aws,azure -extras = test,gcp,interactive,dataframe,aws,azure +extras = 
test,gcp,interactive,dataframe,aws,azure,mongodb,redis,hdfs commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" @@ -109,7 +112,7 @@ deps = pip==25.0.1 accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,ml_test +extras = test,gcp,dataframe,ml_test,mongodb,redis,hdfs commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" @@ -121,14 +124,15 @@ commands = deps = accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,p312_ml_test +extras = test,gcp,dataframe,p312_ml_test,mongodb,redis,hdfs commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" + [testenv:py{39,310,311,312,313}-dask] -extras = test,dask,dataframes +extras = test,dask,dataframes,mongodb,redis,hdfs commands_pre = pip install 'distributed>=2024.4.2' 'dask>=2024.4.2' commands = @@ -153,7 +157,7 @@ setenv = platform = linux passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_* # NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower. -extras = test,gcp,interactive,dataframe,aws +extras = test,gcp,interactive,dataframe,aws,mongodb,redis,hdfs commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append" @@ -186,13 +190,16 @@ deps = # make extras available in case any of these libs are typed extras = gcp + mongodb + redis + hdfs commands = mypy --version python setup.py mypy [testenv:docs] -extras = test,gcp,docs,interactive,dataframe,dask +extras = test,gcp,docs,interactive,dataframe,dask,mongodb,redis,hdfs deps = Sphinx==7.4.7 sphinx_rtd_theme==3.0.1