From c95aaf27304d50e1a359e86d3bc7b67af700b775 Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Sun, 15 Jun 2025 14:03:57 -0700 Subject: [PATCH 1/6] BEAM-9626: Make Pymongo package optional - 20259 --- sdks/python/setup.py | 4 +++- sdks/python/tox.ini | 15 ++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 2b21d0463c98..7a083b476abd 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -360,7 +360,6 @@ def get_portability_package_data(): 'numpy>=1.14.3,<2.3.0', # Update pyproject.toml as well. 'objsize>=0.6.1,<0.8.0', 'packaging>=22.0', - 'pymongo>=3.8.0,<5.0.0', 'proto-plus>=1.7.1,<2', # 1. Use a tighter upper bound in protobuf dependency to make sure # the minor version at job submission @@ -535,6 +534,9 @@ def get_portability_package_data(): # `--update` / `-U` flag to replace the dask release brought in # by distributed. ], + 'mongodb': [ + 'pymongo>=3.8.0,<5.0.0' + ], 'yaml': [ 'docstring-parser>=0.15,<1.0', 'jinja2>=3.0,<3.2', diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index b87b5ecc6f67..84e2761e993d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,7 +33,7 @@ pip_pre = True # allow apps that support color to use it. passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. -extras = test,dataframe +extras = test,dataframe,mongodb # Don't warn that these commands aren't installed. 
allowlist_externals = false @@ -98,7 +98,7 @@ list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze [testenv:py{39,310,311,312}-cloud] ; extras = test,gcp,interactive,dataframe,aws,azure -extras = test,gcp,interactive,dataframe,aws,azure +extras = test,gcp,interactive,dataframe,aws,azure,mongodb commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" @@ -108,7 +108,7 @@ commands = deps = accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,ml_test +extras = test,gcp,dataframe,ml_test, mongodb commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" @@ -120,14 +120,14 @@ commands = deps = accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,p312_ml_test +extras = test,gcp,dataframe,p312_ml_test,mongodb commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{39,310,311,312}-dask] -extras = test,dask,dataframes +extras = test,dask,dataframes,mongodb commands_pre = pip install 'distributed>=2024.4.2' 'dask>=2024.4.2' commands = @@ -152,7 +152,7 @@ setenv = platform = linux passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_* # NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower. -extras = test,gcp,interactive,dataframe,aws +extras = test,gcp,interactive,dataframe,aws,mongodb commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. 
--cov-append" @@ -185,13 +185,14 @@ deps = # make extras available in case any of these libs are typed extras = gcp + mongodb commands = mypy --version python setup.py mypy [testenv:docs] -extras = test,gcp,docs,interactive,dataframe,dask +extras = test,gcp,docs,interactive,dataframe,dask,mongodb deps = Sphinx==7.4.7 sphinx_rtd_theme==3.0.1 From e31b3ae386c55304a9e5f21ba46cf75765e09acc Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Mon, 16 Jun 2025 23:31:52 -0700 Subject: [PATCH 2/6] Added few more extra sections --- CHANGES.md | 5 ++++- sdks/python/setup.py | 12 ++++++++---- sdks/python/tox.ini | 16 +++++++++------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2e75be8a79c0..fcce721f1eb6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -76,7 +76,10 @@ ## Breaking Changes -* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). +* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)).======= +* Yapf version upgraded to 0.43.0 for formatting (Python) ([#34801](https://github.com/apache/beam/pull/34801/)). +* Extra packages moved out of `install_required` to individual extra sections (`hdfs`, `mongodb`, `redis`) as part of [#35297](https://github.com/apache/beam/pull/35297) + ## Deprecations diff --git a/sdks/python/setup.py b/sdks/python/setup.py index aceec31ec9f0..74fa5cbcc05d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -369,9 +369,7 @@ def get_portability_package_data(): # TODO(https://github.com/grpc/grpc/issues/37710): Unpin grpc 'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,<1.66.0; python_version <= "3.12"', # pylint: disable=line-too-long 'grpcio>=1.67.0; python_version >= "3.13"', - 'hdfs>=2.1.0,<3.0.0', 'httplib2>=0.8,<0.23.0', - 'jsonschema>=4.0.0,<5.0.0', 'jsonpickle>=3.0.0,<4.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. @@ -390,10 +388,8 @@ def get_portability_package_data(): # 3. 
Exclude protobuf 4 versions that leak memory, see: # https://github.com/apache/beam/issues/28246 'protobuf>=3.20.3,<6.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*', # pylint: disable=line-too-long - 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', - 'redis>=5.0.0,<6', 'regex>=2020.6.8', 'requests>=2.24.0,<3.0.0', 'sortedcontainers>=2.4.0', @@ -488,6 +484,7 @@ def get_portability_package_data(): # Skip version 6.1.13 due to # https://github.com/jupyter/jupyter_client/issues/637 'jupyter-client>=6.1.11,!=6.1.13,<8.2.1', + 'pydot>=1.2.0,<2', 'timeloop>=1.0.2,<2', 'nbformat>=5.0.5,<6', 'nbconvert>=6.2.0,<8', @@ -559,15 +556,22 @@ def get_portability_package_data(): # `--update` / `-U` flag to replace the dask release brought in # by distributed. ], + 'hdfs': [ + 'hdfs>=2.1.0,<3.0.0', + ], 'mongodb': [ 'pymongo>=3.8.0,<5.0.0' ], + 'redis': [ + 'redis>=5.0.0,<6' + ], 'yaml': [ 'docstring-parser>=0.15,<1.0', 'jinja2>=3.0,<3.2', 'virtualenv-clone>=0.5,<1.0', # https://github.com/PiotrDabkowski/Js2Py/issues/317 'js2py>=0.74,<1; python_version<"3.12"', + 'jsonschema>=4.0.0,<5.0.0', ] + dataframe_dependency, # Keep the following dependencies in line with what we test against # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index d219b1ae69eb..b9197fb32f3e 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,7 +33,7 @@ pip_pre = True # allow apps that support color to use it. passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. -extras = test,dataframe,mongodb +extras = test,dataframe,mongodb,redis,hdfs # Don't warn that these commands aren't installed. 
allowlist_externals = false @@ -98,7 +98,7 @@ list_dependencies_command = {envbindir}/python.exe {envbindir}/pip.exe freeze [testenv:py{39,310,311,312,313}-cloud] ; extras = test,gcp,interactive,dataframe,aws,azure -extras = test,gcp,interactive,dataframe,aws,azure,mongodb +extras = test,gcp,interactive,dataframe,aws,azure,mongodb,redis,hdfs commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" @@ -109,7 +109,7 @@ deps = pip==25.0.1 accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,ml_test, mongodb +extras = test,gcp,dataframe,ml_test,mongodb,redis,hdfs commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" @@ -121,7 +121,7 @@ commands = deps = accelerate>=1.6.0 setenv = -extras = test,gcp,dataframe,p312_ml_test,mongodb +extras = test,gcp,dataframe,p312_ml_test,mongodb,redis,hdfs commands = # Log tensorflow version for debugging /bin/sh -c "pip freeze | grep -E tensorflow" @@ -129,7 +129,7 @@ commands = [testenv:py{39,310,311,31,313}-dask] -extras = test,dask,dataframes,mongodb +extras = test,dask,dataframes,mongodb,redis,hdfs commands_pre = pip install 'distributed>=2024.4.2' 'dask>=2024.4.2' commands = @@ -154,7 +154,7 @@ setenv = platform = linux passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_* # NOTE: we could add ml_test to increase the collected code coverage metrics, but it would make the suite slower. -extras = test,gcp,interactive,dataframe,aws,mongodb +extras = test,gcp,interactive,dataframe,aws,mongodb,redis,hdfs commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. 
--cov-append" @@ -188,13 +188,15 @@ deps = extras = gcp mongodb + redis + hdfs commands = mypy --version python setup.py mypy [testenv:docs] -extras = test,gcp,docs,interactive,dataframe,dask,mongodb +extras = test,gcp,docs,interactive,dataframe,dask,mongodb,redis,hdfs deps = Sphinx==7.4.7 sphinx_rtd_theme==3.0.1 From 26e012687aa555d1b4d92844129c4a538641794d Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Tue, 17 Jun 2025 22:52:31 -0700 Subject: [PATCH 3/6] fix tox environment --- sdks/python/apache_beam/yaml/json_utils.py | 3 +-- sdks/python/tox.ini | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 6b17faec713b..939c3b297185 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -25,8 +25,6 @@ from typing import Any from typing import Optional -import jsonschema - import apache_beam as beam from apache_beam.portability.api import schema_pb2 from apache_beam.typehints import schemas @@ -210,6 +208,7 @@ def json_parser( if json_schema is None: validate_fn = None else: + import jsonschema cls = jsonschema.validators.validator_for(json_schema) cls.check_schema(json_schema) validate_fn = _PicklableFromConstructor( diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index b9197fb32f3e..77075c40bd40 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -68,6 +68,7 @@ commands_post = commands = false {envname} is misconfigured [testenv:py{39,310,311,312,313}] +extras = interactive,yaml commands_pre = python --version pip --version @@ -85,6 +86,7 @@ commands_pre = pip --version # pip check bash {toxinidir}/scripts/run_tox_cleanup.sh +extras = interactive,yaml commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" From 2d13c45e88f78f9340367324ae9ca1f75f26d34f Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Tue, 17 Jun 2025 
23:12:18 -0700 Subject: [PATCH 4/6] fixed the changes file and tox environments --- CHANGES.md | 7 ++++++- sdks/python/tox.ini | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index fcce721f1eb6..ee13aa620306 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -78,7 +78,12 @@ * X behavior was changed ([#X](https://github.com/apache/beam/issues/X)).======= * Yapf version upgraded to 0.43.0 for formatting (Python) ([#34801](https://github.com/apache/beam/pull/34801/)). -* Extra packages moved out of `install_required` to individual extra sections (`hdfs`, `mongodb`, `redis`) as part of [#35297](https://github.com/apache/beam/pull/35297) +* Extra packages moved out of `install_requires` to individual extra sections as part of [#35297](https://github.com/apache/beam/pull/35297) + - `jsonschema` has been moved to existing `yaml` extra + - `hdfs` has been moved to `hdfs` extra + - `pydot` has been moved to existing `interactive` extra + - `pymongo` has been moved to `mongodb` extra + - `redis` has been moved to `redis` extra ## Deprecations diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 77075c40bd40..e03dcc1d43e5 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -86,7 +86,7 @@ commands_pre = pip --version # pip check bash {toxinidir}/scripts/run_tox_cleanup.sh -extras = interactive,yaml +extras = test,interactive,yaml commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" From 9dd92620ac4c2e9a954c5a0560a2870b2afd2931 Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Wed, 18 Jun 2025 22:38:21 -0700 Subject: [PATCH 5/6] fix tox file --- sdks/python/apache_beam/yaml/json_utils.py | 1 + sdks/python/tox.ini | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index 939c3b297185..aac4f1101036 100644 --- 
a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -314,6 +314,7 @@ def row_validator(beam_schema: schema_pb2.Schema, if not json_schema: return lambda x: None + import jsonschema # Validate that this compiles, but avoid pickling the validator itself. _ = jsonschema.validators.validator_for(json_schema)(json_schema) _validate_compatible(beam_schema_to_json_schema(beam_schema), json_schema) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index e03dcc1d43e5..e3541ac97f60 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -33,7 +33,7 @@ pip_pre = True # allow apps that support color to use it. passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. -extras = test,dataframe,mongodb,redis,hdfs +extras = test,dataframe,mongodb,redis,hdfs,interactive,yaml # Don't warn that these commands aren't installed. allowlist_externals = false @@ -68,7 +68,7 @@ commands_post = commands = false {envname} is misconfigured [testenv:py{39,310,311,312,313}] -extras = interactive,yaml +extras = test,interactive,yaml commands_pre = python --version pip --version @@ -86,12 +86,13 @@ commands_pre = pip --version # pip check bash {toxinidir}/scripts/run_tox_cleanup.sh -extras = test,interactive,yaml +extras = test,interactive,yaml,hdfs,mongodb commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{39,310,311,312,313}-win] +extras = test,gcp,interactive,dataframe,aws,azure,mongodb,redis,hdfs commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" From ccc51b0acee6b3f04be8a0a3262bec0d9d591e28 Mon Sep 17 00:00:00 2001 From: Rakesh Kumar Date: Thu, 19 Jun 2025 00:18:18 -0700 Subject: [PATCH 6/6] fix some dependency and lint warning --- .../groovy/org/apache/beam/gradle/BeamModulePlugin.groovy | 2 +- 
.../apache_beam/runners/interactive/sql/beam_sql_magics.py | 7 ++++--- sdks/python/tox.ini | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 7867c4895c4e..074f388016e5 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -3087,7 +3087,7 @@ class BeamModulePlugin implements Plugin { dependsOn ':sdks:python:sdist' doLast { def distTarBall = "${pythonRootDir}/build/apache-beam.tar.gz" - def packages = "gcp,test,aws,azure,dataframe" + def packages = "gcp,test,aws,azure,dataframe,yaml" def extra = project.findProperty('beamPythonExtra') if (extra) { packages += ",${extra}" diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py index bf4c4c0380e5..3dc866907a40 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py +++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py @@ -31,6 +31,10 @@ from typing import Tuple from typing import Union +from IPython.core.magic import Magics +from IPython.core.magic import line_cell_magic +from IPython.core.magic import magics_class + import apache_beam as beam from apache_beam.pvalue import PValue from apache_beam.runners.interactive import interactive_environment as ie @@ -54,9 +58,6 @@ from apache_beam.testing.test_stream_service import TestStreamServiceController from apache_beam.transforms.sql import SqlTransform from apache_beam.typehints.native_type_compatibility import match_is_named_tuple -from IPython.core.magic import Magics -from IPython.core.magic import line_cell_magic -from IPython.core.magic import magics_class _LOGGER = logging.getLogger(__name__) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 
e3541ac97f60..8f56cb0e7f47 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -68,7 +68,7 @@ commands_post = commands = false {envname} is misconfigured [testenv:py{39,310,311,312,313}] -extras = test,interactive,yaml +extras = test,interactive,yaml,hdfs,mongodb commands_pre = python --version pip --version