diff --git a/.github/workflows/devcontainer-docker-image.yml b/.github/workflows/devcontainer-docker-image.yml index 4262499a8..5ed355959 100644 --- a/.github/workflows/devcontainer-docker-image.yml +++ b/.github/workflows/devcontainer-docker-image.yml @@ -23,10 +23,10 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Setup Docker buildx - uses: docker/setup-buildx-action@v3.4.0 + uses: docker/setup-buildx-action@v3.7.1 - name: Prepare metadata id: meta diff --git a/.github/workflows/dispatched_pre-commit.yml b/.github/workflows/dispatched_pre-commit.yml deleted file mode 100644 index aa15698ec..000000000 --- a/.github/workflows/dispatched_pre-commit.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: run pre-commit - -on: - repository_dispatch: - types: [pre-commit-run-command] -jobs: - runPreCommit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - with: - repository: ${{github.event.client_payload.pull_request.head.repo.full_name}} - ref: ${{github.event.client_payload.pull_request.head.ref}} - token: ${{ secrets.ACTION_TRIGGER_TOKEN }} - - name: Cache multiple paths - uses: actions/cache@v4 - with: - path: | - ~/.cache/pre-commit - ~/.cache/pip - key: pre-commit-dispatched-${{ runner.os }}-build - - uses: actions/setup-python@v5 - with: - python-version: 3.8 - - name: install-pre-commit - run: python -m pip install --upgrade pre-commit - - name: Slash Command Dispatch - run: pre-commit run --all-files || (exit 0) - - run: | - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git commit -m "Run pre-commit" -a - git push diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 49861f6fa..bbbbd27ae 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Login to Docker Hub uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 864faea59..afa32a443 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -5,53 +5,27 @@ on: push: branches: [main] +defaults: + run: + shell: bash -leo pipefail {0} jobs: mypy: runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if environment-test.yml has not changed - CACHE_NUMBER: 0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py39-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/environment-test.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 - with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/environment-test.yml - python-version: "3.10" # Run pre-commit on oldest supported Python version - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=3.10 + environment-name: pymc-test + init-shell: bash + cache-environment: true - name: Install-pymc and mypy dependencies run: | - conda activate pymc-test pip install -e . pip install --pre -U polyagamma python --version - name: Run mypy run: | - conda activate pymc-test python ./scripts/run_mypy.py --verbose diff --git a/.github/workflows/pr-auto-label.yml b/.github/workflows/pr-auto-label.yml new file mode 100644 index 000000000..2dcb2dd3d --- /dev/null +++ b/.github/workflows/pr-auto-label.yml @@ -0,0 +1,19 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + +jobs: + sync: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Sync labels with closing issues + uses: wd60622/closing-labels@v0.0.3 + with: + exclude: "help wanted,needs info,beginner friendly" + env: + GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ca602d2f..ff00df31b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,55 +1,49 @@ name: release-pipeline on: + push: + branches: + - main release: types: - - created + - published jobs: - release-job: + build-package: runs-on: ubuntu-latest - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_PYMC }} + permissions: + # write attestations and id-token are necessary for attest-build-provenance-github + attestations: write + id-token: write steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.7 - # TODO: ideally, this pipeline should run parallelized tests in upstream jobs.. - #- name: Install test tooling - # run: | - # pip install pytest pytest-cov nose nose-parameterized - # pip install -r requirements.txt - #- name: Run tests - # run: | - # pytest --cov=./pymc --cov-report term-missing pymc/ - - name: Install release tooling - run: | - pip install build twine - - name: Build package - run: | - python -m build - - name: Check version number match - run: | - echo "GITHUB_REF: ${GITHUB_REF}" - # The GITHUB_REF should be something like "refs/tags/v1.2.3" - # Make sure the package version is the same as the tag - grep -Rq "^Version: ${GITHUB_REF:11}$" pymc.egg-info/PKG-INFO - - name: Publish to PyPI - run: | - twine check dist/* - twine upload --repository pypi --username __token__ --password ${PYPI_TOKEN} dist/* - test-install-job: - needs: release-job + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + persist-credentials: false + - uses: hynek/build-and-inspect-python-package@14c7e53f5d033cfa99f7af916fa59a6f7f356394 # v2.11.0 + with: + # Prove that the packages were built in the context of this workflow. + attest-build-provenance-github: true + + publish-package: + # Don't publish from forks + if: github.repository_owner == 'pymc-devs' && github.event_name == 'release' && github.event.action == 'published' + # Use the `release` GitHub environment to protect the Trusted Publishing (OIDC) + # workflow by requiring signoff from a maintainer. + environment: release + needs: build-package runs-on: ubuntu-latest + permissions: + # write id-token is necessary for trusted publishing (OIDC) + id-token: write steps: - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.7 - - name: Give PyPI a chance to update the index - run: sleep 240 - - name: Install from PyPI - run: | - pip install pymc==${GITHUB_REF:11} + - name: Download Distribution Artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + # The build-and-inspect-python-package action invokes upload-artifact. + # These are the correct arguments from that action. + name: Packages + path: dist + - name: Publish Package to PyPI + uses: pypa/gh-action-pypi-publish@67339c736fd9354cd4f8cb0b744f2b82a74b5c70 # v1.12.3 + # Implicitly attests that the packages were uploaded in the context of this workflow. diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f02afdc62..684ccb68a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,6 +7,13 @@ on: - main + +# Cancel running workflows for updated PRs +# https://turso.tech/blog/simple-trick-to-save-environment-and-money-when-using-github-actions +concurrency: + group: ${{ github.workflow}}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + # Tests are split into multiple jobs to accelerate the CI. # Different jobs should be organized to take approximately the same # time to complete (and not be prohibitely slow). @@ -24,7 +31,7 @@ jobs: outputs: changes: ${{ steps.changes.outputs.src }} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 with: fetch-depth: 0 - uses: dorny/paths-filter@v3 @@ -116,6 +123,7 @@ jobs: tests/logprob/test_censoring.py tests/logprob/test_composite_logprob.py tests/logprob/test_cumsum.py + tests/logprob/test_linalg.py tests/logprob/test_mixture.py tests/logprob/test_order.py tests/logprob/test_rewriting.py @@ -132,52 +140,26 @@ jobs: PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: - shell: bash -l {0} + shell: bash -leo pipefail {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if environment-test.yml has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py${{matrix.python-version}}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/environment-test.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/environment-test.yml - python-version: ${{matrix.python-version}} - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=${{matrix.python-version}} + environment-name: pymc-test + init-shell: bash + cache-environment: true - name: Install-pymc run: | - conda activate pymc-test pip install -e . # TODO: https://github.com/pymc-devs/pymc/issues/7417 pip install --pre -U 'polyagamma<1.3.7' python --version - conda list + micromamba list - name: Run tests run: | - conda activate pymc-test python -m pytest -vv --cov=pymc --cov-report=xml --no-cov-on-fail --cov-report term --durations=50 $TEST_SUBSET - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -208,53 +190,27 @@ jobs: PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' defaults: run: - shell: cmd + shell: cmd /C call {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if conda-envs/windows-environment-test.yml has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py${{matrix.python-version}}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/windows-environment-test.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 - with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/windows-environment-test.yml - python-version: ${{matrix.python-version}} - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=${{matrix.python-version}} + environment-name: pymc-test + init-shell: cmd.exe + cache-environment: true - name: Install-pymc run: | - conda activate pymc-test pip install -e . pip install --pre -U 'polyagamma<1.3.7' python --version - conda list + micromamba list - name: Run tests # This job uses a cmd shell, therefore the environment variable syntax is different! # The ">-" in the next line replaces newlines with spaces (see https://stackoverflow.com/a/66809682). run: >- - conda activate pymc-test && python -m pytest -vv --cov=pymc --cov-report=xml --no-cov-on-fail --cov-report term --durations=50 %TEST_SUBSET% - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -292,47 +248,22 @@ jobs: PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: - shell: bash -l {0} + shell: bash -leo pipefail {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if environment-test.yml has not changed - CACHE_NUMBER: 0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py${{matrix.python-version}}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/environment-test.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 - with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/environment-test.yml - python-version: ${{matrix.python-version}} - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=${{matrix.python-version}} + environment-name: pymc-test + init-shell: bash + cache-environment: true - name: Install pymc run: | - conda activate pymc-test pip install -e . python --version - conda list + micromamba list - name: Run tests run: | python -m pytest -vv --cov=pymc --cov-report=xml --no-cov-on-fail --cov-report term --durations=50 $TEST_SUBSET @@ -361,47 +292,22 @@ jobs: PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: - shell: bash -l {0} + shell: bash -leo pipefail {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if environment-jax.yml has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py${{matrix.python-version}}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/environment-jax.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/environment-jax.yml - python-version: ${{matrix.python-version}} - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=${{matrix.python-version}} + environment-name: pymc-test + init-shell: bash + cache-environment: true - name: Install pymc run: | - conda activate pymc-test pip install -e . python --version - conda list + micromamba list - name: Run tests run: | python -m pytest -vv --cov=pymc --cov-report=xml --no-cov-on-fail --cov-report term --durations=50 $TEST_SUBSET @@ -430,53 +336,27 @@ jobs: PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' defaults: run: - shell: cmd + shell: cmd /C call {0} steps: - - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 - - name: Cache conda - uses: actions/cache@v4 - env: - # Increase this value to reset cache if conda-envs/windows-environment-test.yml has not changed - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-py${{matrix.python-version}}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('conda-envs/windows-environment-test.yml') }} - - name: Cache multiple paths - uses: actions/cache@v4 - env: - # Increase this value to reset cache if requirements.txt has not changed - CACHE_NUMBER: 0 - with: - path: | - ~/.cache/pip - $RUNNER_TOOL_CACHE/Python/* - ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ - hashFiles('requirements.txt') }} - - uses: conda-incubator/setup-miniconda@v2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + - uses: mamba-org/setup-micromamba@v2 with: - miniforge-variant: Mambaforge - miniforge-version: latest - mamba-version: "*" - activate-environment: pymc-test - channel-priority: strict environment-file: conda-envs/windows-environment-test.yml - python-version: ${{matrix.python-version}} - use-mamba: true - use-only-tar-bz2: false # IMPORTANT: This may break caching of conda packages! See https://github.com/conda-incubator/setup-miniconda/issues/267 + create-args: >- + python=${{matrix.python-version}} + environment-name: pymc-test + init-shell: cmd.exe + cache-environment: true - name: Install-pymc run: | - conda activate pymc-test pip install -e . pip install --pre -U 'polyagamma<1.3.7' python --version - conda list + micromamba list - name: Run tests # This job uses a cmd shell, therefore the environment variable syntax is different! # The ">-" in the next line replaces newlines with spaces (see https://stackoverflow.com/a/66809682). run: >- - conda activate pymc-test && python -m pytest -vv --cov=pymc --cov-report=xml --no-cov-on-fail --cov-report term --durations=50 %TEST_SUBSET% - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e58de58d..f523706d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,7 +48,7 @@ repos: - --exclude=binder/ - --exclude=versioneer.py - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.8.4 hooks: - id: ruff args: [--fix, --show-fixes] diff --git a/GOVERNANCE.md b/GOVERNANCE.md index e7ea84b86..41478fe1b 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -161,46 +161,7 @@ nomination at the earliest. The nomination process is explained below in more detail in a section of its own. ### Current Core Contributors -Contributors who are also part of a dedicated team or are institutional -contributors will have so indicated after their name. - -Dedicated teams only cover a small part of the work needed to -get the project going, tasks like fundraising, outreach or marketing -for example don't (yet) have a dedicated team. -Contributors don't need to be part of any dedicated team. - -* Adrian Seyboldt (dev - PyMC Labs) -* Alex Andorra (dev - PyMC Labs) -* Austin Rochford -* Ben Mares -* Benjamin Vincent (docs - PyMC Labs) -* Bill Engels (dev) -* Chris Fonnesbeck (dev, docs) -* Christian Luhmann (community) -* Colin Carroll (dev) -* Eelke Spaak (dev) -* Eric Ma (dev - PyMC Labs) -* Fernando Irarrazaval (community) -* George Ho (dev) -* Junpeng Lao (dev, community) -* Larry Dong (dev) -* Luciano Paz (dev - PyMC Labs) -* Martina Cantaro (docs) -* Maxim Kochurov (dev - PyMC Labs) -* Meenal Jhajharia (docs, community) -* Michael Osthege (dev) -* Oriol Abril-Pla (docs, community) -* Osvaldo Martin (dev, docs) -* Purna Chandra Mansingh (community) -* Ravin Kumar (dev, community, docs) -* Reshama Shaikh (community - PyMC Labs) -* Ricardo Vieira (dev, community) -* Robert P. Goldman (dev) -* Rob Zinkov (dev, community) -* Sandra Meneses (community) -* Sayam Kumar (dev, docs) -* Thomas Wiecki (dev, community - PyMC Labs) -* Virgile Andreani (dev) +The list of current core contributors is available at https://github.com/orgs/pymc-devs/teams/core-contributors ## Steering Council @@ -244,11 +205,10 @@ In particular, the Council may: The current Steering Council membership comprises: -- Chris Fonnesbeck (dev, docs) -- Junpeng Lao (dev, community) -- Oriol Abril-Pla (docs, community) -- Ravin Kumar (dev, community, docs) -- Thomas Wiecki (dev, community - PyMC Labs) +- Ricardo Vieira (dev, community - PyMC Labs) +- Colin Carroll (dev) +- Osvaldo Martin (dev, docs) +- Jesse Grabowski (community, dev - PyMC Labs) Note that as explained in the [community architecture section](#community-and-team-architecture) and as indicated again in the description of the Steering Council above, @@ -327,38 +287,6 @@ subcommittee. This is a different situation from a BDFL delegate for a specific decision, or a recusal situation, in which the BDFL gives up their authority to someone else in full. -### NumFOCUS Subcommittee - -The Council will maintain one narrowly focused subcommittee to manage its -interactions with NumFOCUS. - -- The NumFOCUS Subcommittee is comprised of 5 persons who manage project - funding that comes through NumFOCUS. It is expected that these funds will - be spent in a manner that is consistent with the non-profit mission of - NumFOCUS and the direction of the Project as determined by the full - Council. -- This Subcommittee shall NOT make decisions about the direction, scope, - technical or financial direction of the Project. - -#### NumFOCUS subcommittee membership -This Subcommittee will have 5 members. With at least -2 members being on the Steering Council. No more -than 2 Subcommitee Members can report to one person or company through -employment or contracting work (including the reportee, i.e. -the reportee + 1 is the max). -This avoids effective majorities resting on one person. - -Any Core Contributor is eligible for the NumFOCUS subcommittee. - -#### Current NumFOCUS Subcommitee -The current NumFOCUS Subcommittee consists of: - -- Peadar Coyle -- Chris Fonnesbeck -- John Salvatier -- Jon Sedar -- Thomas Wiecki - ## BDFL The Project will have a BDFL (Benevolent Dictator for Life), who is currently @@ -554,9 +482,9 @@ The log of past election processes is kept on [Discourse](https://discourse.pymc the membership constraints were not met, candidates are ranked by interpreting yes=+1, abstain=0 and no=-1. * If too many candidates were confirmed, - the {max_range} number of candidates with higher rank are elected. + the max number of candidates (7) with higher rank are elected. * If not enough candidates were chosen, - the {min_range} number of candidates with higher rank are elected. + the min number of candidates (4) with higher rank are elected. * If membership constraints were not met, candidates are selected progressively by rank if they meet the membership requirements. If for example out of 7 candidates for the NumFOCUS subcommittee @@ -677,12 +605,18 @@ Role: [Community Team](https://discourse.pymc.io/g/Community_Team) group. #### Accounts and services ownership and administration -The PyMC Project also has accounts and hosts services on several platforms -such as GitHub, Discourse, Twitter, ReadTheDocs, and Medium. +The PyMC Project also has accounts and hosts services on several platforms. +Some examples of such platforms include (but are not limited to) +GitHub, Discourse, PyPI, Discord, Twitter, ReadTheDocs, or Medium. +Any service under the PyMC project should follow these rules, +even if not explicitly listed above as an example. If possible, all Council Members and relevant Core Contributors should have -administrative rights on those platforms. -If this is not possible, administrative rights should be distributed among +administrative rights on those platforms. [SPEC 6](https://scientific-python.org/specs/spec-0006/) +from the scientific python project has some recommendations on shared +infrastructure and credentials. + +If none of the above were possible, administrative rights should be distributed among Council Members and relevant Core Contributors and establish a rotation of the administrative rights every 1-2 years. diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 85e6694a9..71b6c78ed 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -10,10 +10,10 @@ dependencies: - cachetools>=4.2.1 - cloudpickle - h5py>=2.7 -- numpy>=1.15.0 +- numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/environment-docs.yml b/conda-envs/environment-docs.yml index 86097c5ab..f795fca07 100644 --- a/conda-envs/environment-docs.yml +++ b/conda-envs/environment-docs.yml @@ -8,10 +8,10 @@ dependencies: - arviz>=0.13.0 - cachetools>=4.2.1 - cloudpickle -- numpy>=1.15.0 +- numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - rich>=13.7.1 - scipy>=1.4.1 diff --git a/conda-envs/environment-jax.yml b/conda-envs/environment-jax.yml index 97d25dd5b..48649a617 100644 --- a/conda-envs/environment-jax.yml +++ b/conda-envs/environment-jax.yml @@ -16,11 +16,11 @@ dependencies: - jaxlib>=0.4.28 - libblas=*=*mkl - mkl-service -- numpy>=1.15.0 +- numpy>=1.25.0 - numpyro>=0.8.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - networkx - rich>=13.7.1 diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index 58cde0d32..e6fe9857e 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -11,12 +11,10 @@ dependencies: - cloudpickle - h5py>=2.7 - jax -- libblas=*=*mkl -- mkl-service -- numpy>=1.15.0 +- numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - networkx - rich>=13.7.1 diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 6d785e2ca..ee5bd206f 100644 --- a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -10,10 +10,10 @@ dependencies: - cachetools>=4.2.1 - cloudpickle - h5py>=2.7 -- numpy>=1.15.0 +- numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - networkx - rich>=13.7.1 diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index fd17c3171..fa5985283 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -13,10 +13,10 @@ dependencies: - libpython - mkl-service>=2.3.0 - m2w64-toolchain -- numpy>=1.15.0 +- numpy>=1.25.0 - pandas>=0.24.0 - pip -- pytensor>=2.25.1,<2.26 +- pytensor>=2.26.2,<2.27 - python-graphviz - networkx - rich>=13.7.1 diff --git a/docs/source/api/model.rst b/docs/source/api/model.rst index 4619ffbb2..1e674dc20 100644 --- a/docs/source/api/model.rst +++ b/docs/source/api/model.rst @@ -8,4 +8,5 @@ Model model/core model/conditioning + model/optimization model/fgraph diff --git a/docs/source/api/model/transform.rst b/docs/source/api/model/conditioning.rst similarity index 56% rename from docs/source/api/model/transform.rst rename to docs/source/api/model/conditioning.rst index 3e83176b1..c84dceb92 100644 --- a/docs/source/api/model/transform.rst +++ b/docs/source/api/model/conditioning.rst @@ -9,12 +9,3 @@ Model Conditioning observe change_value_transforms remove_value_transforms - - -Model Optimization ------------------- -.. currentmodule:: pymc.model.transform.optimization -.. autosummary:: - :toctree: generated/ - - freeze_dims_and_data diff --git a/docs/source/api/model/optimization.rst b/docs/source/api/model/optimization.rst new file mode 100644 index 000000000..eb208cd4d --- /dev/null +++ b/docs/source/api/model/optimization.rst @@ -0,0 +1,7 @@ +Model Optimization +------------------ +.. currentmodule:: pymc.model.transform.optimization +.. autosummary:: + :toctree: generated/ + + freeze_dims_and_data diff --git a/docs/source/api/pytensorf.rst b/docs/source/api/pytensorf.rst index 71e718d69..0c4609503 100644 --- a/docs/source/api/pytensorf.rst +++ b/docs/source/api/pytensorf.rst @@ -6,7 +6,7 @@ PyTensor utils .. autosummary:: :toctree: generated/ - compile_pymc + compile gradient hessian hessian_diag @@ -19,6 +19,4 @@ PyTensor utils CallableTensor join_nonshared_inputs make_shared_replacements - generator - convert_generator_data convert_data diff --git a/docs/source/guides/Probability_Distributions.rst b/docs/source/guides/Probability_Distributions.rst index 81530746c..ae5abdb92 100644 --- a/docs/source/guides/Probability_Distributions.rst +++ b/docs/source/guides/Probability_Distributions.rst @@ -29,21 +29,32 @@ A variable requires at least a ``name`` argument, and zero or more model paramet Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``pytensor.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``pytensor.config.floatX`` is ``float32``, and ``int64`` otherwise. -All distributions in ``pm.distributions`` will have two important methods: ``random()`` and ``logp()`` with the following signatures: +All distributions in ``pm.distributions`` are associated with two key functions: + +1. ``logp(dist, value)`` - Calculates log-probability at given value +2. ``draw(dist, size=...)`` - Generates random samples + +For example, with a normal distribution: :: - class SomeDistribution(Continuous): + with pm.Model(): + x = pm.Normal('x', mu=0, sigma=1) + + # Calculate log-probability + log_prob = pm.logp(x, 0.5) + + # Generate samples + samples = pm.draw(x, size=100) - def random(self, point=None, size=None): - ... - return random_samples +Custom distributions using ``CustomDist`` should provide logp via the ``dist`` parameter: + +:: - def logp(self, value): - ... - return total_log_prob + def custom_logp(value, mu): + return -0.5 * (value - mu)**2 -PyMC expects the ``logp()`` method to return a log-probability evaluated at the passed ``value`` argument. This method is used internally by all of the inference methods to calculate the model log-probability that is used for fitting models. The ``random()`` method is used to simulate values from the variable, and is used internally for posterior predictive checks. + custom_dist = pm.CustomDist('custom', dist=custom_logp, mu=0) Custom distributions @@ -58,7 +69,7 @@ An exponential survival function, where :math:`c=0` denotes failure (or non-surv f(c, t) = \left\{ \begin{array}{l} \exp(-\lambda t), \text{if c=1} \\ \lambda \exp(-\lambda t), \text{if c=0} \end{array} \right. -Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as a keyword argument to the ``DensityDist`` function, which creates an instance of a PyMC distribution with the custom function as its log-probability. +Such a function can be implemented as a PyMC distribution by writing a function that specifies the log-probability, then passing that function as a keyword argument to the ``CustomDist`` function, which creates an instance of a PyMC distribution with the custom function as its log-probability. For the exponential survival function, this is: @@ -67,7 +78,7 @@ For the exponential survival function, this is: def logp(value, t, lam): return (value * log(lam) - lam * t).sum() - exp_surv = pm.DensityDist('exp_surv', t, lam, logp=logp, observed=failure) + exp_surv = pm.CustomDist('exp_surv', dist=logp, t=t, lam=lam, observed=failure) Similarly, if a random number generator is required, a function returning random numbers corresponding to the probability distribution can be passed as the ``random`` argument. @@ -98,10 +109,10 @@ This allows for probabilities to be calculated and random numbers to be drawn. :: - >>> y.logp(4).eval() + >>> pm.logp(y, 4).eval() array(-1.5843639373779297, dtype=float32) - >>> y.random(size=3) + >>> pm.draw(y, size=3) array([5, 4, 3]) diff --git a/pymc/backends/__init__.py b/pymc/backends/__init__.py index 986a34f4b..cd007cf3c 100644 --- a/pymc/backends/__init__.py +++ b/pymc/backends/__init__.py @@ -72,6 +72,7 @@ from pymc.backends.arviz import predictions_to_inference_data, to_inference_data from pymc.backends.base import BaseTrace, IBaseTrace from pymc.backends.ndarray import NDArray +from pymc.blocking import PointType from pymc.model import Model from pymc.step_methods.compound import BlockedStep, CompoundStep @@ -89,7 +90,7 @@ RunType = type(None) # type: ignore[assignment, misc] -__all__ = ["to_inference_data", "predictions_to_inference_data"] +__all__ = ["predictions_to_inference_data", "to_inference_data"] def _init_trace( @@ -100,11 +101,12 @@ def _init_trace( trace: BaseTrace | None, model: Model, trace_vars: list[TensorVariable] | None = None, + initial_point: PointType | None = None, ) -> BaseTrace: """Initialize a trace backend for a chain.""" strace: BaseTrace if trace is None: - strace = NDArray(model=model, vars=trace_vars) + strace = NDArray(model=model, vars=trace_vars, test_point=initial_point) elif isinstance(trace, BaseTrace): if len(trace) > 0: raise ValueError("Continuation of traces is no longer supported.") @@ -122,7 +124,7 @@ def init_traces( chains: int, expected_length: int, step: BlockedStep | CompoundStep, - initial_point: Mapping[str, np.ndarray], + initial_point: PointType, model: Model, trace_vars: list[TensorVariable] | None = None, ) -> tuple[RunType | None, Sequence[IBaseTrace]]: @@ -145,6 +147,7 @@ def init_traces( trace=backend, model=model, trace_vars=trace_vars, + initial_point=initial_point, ) for chain_number in range(chains) ] diff --git a/pymc/backends/base.py b/pymc/backends/base.py index c0239f8de..5a2a043a3 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -30,9 +30,11 @@ ) import numpy as np +import pytensor from pymc.backends.report import SamplerReport from pymc.model import modelcontext +from pymc.pytensorf import compile from pymc.util import get_var_name logger = logging.getLogger(__name__) @@ -147,32 +149,51 @@ class BaseTrace(IBaseTrace): use different test point that might be with changed variables shapes """ - def __init__(self, name, model=None, vars=None, test_point=None): - self.name = name - + def __init__( + self, + name=None, + model=None, + vars=None, + test_point=None, + *, + fn=None, + var_shapes=None, + var_dtypes=None, + ): model = modelcontext(model) - self.model = model + if vars is None: vars = model.unobserved_value_vars unnamed_vars = {var for var in vars if var.name is None} if unnamed_vars: raise Exception(f"Can't trace unnamed variables: {unnamed_vars}") - self.vars = vars - self.varnames = [var.name for var in vars] - self.fn = model.compile_fn(vars, inputs=model.value_vars, on_unused_input="ignore") + + if fn is None: + # borrow=True avoids deepcopy when inputs=output which is the case for untransformed value variables + fn = compile( + inputs=[pytensor.In(v, borrow=True) for v in model.value_vars], + outputs=[pytensor.Out(v, borrow=True) for v in vars], + on_unused_input="ignore", + ) + fn.trust_input = True # Get variable shapes. Most backends will need this # information. - if test_point is None: - test_point = model.initial_point() - else: - test_point_ = model.initial_point().copy() - test_point_.update(test_point) - test_point = test_point_ - var_values = list(zip(self.varnames, self.fn(test_point))) - self.var_shapes = {var: value.shape for var, value in var_values} - self.var_dtypes = {var: value.dtype for var, value in var_values} + if var_shapes is None or var_dtypes is None: + if test_point is None: + test_point = model.initial_point() + var_values = tuple(zip(vars, fn(**test_point))) + var_shapes = {var.name: value.shape for var, value in var_values} + var_dtypes = {var.name: value.dtype for var, value in var_values} + + self.name = name + self.model = model + self.fn = fn + self.vars = vars + self.varnames = [var.name for var in vars] + self.var_shapes = var_shapes + self.var_dtypes = var_dtypes self.chain = None self._is_base_setup = False self.sampler_vars = None diff --git a/pymc/backends/ndarray.py b/pymc/backends/ndarray.py index 98a11fdec..70ca60879 100644 --- a/pymc/backends/ndarray.py +++ b/pymc/backends/ndarray.py @@ -40,8 +40,8 @@ class NDArray(base.BaseTrace): `model.unobserved_RVs` is used. """ - def __init__(self, name=None, model=None, vars=None, test_point=None): - super().__init__(name, model, vars, test_point) + def __init__(self, name=None, model=None, vars=None, test_point=None, **kwargs): + super().__init__(name, model, vars, test_point, **kwargs) self.draw_idx = 0 self.draws = None self.samples = {} @@ -76,7 +76,7 @@ def setup(self, draws, chain, sampler_vars=None) -> None: else: # Otherwise, make array of zeros for each variable. self.draws = draws for varname, shape in self.var_shapes.items(): - self.samples[varname] = np.zeros((draws, *shape), dtype=self.var_dtypes[varname]) + self.samples[varname] = np.empty((draws, *shape), dtype=self.var_dtypes[varname]) if sampler_vars is None: return @@ -105,17 +105,18 @@ def record(self, point, sampler_stats=None) -> None: point: dict Values mapped to variable names """ - for varname, value in zip(self.varnames, self.fn(point)): - self.samples[varname][self.draw_idx] = value + samples = self.samples + draw_idx = self.draw_idx + for varname, value in zip(self.varnames, self.fn(*point.values())): + samples[varname][draw_idx] = value - if self._stats is not None and sampler_stats is None: - raise ValueError("Expected sampler_stats") - if self._stats is None and sampler_stats is not None: - raise ValueError("Unknown sampler_stats") if sampler_stats is not None: for data, vars in zip(self._stats, sampler_stats): for key, val in vars.items(): - data[key][self.draw_idx] = val + data[key][draw_idx] = val + elif self._stats is not None: + raise ValueError("Expected sampler_stats") + self.draw_idx += 1 def _get_sampler_stats( @@ -166,7 +167,13 @@ def _slice(self, idx: slice): # Only the first `draw_idx` value are valid because of preallocation idx = slice(*idx.indices(len(self))) - sliced = NDArray(model=self.model, vars=self.vars) + sliced = type(self)( + model=self.model, + vars=self.vars, + fn=self.fn, + var_shapes=self.var_shapes, + var_dtypes=self.var_dtypes, + ) sliced.chain = self.chain sliced.samples = {varname: values[idx] for varname, values in self.samples.items()} sliced.sampler_vars = self.sampler_vars diff --git a/pymc/blocking.py b/pymc/blocking.py index dcbfe0ead..2aad65612 100644 --- a/pymc/blocking.py +++ b/pymc/blocking.py @@ -39,11 +39,11 @@ StatShape: TypeAlias = Sequence[int | None] | None -# `point_map_info` is a tuple of tuples containing `(name, shape, dtype)` for +# `point_map_info` is a tuple of tuples containing `(name, shape, size, dtype)` for # each of the raveled variables. class RaveledVars(NamedTuple): data: np.ndarray - point_map_info: tuple[tuple[str, tuple[int, ...], np.dtype], ...] + point_map_info: tuple[tuple[str, tuple[int, ...], int, np.dtype], ...] class Compose(Generic[T]): @@ -67,10 +67,9 @@ class DictToArrayBijection: @staticmethod def map(var_dict: PointType) -> RaveledVars: """Map a dictionary of names and variables to a concatenated 1D array space.""" - vars_info = tuple((v, k, v.shape, v.dtype) for k, v in var_dict.items()) - raveled_vars = [v[0].ravel() for v in vars_info] - if raveled_vars: - result = np.concatenate(raveled_vars) + vars_info = tuple((v, k, v.shape, v.size, v.dtype) for k, v in var_dict.items()) + if vars_info: + result = np.concatenate(tuple(v[0].ravel() for v in vars_info)) else: result = np.array([]) return RaveledVars(result, tuple(v[1:] for v in vars_info)) @@ -91,19 +90,15 @@ def rmap( """ if start_point: - result = dict(start_point) + result = start_point.copy() else: result = {} - if not isinstance(array, RaveledVars): - raise TypeError("`array` must be a `RaveledVars` type") - last_idx = 0 - for name, shape, dtype in array.point_map_info: - arr_len = np.prod(shape, dtype=int) - var = array.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype) - result[name] = var - last_idx += arr_len + for name, shape, size, dtype in array.point_map_info: + end = last_idx + size + result[name] = array.data[last_idx:end].reshape(shape).astype(dtype) + last_idx = end return result diff --git a/pymc/data.py b/pymc/data.py index 22fc8717c..997f0ccb3 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -42,12 +42,12 @@ from pymc.vartypes import isgenerator __all__ = [ - "get_data", + "ConstantData", + "Data", "GeneratorAdapter", "Minibatch", - "Data", - "ConstantData", "MutableData", + "get_data", ] BASE_URL = "https://raw.githubusercontent.com/pymc-devs/pymc-examples/main/examples/data/{filename}" diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index 4d2088356..442ebddc7 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -116,93 +116,93 @@ from pymc.distributions.truncated import Truncated __all__ = [ - "Uniform", - "Flat", - "HalfFlat", - "Normal", - "TruncatedNormal", + "AR", + "CAR", + "GARCH11", + "ICAR", + "AsymmetricLaplace", + "Bernoulli", "Beta", - "Kumaraswamy", - "Exponential", - "Laplace", - "StudentT", + "BetaBinomial", + "Binomial", + "Categorical", "Cauchy", - "HalfCauchy", - "Gamma", - "Weibull", - "LogNormal", - "Lognormal", - "HalfStudentT", + "Censored", "ChiSquared", - "HalfNormal", - "Wald", - "Pareto", - "InverseGamma", - "ExGaussian", - "VonMises", - "Binomial", - "BetaBinomial", - "Bernoulli", - "Poisson", - "NegativeBinomial", + "Continuous", + "CustomDist", + "DensityDist", "DiracDelta", - "ZeroInflatedPoisson", - "ZeroInflatedNegativeBinomial", - "ZeroInflatedBinomial", + "Dirichlet", + "DirichletMultinomial", + "Discrete", "DiscreteUniform", + "DiscreteWeibull", + "Distribution", + "EulerMaruyama", + "ExGaussian", + "Exponential", + "Flat", + "Gamma", + "GaussianRandomWalk", "Geometric", + "Gumbel", + "HalfCauchy", + "HalfFlat", + "HalfNormal", + "HalfStudentT", + "HurdleGamma", + "HurdleLogNormal", + "HurdleNegativeBinomial", + "HurdlePoisson", "HyperGeometric", - "Categorical", - "OrderedLogistic", - "OrderedProbit", - "DensityDist", - "CustomDist", - "Distribution", - "SymbolicRandomVariable", - "Continuous", - "Discrete", - "MvNormal", - "ZeroSumNormal", - "MatrixNormal", + "Interpolated", + "InverseGamma", "KroneckerNormal", - "MvStudentT", - "Dirichlet", - "StickBreakingWeights", - "Multinomial", - "DirichletMultinomial", - "OrderedMultinomial", - "Wishart", - "WishartBartlett", + "Kumaraswamy", "LKJCholeskyCov", "LKJCorr", - "AsymmetricLaplace", - "RandomWalk", - "GaussianRandomWalk", + "Laplace", + "LogNormal", + "Logistic", + "LogitNormal", + "Lognormal", + "MatrixNormal", + "Mixture", + "Moyal", + "Multinomial", "MvGaussianRandomWalk", + "MvNormal", + "MvStudentT", "MvStudentTRandomWalk", - "AR", - "EulerMaruyama", - "GARCH11", - "SkewNormal", - "Mixture", + "NegativeBinomial", + "Normal", "NormalMixture", - "Triangular", - "DiscreteWeibull", - "Gumbel", - "Logistic", - "LogitNormal", - "Interpolated", + "OrderedLogistic", + "OrderedMultinomial", + "OrderedProbit", + "Pareto", + "Poisson", + "PolyaGamma", + "RandomWalk", "Rice", - "Moyal", "Simulator", - "Truncated", - "Censored", - "CAR", - "ICAR", - "PolyaGamma", - "HurdleGamma", - "HurdleLogNormal", - "HurdleNegativeBinomial", - "HurdlePoisson", + "SkewNormal", "SkewStudentT", + "StickBreakingWeights", + "StudentT", + "SymbolicRandomVariable", + "Triangular", + "Truncated", + "TruncatedNormal", + "Uniform", + "VonMises", + "Wald", + "Weibull", + "Wishart", + "WishartBartlett", + "ZeroInflatedBinomial", + "ZeroInflatedNegativeBinomial", + "ZeroInflatedPoisson", + "ZeroSumNormal", ] diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 286e508bf..d5e69fc79 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -95,40 +95,40 @@ def polyagamma_cdf(*args, **kwargs): from pymc.math import invlogit, logdiffexp, logit __all__ = [ - "Uniform", - "Flat", - "HalfFlat", - "Normal", - "TruncatedNormal", + "AsymmetricLaplace", "Beta", - "Kumaraswamy", - "Exponential", - "Laplace", - "StudentT", "Cauchy", - "HalfCauchy", - "Gamma", - "Weibull", - "HalfStudentT", - "LogNormal", "ChiSquared", - "HalfNormal", - "Wald", - "Pareto", - "InverseGamma", "ExGaussian", - "VonMises", - "SkewNormal", - "Triangular", + "Exponential", + "Flat", + "Gamma", "Gumbel", + "HalfCauchy", + "HalfFlat", + "HalfNormal", + "HalfStudentT", + "Interpolated", + "InverseGamma", + "Kumaraswamy", + "Laplace", + "LogNormal", "Logistic", "LogitNormal", - "Interpolated", - "Rice", "Moyal", - "AsymmetricLaplace", + "Normal", + "Pareto", "PolyaGamma", + "Rice", + "SkewNormal", "SkewStudentT", + "StudentT", + "Triangular", + "TruncatedNormal", + "Uniform", + "VonMises", + "Wald", + "Weibull", ] @@ -1373,13 +1373,12 @@ class Exponential(PositiveContinuous): rv_op = exponential @classmethod - def dist(cls, lam=None, scale=None, *args, **kwargs): - if lam is not None and scale is not None: + def dist(cls, lam=None, *, scale=None, **kwargs): + if lam is None and scale is None: + scale = 1.0 + elif lam is not None and scale is not None: raise ValueError("Incompatible parametrization. Can't specify both lam and scale.") - elif lam is None and scale is None: - raise ValueError("Incompatible parametrization. Must specify either lam or scale.") - - if scale is None: + elif lam is not None: scale = pt.reciprocal(lam) scale = pt.as_tensor_variable(scale) @@ -1626,7 +1625,7 @@ def support_point(rv, size, b, kappa, mu): def logp(value, b, kappa, mu): value = value - mu res = pt.log(b / (kappa + (kappa**-1))) + ( - -value * b * pt.sgn(value) * (kappa ** pt.sgn(value)) + -value * b * pt.sign(value) * (kappa ** pt.sign(value)) ) return check_parameters( diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 179bae25f..550a26858 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -13,7 +13,10 @@ # limitations under the License. import warnings +from typing import TypeAlias + import numpy as np +import numpy.typing as npt import pytensor.tensor as pt from pytensor.tensor import TensorConstant @@ -30,6 +33,7 @@ uniform, ) from pytensor.tensor.random.utils import normalize_size_param +from pytensor.tensor.variable import TensorVariable from scipy import stats import pymc as pm @@ -46,27 +50,28 @@ normal_lccdf, normal_lcdf, ) -from pymc.distributions.distribution import Discrete, SymbolicRandomVariable +from pymc.distributions.distribution import DIST_PARAMETER_TYPES, Discrete, SymbolicRandomVariable from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none from pymc.logprob.basic import logcdf, logp from pymc.math import sigmoid +from pymc.pytensorf import normalize_rng_param __all__ = [ - "Binomial", - "BetaBinomial", "Bernoulli", - "DiscreteWeibull", - "Poisson", - "NegativeBinomial", + "BetaBinomial", + "Binomial", + "Categorical", "DiscreteUniform", + "DiscreteWeibull", "Geometric", "HyperGeometric", - "Categorical", + "NegativeBinomial", "OrderedLogistic", "OrderedProbit", + "Poisson", ] -from pymc.pytensorf import normalize_rng_param +DISCRETE_DIST_PARAMETER_TYPES: TypeAlias = npt.NDArray[np.int_] | int | TensorVariable class Binomial(Discrete): @@ -118,7 +123,14 @@ class Binomial(Discrete): rv_op = binomial @classmethod - def dist(cls, n, p=None, logit_p=None, *args, **kwargs): + def dist( + cls, + n: DISCRETE_DIST_PARAMETER_TYPES, + p: DIST_PARAMETER_TYPES | None = None, + logit_p: DIST_PARAMETER_TYPES | None = None, + *args, + **kwargs, + ): if p is not None and logit_p is not None: raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") elif p is None and logit_p is None: @@ -234,7 +246,14 @@ def BetaBinom(a, b, n, x): rv_op = betabinom @classmethod - def dist(cls, alpha, beta, n, *args, **kwargs): + def dist( + cls, + alpha: DIST_PARAMETER_TYPES, + beta: DIST_PARAMETER_TYPES, + n: DISCRETE_DIST_PARAMETER_TYPES, + *args, + **kwargs, + ): alpha = pt.as_tensor_variable(alpha) beta = pt.as_tensor_variable(beta) n = pt.as_tensor_variable(n, dtype=int) @@ -341,7 +360,13 @@ class Bernoulli(Discrete): rv_op = bernoulli @classmethod - def dist(cls, p=None, logit_p=None, *args, **kwargs): + def dist( + cls, + p: DIST_PARAMETER_TYPES | None = None, + logit_p: DIST_PARAMETER_TYPES | None = None, + *args, + **kwargs, + ): if p is not None and logit_p is not None: raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") elif p is None and logit_p is None: @@ -465,7 +490,9 @@ def DiscreteWeibull(q, b, x): rv_op = DiscreteWeibullRV.rv_op @classmethod - def dist(cls, q, beta, *args, **kwargs): + def dist(cls, q: DIST_PARAMETER_TYPES, beta: DIST_PARAMETER_TYPES, *args, **kwargs): + q = pt.as_tensor_variable(q) + beta = pt.as_tensor_variable(beta) return super().dist([q, beta], **kwargs) def support_point(rv, size, q, beta): @@ -553,7 +580,7 @@ class Poisson(Discrete): rv_op = poisson @classmethod - def dist(cls, mu, *args, **kwargs): + def dist(cls, mu: DIST_PARAMETER_TYPES, *args, **kwargs): mu = pt.as_tensor_variable(mu) return super().dist([mu], *args, **kwargs) @@ -677,7 +704,15 @@ def NegBinom(a, m, x): rv_op = nbinom @classmethod - def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs): + def dist( + cls, + mu: DIST_PARAMETER_TYPES | None = None, + alpha: DIST_PARAMETER_TYPES | None = None, + p: DIST_PARAMETER_TYPES | None = None, + n: DIST_PARAMETER_TYPES | None = None, + *args, + **kwargs, + ): n, p = cls.get_n_p(mu=mu, alpha=alpha, p=p, n=n) n = pt.as_tensor_variable(n) p = pt.as_tensor_variable(p) @@ -790,7 +825,7 @@ class Geometric(Discrete): rv_op = geometric @classmethod - def dist(cls, p, *args, **kwargs): + def dist(cls, p: DIST_PARAMETER_TYPES, *args, **kwargs): p = pt.as_tensor_variable(p) return super().dist([p], *args, **kwargs) @@ -891,7 +926,14 @@ class HyperGeometric(Discrete): rv_op = hypergeometric @classmethod - def dist(cls, N, k, n, *args, **kwargs): + def dist( + cls, + N: DISCRETE_DIST_PARAMETER_TYPES, + k: DISCRETE_DIST_PARAMETER_TYPES, + n: DISCRETE_DIST_PARAMETER_TYPES, + *args, + **kwargs, + ): good = pt.as_tensor_variable(k, dtype=int) bad = pt.as_tensor_variable(N - k, dtype=int) n = pt.as_tensor_variable(n, dtype=int) @@ -1027,7 +1069,13 @@ class DiscreteUniform(Discrete): rv_op = discrete_uniform @classmethod - def dist(cls, lower, upper, *args, **kwargs): + def dist( + cls, + lower: DISCRETE_DIST_PARAMETER_TYPES, + upper: DISCRETE_DIST_PARAMETER_TYPES, + *args, + **kwargs, + ): lower = pt.floor(lower) upper = pt.floor(upper) return super().dist([lower, upper], **kwargs) @@ -1123,7 +1171,12 @@ class Categorical(Discrete): rv_op = categorical @classmethod - def dist(cls, p=None, logit_p=None, **kwargs): + def dist( + cls, + p: np.ndarray | None = None, + logit_p: float | None = None, + **kwargs, + ): if p is not None and logit_p is not None: raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") elif p is None and logit_p is None: @@ -1261,7 +1314,7 @@ def __new__(cls, name, eta, cutpoints, compute_p=True, **kwargs): return out_rv @classmethod - def dist(cls, eta, cutpoints, **kwargs): + def dist(cls, eta: DIST_PARAMETER_TYPES, cutpoints: DIST_PARAMETER_TYPES, **kwargs): p = cls.compute_p(eta, cutpoints) return Categorical.dist(p=p, **kwargs) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 8e55f649d..0d1c58cf1 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -64,10 +64,10 @@ from pymc.vartypes import continuous_types, string_types __all__ = [ - "DiracDelta", - "Distribution", "Continuous", + "DiracDelta", "Discrete", + "Distribution", "SymbolicRandomVariable", ] diff --git a/pymc/distributions/moments/means.py b/pymc/distributions/moments/means.py index 450a90f8a..f02573372 100644 --- a/pymc/distributions/moments/means.py +++ b/pymc/distributions/moments/means.py @@ -50,7 +50,7 @@ UniformRV, VonMisesRV, ) -from pytensor.tensor.var import TensorVariable +from pytensor.tensor.variable import TensorVariable from pymc.distributions.continuous import ( AsymmetricLaplaceRV, diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index da10b12fa..e44008fe6 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -83,22 +83,22 @@ from pymc.util import check_dist_not_registered __all__ = [ - "MvNormal", - "ZeroSumNormal", - "MvStudentT", + "CAR", + "ICAR", "Dirichlet", - "Multinomial", "DirichletMultinomial", - "OrderedMultinomial", - "Wishart", - "WishartBartlett", - "LKJCorr", + "KroneckerNormal", "LKJCholeskyCov", + "LKJCorr", "MatrixNormal", - "KroneckerNormal", - "CAR", - "ICAR", + "Multinomial", + "MvNormal", + "MvStudentT", + "OrderedMultinomial", "StickBreakingWeights", + "Wishart", + "WishartBartlett", + "ZeroSumNormal", ] solve_lower = partial(solve_triangular, lower=True) @@ -1791,14 +1791,21 @@ class MatrixNormal(Continuous): Examples -------- Define a matrixvariate normal variable for given row and column covariance - matrices:: + matrices. - colcov = np.array([[1.0, 0.5], [0.5, 2]]) - rowcov = np.array([[1, 0, 0], [0, 4, 0], [0, 0, 16]]) - m = rowcov.shape[0] - n = colcov.shape[0] - mu = np.zeros((m, n)) - vals = pm.MatrixNormal("vals", mu=mu, colcov=colcov, rowcov=rowcov) + .. code:: python + + import pymc as pm + import numpy as np + import pytensor.tensor as pt + + with pm.Model() as model: + colcov = np.array([[1.0, 0.5], [0.5, 2]]) + rowcov = np.array([[1, 0, 0], [0, 4, 0], [0, 0, 16]]) + m = rowcov.shape[0] + n = colcov.shape[0] + mu = np.zeros((m, n)) + vals = pm.MatrixNormal("vals", mu=mu, colcov=colcov, rowcov=rowcov) Above, the ith row in vals has a variance that is scaled by 4^i. Alternatively, row or column cholesky matrices could be substituted for @@ -1827,16 +1834,13 @@ class MatrixNormal(Continuous): with pm.Model() as model: # Setup right cholesky matrix sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3) - colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=3, eta=2, - sd_dist=sd_dist) - colchol = pm.expand_packed_triangular(3, colchol_packed) - + colchol,_,_ = pm.LKJCholeskyCov('colchol', n=3, eta=2,sd_dist=sd_dist) # Setup left covariance matrix scale = pm.LogNormal('scale', mu=np.log(true_scale), sigma=0.5) rowcov = pt.diag([scale**(2*i) for i in range(m)]) vals = pm.MatrixNormal('vals', mu=mu, colchol=colchol, rowcov=rowcov, - observed=data) + observed=data) """ rv_op = matrixnormal diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index efd8d1f77..f2b21763c 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -36,9 +36,9 @@ from pymc.pytensorf import convert_observed_data __all__ = [ - "to_tuple", - "rv_size_is_none", "change_dist_size", + "rv_size_is_none", + "to_tuple", ] from pymc.exceptions import ShapeError @@ -461,7 +461,7 @@ def implicit_size_from_params( for param, ndim in zip(params, ndims_params): batch_shape = list(param.shape[:-ndim] if ndim > 0 else param.shape) # Overwrite broadcastable dims - for i, broadcastable in enumerate(param.type.broadcastable): + for i, broadcastable in enumerate(param.type.broadcastable[: len(batch_shape)]): if broadcastable: batch_shape[i] = 1 batch_shapes.append(batch_shape) diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 6469cd101..3ec863d7a 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -21,7 +21,7 @@ import pytensor import pytensor.tensor as pt -from pytensor.graph.basic import Node, ancestors +from pytensor.graph.basic import Apply, ancestors from pytensor.graph.replace import clone_replace from pytensor.tensor import TensorVariable from pytensor.tensor.random.op import RandomVariable @@ -49,13 +49,13 @@ from pymc.util import check_dist_not_registered __all__ = [ - "RandomWalk", - "GaussianRandomWalk", - "MvGaussianRandomWalk", - "MvStudentTRandomWalk", "AR", "GARCH11", "EulerMaruyama", + "GaussianRandomWalk", + "MvGaussianRandomWalk", + "MvStudentTRandomWalk", + "RandomWalk", ] @@ -490,7 +490,7 @@ def step(*args): constant_term=constant_term, )(rhos, sigma, init_dist, steps, noise_rng) - def update(self, node: Node): + def update(self, node: Apply): """Return the update mapping for the noise RV.""" return {node.inputs[-1]: node.outputs[0]} @@ -767,7 +767,7 @@ def step(prev_y, prev_sigma, omega, alpha_1, beta_1, rng): outputs=[noise_next_rng, garch11], )(omega, alpha_1, beta_1, initial_vol, init_dist, steps, noise_rng) - def update(self, node: Node): + def update(self, node: Apply): """Return the update mapping for the noise RV.""" return {node.inputs[-1]: node.outputs[0]} @@ -918,7 +918,7 @@ def step(*prev_args): extended_signature=f"(),(s),{','.join('()' for _ in sde_pars)},[rng]->[rng],(t)", )(init_dist, steps, *sde_pars, noise_rng) - def update(self, node: Node): + def update(self, node: Apply): """Return the update mapping for the noise RV.""" return {node.inputs[-1]: node.outputs[0]} diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index 486fa42b5..fe036c2bc 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -36,18 +36,18 @@ ) __all__ = [ - "Transform", - "simplex", - "logodds", + "Chain", + "CholeskyCovPacked", "Interval", + "Transform", + "ZeroSumTransform", + "circular", + "log", "log_exp_m1", + "logodds", "ordered", - "log", + "simplex", "sum_to_1", - "circular", - "CholeskyCovPacked", - "Chain", - "ZeroSumTransform", ] diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 6f32918bb..36b439526 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -19,7 +19,7 @@ from pytensor import config, graph_replace, scan from pytensor.graph import Op -from pytensor.graph.basic import Node +from pytensor.graph.basic import Apply from pytensor.raise_op import CheckAndRaise from pytensor.scan import until from pytensor.tensor import TensorConstant, TensorVariable @@ -211,7 +211,7 @@ def _create_logcdf_exprs( upper_logcdf = graph_replace(lower_logcdf, {lower_value: upper_value}) return lower_logcdf, upper_logcdf - def update(self, node: Node): + def update(self, node: Apply): """Return the update mapping for the internal RNGs. TruncatedRVs are created in a way that the rng updates follow the same order as the input RNGs. diff --git a/pymc/exceptions.py b/pymc/exceptions.py index 913c3ca3c..652c2ae5a 100644 --- a/pymc/exceptions.py +++ b/pymc/exceptions.py @@ -13,12 +13,12 @@ # limitations under the License. __all__ = [ - "SamplingError", - "IncorrectArgumentsError", - "TraceDirectoryError", "ImputationWarning", - "ShapeWarning", + "IncorrectArgumentsError", + "SamplingError", "ShapeError", + "ShapeWarning", + "TraceDirectoryError", ] diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 21492a34e..72dc3b1a9 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -169,18 +169,18 @@ def find_constrained_prior( ) target = (pt.exp(logcdf_lower) - mass_below_lower) ** 2 - target_fn = pm.pytensorf.compile_pymc([dist_params], target, allow_input_downcast=True) + target_fn = pm.pytensorf.compile([dist_params], target, allow_input_downcast=True) constraint = pt.exp(logcdf_upper) - pt.exp(logcdf_lower) - constraint_fn = pm.pytensorf.compile_pymc([dist_params], constraint, allow_input_downcast=True) + constraint_fn = pm.pytensorf.compile([dist_params], constraint, allow_input_downcast=True) jac: str | Callable constraint_jac: str | Callable try: pytensor_jac = pm.gradient(target, [dist_params]) - jac = pm.pytensorf.compile_pymc([dist_params], pytensor_jac, allow_input_downcast=True) + jac = pm.pytensorf.compile([dist_params], pytensor_jac, allow_input_downcast=True) pytensor_constraint_jac = pm.gradient(constraint, [dist_params]) - constraint_jac = pm.pytensorf.compile_pymc( + constraint_jac = pm.pytensorf.compile( [dist_params], pytensor_constraint_jac, allow_input_downcast=True ) # when PyMC cannot compute the gradient diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index f330308b7..d9f357728 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -30,23 +30,23 @@ __all__ = [ "Constant", - "WhiteNoise", + "Coregion", + "Cosine", "ExpQuad", - "RatQuad", "Exponential", - "Matern52", - "Matern32", - "Matern12", + "Gibbs", + "Kron", "Linear", - "Polynomial", - "Cosine", + "Matern12", + "Matern32", + "Matern52", "Periodic", + "Polynomial", + "RatQuad", + "ScaledCov", "WarpedInput", + "WhiteNoise", "WrappedPeriodic", - "Gibbs", - "Coregion", - "ScaledCov", - "Kron", ] from pymc.pytensorf import constant_fold diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index e08ebffbe..3a4b45383 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -36,7 +36,7 @@ solve_lower = partial(solve_triangular, lower=True) solve_upper = partial(solve_triangular, lower=False) -__all__ = ["Latent", "Marginal", "TP", "MarginalApprox", "LatentKron", "MarginalKron"] +__all__ = ["TP", "Latent", "LatentKron", "Marginal", "MarginalApprox", "MarginalKron"] _noise_deprecation_warning = ( diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index cf434ebe3..e331aab8f 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -428,8 +428,8 @@ def prior( self, name: str, X: TensorLike, + dims: str | None = None, hsgp_coeffs_dims: str | None = None, - gp_dims: str | None = None, *args, **kwargs, ): @@ -444,10 +444,11 @@ def prior( Name of the random variable X: array-like Function input values. + dims: str, default None + Dimension name for the GP random variable. hsgp_coeffs_dims: str, default None Dimension name for the HSGP basis vectors. - gp_dims: str, default None - Dimension name for the GP random variable. + """ phi, sqrt_psd = self.prior_linearized(X) self._sqrt_psd = sqrt_psd @@ -469,7 +470,7 @@ def prior( ) f = self.mean_func(X) + phi @ self._beta - self.f = pm.Deterministic(name, f, dims=gp_dims) + self.f = pm.Deterministic(name, f, dims=dims) return self.f def _build_conditional(self, Xnew): @@ -695,7 +696,9 @@ def prior_linearized(self, X: TensorLike): psd = self.scale * self.cov_func.power_spectral_density_approx(J) return (phi_cos, phi_sin), psd - def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ignore[override] + def prior( # type: ignore[override] + self, name: str, X: TensorLike, dims: str | None = None, hsgp_coeffs_dims: str | None = None + ): R""" Return the (approximate) GP prior distribution evaluated over the input locations `X`. @@ -709,11 +712,13 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign Function input values. dims: None Dimension name for the GP random variable. + hsgp_coeffs_dims: str | None = None + Dimension name for the HSGPPeriodic basis vectors. """ (phi_cos, phi_sin), psd = self.prior_linearized(X) m = self._m - self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1)) + self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1), dims=hsgp_coeffs_dims) # The first eigenfunction for the sine component is zero # and so does not contribute to the approximation. f = ( diff --git a/pymc/gp/mean.py b/pymc/gp/mean.py index 800cbf556..827b5db6e 100644 --- a/pymc/gp/mean.py +++ b/pymc/gp/mean.py @@ -14,7 +14,7 @@ import pytensor.tensor as pt -__all__ = ["Zero", "Constant", "Linear"] +__all__ = ["Constant", "Linear", "Zero"] class Mean: diff --git a/pymc/gp/util.py b/pymc/gp/util.py index b2d7447b1..3aaf85ab5 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -23,7 +23,7 @@ from scipy.cluster.vq import kmeans from pymc.model.core import modelcontext -from pymc.pytensorf import compile_pymc +from pymc.pytensorf import compile JITTER_DEFAULT = 1e-6 @@ -55,7 +55,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): if len(inputs) == 0: return tuple(v.eval() for v in vars_needed) - fn = compile_pymc( + fn = compile( inputs, vars_needed, allow_input_downcast=True, diff --git a/pymc/initial_point.py b/pymc/initial_point.py index 15f4f887c..241409f68 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -25,7 +25,13 @@ from pytensor.tensor.variable import TensorVariable from pymc.logprob.transforms import Transform -from pymc.pytensorf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs +from pymc.pytensorf import ( + compile, + find_rng_nodes, + replace_rng_nodes, + reseed_rngs, + toposort_replace, +) from pymc.util import get_transformed_name, get_untransformed_name, is_transformed_name StartDict = dict[Variable | str, np.ndarray | Variable | str] @@ -151,7 +157,7 @@ def make_initial_point_fn( # Replace original rng shared variables so that we don't mess with them # when calling the final seeded function initial_values = replace_rng_nodes(initial_values) - func = compile_pymc(inputs=[], outputs=initial_values, mode=pytensor.compile.mode.FAST_COMPILE) + func = compile(inputs=[], outputs=initial_values, mode=pytensor.compile.mode.FAST_COMPILE) varnames = [] for var in model.free_RVs: @@ -288,8 +294,7 @@ def make_initial_point_expression( # order, so that later nodes do not reintroduce expressions with earlier # rvs that would need to once again be replaced by their initial_points graph = FunctionGraph(outputs=free_rvs_clone, clone=False) - replacements = reversed(list(zip(free_rvs_clone, initial_values_clone))) - graph.replace_all(replacements, import_missing=True) + toposort_replace(graph, tuple(zip(free_rvs_clone, initial_values_clone)), reverse=True) if not return_transformed: return graph.outputs diff --git a/pymc/logprob/__init__.py b/pymc/logprob/__init__.py index aaa8b2052..4dea34312 100644 --- a/pymc/logprob/__init__.py +++ b/pymc/logprob/__init__.py @@ -49,6 +49,7 @@ import pymc.logprob.censoring import pymc.logprob.cumsum import pymc.logprob.checks +import pymc.logprob.linalg import pymc.logprob.mixture import pymc.logprob.order import pymc.logprob.scan @@ -57,7 +58,7 @@ __all__ = ( - "logp", - "logcdf", "icdf", + "logcdf", + "logp", ) diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index f47c39e2b..281b4fb18 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -43,6 +43,7 @@ from pytensor.graph import Apply, Op, Variable from pytensor.graph.utils import MetaType from pytensor.tensor import TensorVariable +from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.random.op import RandomVariable @@ -168,6 +169,10 @@ def __str__(self): return f"Measurable{super().__str__()}" +class MeasurableBlockwise(MeasurableOp, Blockwise): + """Base class for Measurable Blockwise variables.""" + + class ValuedRV(Op): r"""Represents the association of a measurable variable and its value. diff --git a/pymc/logprob/basic.py b/pymc/logprob/basic.py index 8c76fd8ae..7753678d2 100644 --- a/pymc/logprob/basic.py +++ b/pymc/logprob/basic.py @@ -503,7 +503,7 @@ def conditional_logp( if not isinstance(node.op, MeasurableOp): continue - valued_nodes = get_related_valued_nodes(node, fgraph) + valued_nodes = get_related_valued_nodes(fgraph, node) if not valued_nodes: continue diff --git a/pymc/logprob/binary.py b/pymc/logprob/binary.py index 0767d25f8..9d0985a2c 100644 --- a/pymc/logprob/binary.py +++ b/pymc/logprob/binary.py @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import cast import numpy as np import pytensor.tensor as pt -from pytensor.graph.basic import Node +from pytensor.graph.basic import Apply from pytensor.graph.fg import FunctionGraph from pytensor.graph.rewriting.basic import node_rewriter from pytensor.scalar.basic import GE, GT, LE, LT, Invert @@ -39,7 +40,7 @@ class MeasurableComparison(MeasurableElemwise): @node_rewriter(tracks=[gt, lt, ge, le]) -def find_measurable_comparisons(fgraph: FunctionGraph, node: Node) -> list[TensorVariable] | None: +def find_measurable_comparisons(fgraph: FunctionGraph, node: Apply) -> list[TensorVariable] | None: measurable_inputs = filter_measurable_variables(node.inputs) if len(measurable_inputs) != 1: @@ -55,7 +56,7 @@ def find_measurable_comparisons(fgraph: FunctionGraph, node: Node) -> list[Tenso # Check that the other input is not potentially measurable, in which case this rewrite # would be invalid - const = node.inputs[(measurable_var_idx + 1) % 2] + const = cast(TensorVariable, node.inputs[(measurable_var_idx + 1) % 2]) # check for potential measurability of const if check_potential_measurability([const]): @@ -127,11 +128,11 @@ class MeasurableBitwise(MeasurableElemwise): @node_rewriter(tracks=[invert]) -def find_measurable_bitwise(fgraph: FunctionGraph, node: Node) -> list[TensorVariable] | None: - base_var = node.inputs[0] +def find_measurable_bitwise(fgraph: FunctionGraph, node: Apply) -> list[TensorVariable] | None: + base_var = cast(TensorVariable, node.inputs[0]) if not base_var.dtype.startswith("bool"): - raise None + return None if not filter_measurable_variables([base_var]): return None diff --git a/pymc/logprob/censoring.py b/pymc/logprob/censoring.py index 2104ecb6e..e17d30a43 100644 --- a/pymc/logprob/censoring.py +++ b/pymc/logprob/censoring.py @@ -38,7 +38,7 @@ import numpy as np import pytensor.tensor as pt -from pytensor.graph.basic import Node +from pytensor.graph.basic import Apply from pytensor.graph.fg import FunctionGraph from pytensor.graph.rewriting.basic import node_rewriter from pytensor.scalar.basic import Ceil, Clip, Floor, RoundHalfToEven @@ -62,7 +62,7 @@ class MeasurableClip(MeasurableElemwise): @node_rewriter(tracks=[clip]) -def find_measurable_clips(fgraph: FunctionGraph, node: Node) -> list[TensorVariable] | None: +def find_measurable_clips(fgraph: FunctionGraph, node: Apply) -> list[TensorVariable] | None: # TODO: Canonicalize x[x>ub] = ub -> clip(x, x, ub) if not filter_measurable_variables(node.inputs): @@ -153,7 +153,7 @@ class MeasurableRound(MeasurableElemwise): @node_rewriter(tracks=[ceil, floor, round_half_to_even]) -def find_measurable_roundings(fgraph: FunctionGraph, node: Node) -> list[TensorVariable] | None: +def find_measurable_roundings(fgraph: FunctionGraph, node: Apply) -> list[TensorVariable] | None: if not filter_measurable_variables(node.inputs): return None diff --git a/pymc/logprob/linalg.py b/pymc/logprob/linalg.py new file mode 100644 index 000000000..226b24a07 --- /dev/null +++ b/pymc/logprob/linalg.py @@ -0,0 +1,102 @@ +# Copyright 2024 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytensor.tensor as pt + +from pytensor.graph.rewriting.basic import node_rewriter +from pytensor.tensor.math import _matrix_matrix_matmul + +from pymc.logprob.abstract import MeasurableBlockwise, MeasurableOp, _logprob, _logprob_helper +from pymc.logprob.rewriting import measurable_ir_rewrites_db +from pymc.logprob.utils import check_potential_measurability, filter_measurable_variables + + +class MeasurableMatMul(MeasurableBlockwise): + """Measurable matrix multiplication operation.""" + + right_measurable: bool + + def __init__(self, measurable_right: bool, **kwargs): + self.right_measurable = measurable_right + super().__init__(**kwargs) + + +@_logprob.register(MeasurableMatMul) +def logprob_measurable_matmul(op, values, l, r): # noqa: E741 + [y_value] = values + if op.right_measurable: + A, x = l, r + x_value = pt.linalg.solve(A, y_value) + else: + x, A = l, r + x_value = pt.linalg.solve(A.mT, y_value.mT).mT + + x_logp = _logprob_helper(x, x_value) + + # The operation has a support dimensionality of 2 + # We need to reduce it if it's still present in the base logp + if x_logp.type.ndim == x_value.type.ndim: + x_logp = pt.sum(x_logp, axis=(-1, -2)) + elif x_logp.type.ndim == x_value.type.ndim - 1: + x_logp = pt.sum(x_logp, axis=-1) + + _, log_abs_jac_det = pt.linalg.slogdet(A) + + return x_logp - log_abs_jac_det + + +@node_rewriter(tracks=[_matrix_matrix_matmul]) +def find_measurable_matmul(fgraph, node): + """Find measurable matrix-matrix multiplication operations.""" + if isinstance(node.op, MeasurableOp): + return None + + [out] = node.outputs + [l, r] = node.inputs # noqa: E741 + + # Check that not both a and r are measurable + measurable_inputs = filter_measurable_variables([l, r]) + if len(measurable_inputs) != 1: + return None + + [measurable_input] = measurable_inputs + + # Check the measurable input is not broadcasted + if measurable_input.type.broadcastable[:-2] != out.type.broadcastable[:-2]: + return None + + measurable_right = measurable_input is r + A = l if measurable_right else r + + # Check if the static shape already reveals a non-square matrix, + if ( + A.type.shape[-1] is not None + and A.type.shape[-2] is not None + and A.type.shape[-1] != A.type.shape[-2] + ): + return None + + # Check the other input is not potentially measurable + if check_potential_measurability([A]): + return None + + measurable_matmul = MeasurableMatMul(measurable_right=measurable_right, **node.op._props_dict()) + return [measurable_matmul(l, r)] + + +measurable_ir_rewrites_db.register( + find_measurable_matmul.__name__, + find_measurable_matmul, + "basic", + "linalg", +) diff --git a/pymc/logprob/mixture.py b/pymc/logprob/mixture.py index 55e506ad9..1ebb29638 100644 --- a/pymc/logprob/mixture.py +++ b/pymc/logprob/mixture.py @@ -468,7 +468,7 @@ def split_valued_ifelse(fgraph, node): # Single outputs IfElse return None - valued_output_nodes = get_related_valued_nodes(node, fgraph) + valued_output_nodes = get_related_valued_nodes(fgraph, node) if not valued_output_nodes: return None diff --git a/pymc/logprob/rewriting.py b/pymc/logprob/rewriting.py index cd390e13a..76baf31df 100644 --- a/pymc/logprob/rewriting.py +++ b/pymc/logprob/rewriting.py @@ -152,7 +152,11 @@ def remove_DiracDelta(fgraph, node): logprob_rewrites_db.register( "local_exp_over_1_plus_exp", out2in(local_exp_over_1_plus_exp), "basic" ) -logprob_rewrites_db.register("pre-canonicalize", optdb.query("+canonicalize"), "basic") +logprob_rewrites_db.register( + "pre-canonicalize", + optdb.query("+canonicalize", "-local_eager_useless_unbatched_blockwise"), + "basic", +) # These rewrites convert un-measurable variables into their measurable forms, # but they need to be reapplied, because some of the measurable forms require @@ -175,7 +179,11 @@ def remove_DiracDelta(fgraph, node): ) -logprob_rewrites_db.register("post-canonicalize", optdb.query("+canonicalize"), "basic") +logprob_rewrites_db.register( + "post-canonicalize", + optdb.query("+canonicalize", "-local_eager_useless_unbatched_blockwise"), + "basic", +) # Rewrites that remove IR Ops cleanup_ir_rewrites_db = LocalGroupDB() diff --git a/pymc/logprob/scan.py b/pymc/logprob/scan.py index ecd04b9c7..8f6942458 100644 --- a/pymc/logprob/scan.py +++ b/pymc/logprob/scan.py @@ -421,7 +421,7 @@ def find_measurable_scans(fgraph, node): # Find outputs of scan that are directly valued. # These must be mapping outputs, such as `outputs_info = [None]` (i.e, no recurrence nit_sot outputs) direct_valued_outputs = [ - valued_node.inputs[0] for valued_node in get_related_valued_nodes(node, fgraph) + valued_node.inputs[0] for valued_node in get_related_valued_nodes(fgraph, node) ] if not all(valued_out in scan_args.outer_out_nit_sot for valued_out in direct_valued_outputs): return None @@ -434,7 +434,7 @@ def find_measurable_scans(fgraph, node): client.outputs[0] for out in node.outputs for client, _ in fgraph.clients[out] - if (isinstance(client.op, Subtensor) and get_related_valued_nodes(client, fgraph)) + if (isinstance(client.op, Subtensor) and get_related_valued_nodes(fgraph, client)) ] indirect_valued_outputs = [out.owner.inputs[0] for out in sliced_valued_outputs] if not all( @@ -463,7 +463,9 @@ def find_measurable_scans(fgraph, node): # We must also replace any lingering references to the old RVs by the new measurable RVS # For example if we had measurable out1 = exp(normal()) and out2 = out1 - x # We need to replace references of original out1 by the new MeasurableExp(normal()) - inner_outs = node.op.inner_outputs.copy() + clone_fgraph = node.op.fgraph.clone() + inner_inps = clone_fgraph.inputs + inner_outs = clone_fgraph.outputs inner_rvs_replacements = [] for idx, new_inner_rv in zip(valued_output_idxs, inner_rvs, strict=True): old_inner_rv = inner_outs[idx] @@ -474,8 +476,7 @@ def find_measurable_scans(fgraph, node): clone=False, ) toposort_replace(temp_fgraph, inner_rvs_replacements) - inner_outs = temp_fgraph.outputs[: len(inner_outs)] - op = MeasurableScan(node.op.inner_inputs, inner_outs, node.op.info, mode=copy(node.op.mode)) + op = MeasurableScan(inner_inps, inner_outs, node.op.info, mode=copy(node.op.mode)) new_outs = op.make_node(*node.inputs).outputs old_outs = node.outputs diff --git a/pymc/logprob/tensor.py b/pymc/logprob/tensor.py index ec4f5c65a..abb5df2ab 100644 --- a/pymc/logprob/tensor.py +++ b/pymc/logprob/tensor.py @@ -41,13 +41,19 @@ from pytensor.graph.rewriting.basic import node_rewriter from pytensor.tensor import TensorVariable from pytensor.tensor.basic import Join, MakeVector -from pytensor.tensor.elemwise import DimShuffle +from pytensor.tensor.elemwise import DimShuffle, Elemwise from pytensor.tensor.random.op import RandomVariable from pytensor.tensor.random.rewriting import ( local_dimshuffle_rv_lift, ) -from pymc.logprob.abstract import MeasurableOp, _logprob, _logprob_helper, promised_valued_rv +from pymc.logprob.abstract import ( + MeasurableOp, + ValuedRV, + _logprob, + _logprob_helper, + promised_valued_rv, +) from pymc.logprob.rewriting import ( assume_valued_outputs, early_measurable_ir_rewrites_db, @@ -57,6 +63,7 @@ from pymc.logprob.utils import ( check_potential_measurability, filter_measurable_variables, + get_related_valued_nodes, replace_rvs_by_values, ) from pymc.pytensorf import constant_fold @@ -183,6 +190,9 @@ class MeasurableDimShuffle(MeasurableOp, DimShuffle): # find it locally and fails when a new `Op` is initialized c_func_file = str(DimShuffle.get_path(Path(DimShuffle.c_func_file))) # type: ignore[arg-type] + def __str__(self): + return f"Measurable{super().__str__()}" + @_logprob.register(MeasurableDimShuffle) def logprob_dimshuffle(op: MeasurableDimShuffle, values, base_var, **kwargs): @@ -215,33 +225,70 @@ def logprob_dimshuffle(op: MeasurableDimShuffle, values, base_var, **kwargs): return raw_logp.dimshuffle(redo_ds) +def _elemwise_univariate_chain(fgraph, node) -> bool: + # Check whether only Elemwise operations connect a base univariate RV to the valued node through var. + from pymc.distributions.distribution import SymbolicRandomVariable + from pymc.logprob.transforms import MeasurableTransform + + [inp] = node.inputs + [out] = node.outputs + + def elemwise_root(var: TensorVariable) -> TensorVariable | None: + if isinstance(var.owner.op, RandomVariable | SymbolicRandomVariable): + return var + elif isinstance(var.owner.op, MeasurableTransform): + return elemwise_root(var.owner.inputs[var.owner.op.measurable_input_idx]) + else: + return None + + # Check that the root is a univariate distribution linked by only elemwise operations + root = elemwise_root(inp) + if root is None: + return False + elif root.owner.op.ndim_supp != 0: + # This is still fine if the variable is directly valued + return any(get_related_valued_nodes(fgraph, node)) + + def elemwise_leaf(var: TensorVariable, clients=fgraph.clients) -> bool: + var_clients = clients[var] + if len(var_clients) != 1: + return False + [(client, _)] = var_clients + if isinstance(client.op, ValuedRV): + return True + elif isinstance(client.op, Elemwise) and len(client.outputs) == 1: + return elemwise_leaf(client.outputs[0]) + else: + return False + + # Check that the path to the valued node consists only of elemwise operations + return elemwise_leaf(out) + + @node_rewriter([DimShuffle]) def find_measurable_dimshuffles(fgraph, node) -> list[TensorVariable] | None: r"""Find `Dimshuffle`\s for which a `logprob` can be computed.""" - from pymc.distributions.distribution import SymbolicRandomVariable - if isinstance(node.op, MeasurableOp): return None if not filter_measurable_variables(node.inputs): return None + # In cases where DimShuffle transposes dimensions, we only apply this rewrite when only Elemwise + # operations separate it from the valued node. Further transformations likely need to know where + # the support axes are for a correct implementation (and thus assume they are the rightmost axes). + # TODO: When we include the support axis as meta information in each intermediate MeasurableVariable, + # we can lift this restriction (see https://github.com/pymc-devs/pymc/issues/6360) + if tuple(node.op.shuffle) != tuple(sorted(node.op.shuffle)) and not _elemwise_univariate_chain( + fgraph, node + ): + return None + base_var = node.inputs[0] - # We can only apply this rewrite directly to `RandomVariable`s, as those are - # the only `Op`s for which we always know the support axis. Other measurable - # variables can have arbitrary support axes (e.g., if they contain separate - # `MeasurableDimShuffle`s). Most measurable variables with `DimShuffle`s - # should still be supported as long as the `DimShuffle`s can be merged/ - # lifted towards the base RandomVariable. - # TODO: If we include the support axis as meta information in each - # intermediate MeasurableVariable, we can lift this restriction. - if not isinstance(base_var.owner.op, RandomVariable | SymbolicRandomVariable): - return None # pragma: no cover - - measurable_dimshuffle = MeasurableDimShuffle(node.op.input_broadcastable, node.op.new_order)( - base_var - ) + measurable_dimshuffle = MeasurableDimShuffle( + input_ndim=node.op.input_ndim, new_order=node.op.new_order + )(base_var) assert isinstance(measurable_dimshuffle, TensorVariable) return [measurable_dimshuffle] diff --git a/pymc/logprob/transform_value.py b/pymc/logprob/transform_value.py index f093ddbf2..1b5d4cd81 100644 --- a/pymc/logprob/transform_value.py +++ b/pymc/logprob/transform_value.py @@ -147,7 +147,7 @@ def transform_values(fgraph: FunctionGraph, node: Apply) -> list[Apply] | None: return None rv_node = node.inputs[0].owner - valued_nodes = get_related_valued_nodes(rv_node, fgraph) + valued_nodes = get_related_valued_nodes(fgraph, rv_node) rvs = [valued_var.inputs[0] for valued_var in valued_nodes] values = [valued_var.inputs[1] for valued_var in valued_nodes] transforms = [values_to_transforms.get(value, None) for value in values] diff --git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index 41233223b..930bf1f4e 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -42,7 +42,7 @@ from pytensor import scan from pytensor.gradient import jacobian -from pytensor.graph.basic import Node, Variable +from pytensor.graph.basic import Apply, Variable from pytensor.graph.fg import FunctionGraph from pytensor.graph.rewriting.basic import node_rewriter from pytensor.scalar import ( @@ -453,7 +453,7 @@ def measurable_power_exponent_to_exp(fgraph, node): erfcx, ] ) -def find_measurable_transforms(fgraph: FunctionGraph, node: Node) -> list[Node] | None: +def find_measurable_transforms(fgraph: FunctionGraph, node: Apply) -> list[Variable] | None: """Find measurable transformations from Elemwise operators.""" # Node was already converted if isinstance(node.op, MeasurableOp): diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index e96426fbe..9865226e4 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -320,7 +320,7 @@ def find_negated_var(var): return None -def get_related_valued_nodes(node: Apply, fgraph: FunctionGraph) -> list[Apply]: +def get_related_valued_nodes(fgraph: FunctionGraph, node: Apply) -> list[Apply]: """Get all ValuedVars related to the same RV node. Returns diff --git a/pymc/math.py b/pymc/math.py index 48ec0d7d2..2f7527e11 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -103,7 +103,10 @@ "arcsinh", "arctan", "arctanh", + "batched_diag", + "block_diagonal", "broadcast_to", + "cartesian", "ceil", "clip", "concatenate", @@ -119,16 +122,32 @@ "erfcinv", "erfinv", "exp", - "full", - "full_like", + "expand_packed_triangular", + "flat_outer", "flatten", + "flatten_list", "floor", + "full", + "full_like", "ge", "gt", + "invlogit", + "invprobit", + "kron_diag", + "kron_dot", + "kron_solve_lower", + "kron_solve_upper", + "kronecker", "le", "log", + "log1mexp", "log1pexp", + "log_softmax", "logaddexp", + "logbern", + "logdet", + "logdiffexp", + "logit", "logsumexp", "lt", "matmul", @@ -141,12 +160,15 @@ "ones", "ones_like", "or_", + "probit", "prod", "round", + "round", "sgn", "sigmoid", "sin", "sinh", + "softmax", "sqr", "sqrt", "stack", @@ -157,28 +179,6 @@ "where", "zeros", "zeros_like", - "kronecker", - "cartesian", - "kron_dot", - "kron_solve_lower", - "kron_solve_upper", - "kron_diag", - "flat_outer", - "logdiffexp", - "invlogit", - "softmax", - "log_softmax", - "logbern", - "logit", - "log1mexp", - "flatten_list", - "logdet", - "probit", - "invprobit", - "expand_packed_triangular", - "batched_diag", - "block_diagonal", - "round", ] diff --git a/pymc/model/core.py b/pymc/model/core.py index ad60a84df..99711e566 100644 --- a/pymc/model/core.py +++ b/pymc/model/core.py @@ -48,7 +48,7 @@ ShapeError, ShapeWarning, ) -from pymc.initial_point import make_initial_point_fn +from pymc.initial_point import PointType, make_initial_point_fn from pymc.logprob.basic import transformed_conditional_logp from pymc.logprob.transforms import Transform from pymc.logprob.utils import ParameterValueError, replace_rvs_by_values @@ -56,11 +56,12 @@ from pymc.pytensorf import ( PointFunc, SeedSequenceSeed, - compile_pymc, + compile, convert_observed_data, gradient, hessian, inputvars, + join_nonshared_inputs, rewrite_pregrad, ) from pymc.util import ( @@ -78,13 +79,13 @@ from pymc.vartypes import continuous_types, discrete_types, typefilter __all__ = [ - "Model", - "modelcontext", "Deterministic", - "Potential", - "set_data", + "Model", "Point", + "Potential", "compile_fn", + "modelcontext", + "set_data", ] @@ -172,6 +173,9 @@ def __init__( dtype=None, casting="no", compute_grads=True, + model=None, + initial_point: PointType | None = None, + ravel_inputs: bool | None = None, **kwargs, ): if extra_vars_and_values is None: @@ -219,9 +223,7 @@ def __init__( givens = [] self._extra_vars_shared = {} for var, value in extra_vars_and_values.items(): - shared = pytensor.shared( - value, var.name + "_shared__", shape=[1 if s == 1 else None for s in value.shape] - ) + shared = pytensor.shared(value, var.name + "_shared__", shape=value.shape) self._extra_vars_shared[var.name] = shared givens.append((var, shared)) @@ -231,13 +233,28 @@ def __init__( grads = pytensor.grad(cost, grad_vars, disconnected_inputs="ignore") for grad_wrt, var in zip(grads, grad_vars): grad_wrt.name = f"{var.name}_grad" - outputs = [cost, *grads] + grads = pt.join(0, *[pt.atleast_1d(grad.ravel()) for grad in grads]) + outputs = [cost, grads] else: outputs = [cost] - inputs = grad_vars + if ravel_inputs: + if initial_point is None: + initial_point = modelcontext(model).initial_point() + outputs, raveled_grad_vars = join_nonshared_inputs( + point=initial_point, inputs=grad_vars, outputs=outputs, make_inputs_shared=False + ) + inputs = [raveled_grad_vars] + else: + if ravel_inputs is None: + warnings.warn( + "ValueGradFunction will become a function of raveled inputs.\n" + "Specify `ravel_inputs` to suppress this warning. Note that setting `ravel_inputs=False` will be forbidden in a future release." + ) + inputs = grad_vars - self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs) + self._pytensor_function = compile(inputs, outputs, givens=givens, **kwargs) + self._raveled_inputs = ravel_inputs def set_weights(self, values): if values.shape != (self._n_costs - 1,): @@ -247,7 +264,7 @@ def set_weights(self, values): def set_extra_values(self, extra_vars): self._extra_are_set = True for var in self._extra_vars: - self._extra_vars_shared[var.name].set_value(extra_vars[var.name]) + self._extra_vars_shared[var.name].set_value(extra_vars[var.name], borrow=True) def get_extra_values(self): if not self._extra_are_set: @@ -255,30 +272,21 @@ def get_extra_values(self): return {var.name: self._extra_vars_shared[var.name].get_value() for var in self._extra_vars} - def __call__(self, grad_vars, grad_out=None, extra_vars=None): + def __call__(self, grad_vars, *, extra_vars=None): if extra_vars is not None: self.set_extra_values(extra_vars) - - if not self._extra_are_set: + elif not self._extra_are_set: raise ValueError("Extra values are not set.") if isinstance(grad_vars, RaveledVars): - grad_vars = list(DictToArrayBijection.rmap(grad_vars).values()) - - cost, *grads = self._pytensor_function(*grad_vars) - - if grads: - grads_raveled = DictToArrayBijection.map( - {v.name: gv for v, gv in zip(self._grad_vars, grads)} - ) - - if grad_out is None: - return cost, grads_raveled.data + if self._raveled_inputs: + grad_vars = (grad_vars.data,) else: - np.copyto(grad_out, grads_raveled.data) - return cost - else: - return cost + grad_vars = DictToArrayBijection.rmap(grad_vars).values() + elif self._raveled_inputs and not isinstance(grad_vars, Sequence): + grad_vars = (grad_vars,) + + return self._pytensor_function(*grad_vars) @property def profile(self): @@ -521,7 +529,14 @@ def root(self): def isroot(self): return self.parent is None - def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): + def logp_dlogp_function( + self, + grad_vars=None, + tempered=False, + initial_point: PointType | None = None, + ravel_inputs: bool | None = None, + **kwargs, + ): """Compile a PyTensor function that computes logp and gradient. Parameters @@ -547,13 +562,22 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): costs = [self.logp()] input_vars = {i for i in graph_inputs(costs) if not isinstance(i, Constant)} - ip = self.initial_point(0) + if initial_point is None: + initial_point = self.initial_point(0) extra_vars_and_values = { - var: ip[var.name] + var: initial_point[var.name] for var in self.value_vars if var in input_vars and var not in grad_vars } - return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs) + return ValueGradFunction( + costs, + grad_vars, + extra_vars_and_values, + model=self, + initial_point=initial_point, + ravel_inputs=ravel_inputs, + **kwargs, + ) def compile_logp( self, @@ -1613,7 +1637,7 @@ def compile_fn( inputs = inputvars(outs) with self: - fn = compile_pymc( + fn = compile( inputs, outs, allow_input_downcast=True, diff --git a/pymc/model/fgraph.py b/pymc/model/fgraph.py index 78ad61306..fa5e34a7d 100644 --- a/pymc/model/fgraph.py +++ b/pymc/model/fgraph.py @@ -413,7 +413,7 @@ def extract_dims(var) -> tuple: __all__ = ( + "clone_model", "fgraph_from_model", "model_from_fgraph", - "clone_model", ) diff --git a/pymc/model/transform/optimization.py b/pymc/model/transform/optimization.py index bcf828ba3..187e4ee44 100644 --- a/pymc/model/transform/optimization.py +++ b/pymc/model/transform/optimization.py @@ -57,6 +57,28 @@ def freeze_dims_and_data( ------- Model A new model with the specified dimensions and data frozen. + + + Examples + -------- + .. code-block:: python + + import pymc as pm + import pytensor.tensor as pt + + from pymc.model.transform.optimization import freeze_dims_and_data + + with pm.Model() as m: + x = pm.Data("x", [0, 1, 2] * 1000) + y = pm.Normal("y", mu=pt.unique(x).mean()) + + # pt.unique(x).mean() has to be computed in every logp function evaluation + print("Logp eval time (1000x): ", m.profile(m.logp()).fct_call_time) + + # pt.uniqe(x).mean() is cached in the logp function + frozen_m = freeze_dims_and_data(m) + print("Logp eval time (1000x): ", frozen_m.profile(frozen_m.logp()).fct_call_time) + """ fg, memo = fgraph_from_model(model) diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py index 213831c9f..6fd44b038 100644 --- a/pymc/pytensorf.py +++ b/pymc/pytensorf.py @@ -45,6 +45,7 @@ from pytensor.tensor.random.op import RandomVariable from pytensor.tensor.random.type import RandomType from pytensor.tensor.random.var import RandomGeneratorSharedVariable +from pytensor.tensor.rewriting.basic import topo_unconditional_constant_folding from pytensor.tensor.rewriting.shape import ShapeFeature from pytensor.tensor.sharedvar import SharedVariable, TensorSharedVariable from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 @@ -58,22 +59,23 @@ __all__ = [ + "CallableTensor", + "compile", + "compile_pymc", + "cont_inputs", + "convert_data", + "convert_generator_data", + "convert_observed_data", + "floatX", + "generator", "gradient", "hessian", "hessian_diag", "inputvars", - "cont_inputs", - "floatX", "intX", "jacobian", - "CallableTensor", "join_nonshared_inputs", "make_shared_replacements", - "generator", - "convert_data", - "convert_generator_data", - "convert_observed_data", - "compile_pymc", ] @@ -980,7 +982,7 @@ def find_default_update(clients, rng: Variable) -> None | Variable: return rng_updates -def compile_pymc( +def compile( inputs, outputs, random_seed: SeedSequenceSeed = None, @@ -989,7 +991,7 @@ def compile_pymc( ) -> Function: """Use ``pytensor.function`` with specialized pymc rewrites always enabled. - This function also ensures shared RandomState/Generator used by RandomVariables + This function also ensures shared Generator used by RandomVariables in the graph are updated across calls, to ensure independent draws. Parameters @@ -1023,7 +1025,12 @@ def compile_pymc( """ # Create an update mapping of RandomVariable's RNG so that it is automatically # updated after every function call - rng_updates = collect_default_updates(inputs=inputs, outputs=outputs) + rng_updates = collect_default_updates( + inputs=[inp.variable if isinstance(inp, pytensor.In) else inp for inp in inputs], + outputs=[ + out.variable if isinstance(out, pytensor.Out) else out for out in makeiter(outputs) + ], + ) # We always reseed random variables as this provides RNGs with no chances of collision if rng_updates: @@ -1055,9 +1062,17 @@ def compile_pymc( return pytensor_function +def compile_pymc(*args, **kwargs): + warnings.warn( + "compile_pymc was renamed to compile. Old name will be removed in a future release of PyMC", + FutureWarning, + ) + return compile(*args, **kwargs) + + def constant_fold( xs: Sequence[TensorVariable], raise_not_constant: bool = True -) -> tuple[np.ndarray, ...]: +) -> tuple[np.ndarray | Variable, ...]: """Use constant folding to get constant values of a graph. Parameters @@ -1072,8 +1087,12 @@ def constant_fold( """ fg = FunctionGraph(outputs=xs, features=[ShapeFeature()], copy_inputs=False, clone=True) - # By default, rewrite_graph includes canonicalize which includes constant-folding as the final rewrite - folded_xs = rewrite_graph(fg).outputs + # The default rewrite_graph includes a constand_folding that is not always applied. + # We use an unconditional constant_folding as the last pass to ensure a thorough constant folding. + rewrite_graph(fg) + topo_unconditional_constant_folding.apply(fg) + + folded_xs = fg.outputs if raise_not_constant and not all(isinstance(folded_x, Constant) for folded_x in folded_xs): raise NotConstantValueError @@ -1122,10 +1141,40 @@ def toposort_replace( fgraph: FunctionGraph, replacements: Sequence[tuple[Variable, Variable]], reverse: bool = False ) -> None: """Replace multiple variables in place in topological order.""" - toposort = fgraph.toposort() + fgraph_toposort = {node: i for i, node in enumerate(fgraph.toposort())} + _inner_fgraph_toposorts = {} # Cache inner toposorts + + def _nested_toposort_index(var, fgraph_toposort) -> tuple[int]: + """Compute position of variable in fgraph toposort. + + When a variable is an OpFromGraph output, extend output with the toposort index of the inner graph(s). + + This allows ordering variables that come from the same OpFromGraph. + """ + if not var.owner: + return (-1,) + + index = fgraph_toposort[var.owner] + + # Recurse into OpFromGraphs + # TODO: Could also recurse into Scans + if isinstance(var.owner.op, OpFromGraph): + inner_fgraph = var.owner.op.fgraph + + if inner_fgraph not in _inner_fgraph_toposorts: + _inner_fgraph_toposorts[inner_fgraph] = { + node: i for i, node in enumerate(inner_fgraph.toposort()) + } + + inner_fgraph_toposort = _inner_fgraph_toposorts[inner_fgraph] + inner_var = inner_fgraph.outputs[var.owner.outputs.index(var)] + return (index, *_nested_toposort_index(inner_var, inner_fgraph_toposort)) + else: + return (index,) + sorted_replacements = sorted( replacements, - key=lambda pair: toposort.index(pair[0].owner) if pair[0].owner else -1, + key=lambda pair: _nested_toposort_index(pair[0], fgraph_toposort), reverse=reverse, ) fgraph.replace_all(sorted_replacements, import_missing=True) diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index db706f210..c07683555 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -51,7 +51,7 @@ from pymc.backends.base import MultiTrace from pymc.blocking import PointType from pymc.model import Model, modelcontext -from pymc.pytensorf import compile_pymc +from pymc.pytensorf import compile from pymc.util import ( CustomProgress, RandomState, @@ -64,8 +64,8 @@ __all__ = ( "compile_forward_sampling_function", "draw", - "sample_prior_predictive", "sample_posterior_predictive", + "sample_prior_predictive", ) ArrayLike: TypeAlias = np.ndarray | list[float] @@ -273,7 +273,7 @@ def expand(node): ] return ( - compile_pymc(inputs, fg.outputs, givens=givens, on_unused_input="ignore", **kwargs), + compile(inputs, fg.outputs, givens=givens, on_unused_input="ignore", **kwargs), set(basic_rvs) & (volatile_nodes - set(givens_dict)), # Basic RVs that will be resampled ) @@ -329,7 +329,7 @@ def draw( if random_seed is not None: (random_seed,) = _get_seeds_per_chain(random_seed, 1) - draw_fn = compile_pymc(inputs=[], outputs=vars, random_seed=random_seed, **kwargs) + draw_fn = compile(inputs=[], outputs=vars, random_seed=random_seed, **kwargs) if draws == 1: return draw_fn() diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index 4ee79607b..bc3e3475d 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -80,8 +80,8 @@ sys.setrecursionlimit(10000) __all__ = [ - "sample", "init_nuts", + "sample", ] Step: TypeAlias = BlockedStep | CompoundStep @@ -101,7 +101,10 @@ def instantiate_steppers( model: Model, steps: list[Step], selected_steps: Mapping[type[BlockedStep], list[Any]], + *, step_kwargs: dict[str, dict] | None = None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, ) -> Step | list[Step]: """Instantiate steppers assigned to the model variables. @@ -131,13 +134,23 @@ def instantiate_steppers( step_kwargs = {} used_keys = set() - for step_class, vars in selected_steps.items(): - if vars: - name = getattr(step_class, "name") - args = step_kwargs.get(name, {}) - used_keys.add(name) - step = step_class(vars=vars, model=model, **args) - steps.append(step) + if selected_steps: + if initial_point is None: + initial_point = model.initial_point() + + for step_class, vars in selected_steps.items(): + if vars: + name = getattr(step_class, "name") + kwargs = step_kwargs.get(name, {}) + used_keys.add(name) + step = step_class( + vars=vars, + model=model, + initial_point=initial_point, + compile_kwargs=compile_kwargs, + **kwargs, + ) + steps.append(step) unused_args = set(step_kwargs).difference(used_keys) if unused_args: @@ -161,18 +174,22 @@ def assign_step_methods( model: Model, step: Step | Sequence[Step] | None = None, methods: Sequence[type[BlockedStep]] | None = None, - step_kwargs: dict[str, Any] | None = None, -) -> Step | list[Step]: +) -> tuple[list[Step], dict[type[BlockedStep], list[Variable]]]: """Assign model variables to appropriate step methods. - Passing a specified model will auto-assign its constituent stochastic - variables to step methods based on the characteristics of the variables. + Passing a specified model will auto-assign its constituent value + variables to step methods based on the characteristics of the respective + random variables, and whether the logp can be differentiated with respect to it. + This function is intended to be called automatically from ``sample()``, but may be called manually. Each step method passed should have a ``competence()`` method that returns an ordinal competence value corresponding to the variable passed to it. This value quantifies the appropriateness of the step method for sampling the variable. + The outputs of this function can then be passed to `instantiate_steppers()` + to initialize the assigned step samplers. + Parameters ---------- model : Model object @@ -183,24 +200,32 @@ def assign_step_methods( methods : iterable of step method classes, optional The set of step methods from which the function may choose. Defaults to the main step methods provided by PyMC. - step_kwargs : dict, optional - Parameters for the samplers. Keys are the lower case names of - the step method, values a dict of arguments. Returns ------- - methods : list - List of step methods associated with the model's variables. + provided_steps: list of Step instances + List of user provided instantiated step(s) + assigned_steps: dict of Step class to Variable + Dictionary with automatically selected step classes as keys and associated value variables as values """ - steps: list[Step] = [] + provided_steps: list[Step] = [] assigned_vars: set[Variable] = set() if step is not None: if isinstance(step, BlockedStep | CompoundStep): - steps.append(step) + provided_steps = [step] + elif isinstance(step, Sequence): + provided_steps = list(step) else: - steps.extend(step) - for step in steps: + raise ValueError(f"Step should be a Step or a sequence of Steps, got {step}") + + for step in provided_steps: + if not isinstance(step, BlockedStep | CompoundStep): + if issubclass(step, BlockedStep | CompoundStep): + raise ValueError(f"Provided {step} was not initialized") + else: + raise ValueError(f"{step} is not a Step instance") + for var in step.vars: if var not in model.value_vars: raise ValueError( @@ -235,7 +260,7 @@ def assign_step_methods( ) selected_steps.setdefault(selected, []).append(var) - return instantiate_steppers(model, steps, selected_steps, step_kwargs) + return provided_steps, selected_steps def _print_step_hierarchy(s: Step, level: int = 0) -> None: @@ -411,6 +436,7 @@ def sample( callback=None, mp_ctx=None, blas_cores: int | None | Literal["auto"] = "auto", + compile_kwargs: dict | None = None, **kwargs, ) -> InferenceData: ... @@ -443,6 +469,7 @@ def sample( mp_ctx=None, model: Model | None = None, blas_cores: int | None | Literal["auto"] = "auto", + compile_kwargs: dict | None = None, **kwargs, ) -> MultiTrace: ... @@ -474,6 +501,7 @@ def sample( mp_ctx=None, blas_cores: int | None | Literal["auto"] = "auto", model: Model | None = None, + compile_kwargs: dict | None = None, **kwargs, ) -> InferenceData | MultiTrace: r"""Draw samples from the posterior using the given step methods. @@ -500,10 +528,8 @@ def sample( random_seed : int, array-like of int, or Generator, optional Random seed(s) used by the sampling steps. Each step will create its own :py:class:`~numpy.random.Generator` object to make its random draws in a way that is - indepedent from all other steppers and all other chains. If a list, tuple or array of ints - is passed, each entry will be used to seed the creation of ``Generator`` objects. - A ``ValueError`` will be raised if the length does not match the number of chains. - A ``TypeError`` will be raised if a :py:class:`~numpy.random.RandomState` object is passed. + indepedent from all other steppers and all other chains. + A ``TypeError`` will be raised if a legacy :py:class:`~numpy.random.RandomState` object is passed. We no longer support ``RandomState`` objects because their seeding mechanism does not allow easy spawning of new independent random streams that are needed by the step methods. progressbar : bool, optional default=True @@ -575,6 +601,9 @@ def sample( See multiprocessing documentation for details. model : Model (optional if in ``with`` context) Model to sample from. The model needs to have free random variables. + compile_kwargs: dict, optional + Dictionary with keyword argument to pass to the functions compiled by the step methods. + Returns ------- @@ -698,7 +727,17 @@ def joined_blas_limiter(): ) if random_seed == -1: + warnings.warn( + "Setting random_seed = -1 is deprecated. Pass `None` to not specify a seed.", + FutureWarning, + ) random_seed = None + elif isinstance(random_seed, tuple | list): + warnings.warn( + "A list or tuple of random_seed no longer specifies the specific random_seed of each chain. " + "Use a single seed instead.", + UserWarning, + ) rngs = get_random_generator(random_seed).spawn(chains) random_seed_list = [rng.integers(2**30) for rng in rngs] @@ -719,22 +758,23 @@ def joined_blas_limiter(): msg = f"Only {draws} samples per chain. Reliable r-hat and ESS diagnostics require longer chains for accurate estimate." _log.warning(msg) - auto_nuts_init = True - if step is not None: - if isinstance(step, CompoundStep): - for method in step.methods: - if isinstance(method, NUTS): - auto_nuts_init = False - elif isinstance(step, NUTS): - auto_nuts_init = False - - initial_points = None - step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs) + provided_steps, selected_steps = assign_step_methods(model, step, methods=pm.STEP_METHODS) + exclusive_nuts = ( + # User provided an instantiated NUTS step, and nothing else is needed + (not selected_steps and len(provided_steps) == 1 and isinstance(provided_steps[0], NUTS)) + or + # Only automatically selected NUTS step is needed + ( + not provided_steps + and len(selected_steps) == 1 + and issubclass(next(iter(selected_steps)), NUTS) + ) + ) if nuts_sampler != "pymc": - if not isinstance(step, NUTS): + if not exclusive_nuts: raise ValueError( - "Model can not be sampled with NUTS alone. Your model is probably not continuous." + "Model can not be sampled with NUTS alone. It either has discrete variables or a non-differentiable log-probability." ) with joined_blas_limiter(): @@ -755,13 +795,11 @@ def joined_blas_limiter(): **kwargs, ) - if isinstance(step, list): - step = CompoundStep(step) - elif isinstance(step, NUTS) and auto_nuts_init: + if exclusive_nuts and not provided_steps: + # Special path for NUTS initialization if "nuts" in kwargs: nuts_kwargs = kwargs.pop("nuts") [kwargs.setdefault(k, v) for k, v in nuts_kwargs.items()] - _log.info("Auto-assigning NUTS sampler...") with joined_blas_limiter(): initial_points, step = init_nuts( init=init, @@ -773,11 +811,11 @@ def joined_blas_limiter(): jitter_max_retries=jitter_max_retries, tune=tune, initvals=initvals, + compile_kwargs=compile_kwargs, **kwargs, ) - - if initial_points is None: - # Time to draw/evaluate numeric start points for each chain. + else: + # Get initial points ipfns = make_initial_point_fns_per_chain( model=model, overrides=initvals, @@ -786,11 +824,17 @@ def joined_blas_limiter(): ) initial_points = [ipfn(seed) for ipfn, seed in zip(ipfns, random_seed_list)] - # One final check that shapes and logps at the starting points are okay. - ip: dict[str, np.ndarray] - for ip in initial_points: - model.check_start_vals(ip) - _check_start_shape(model, ip) + # Instantiate automatically selected steps + step = instantiate_steppers( + model, + steps=provided_steps, + selected_steps=selected_steps, + step_kwargs=kwargs, + initial_point=initial_points[0], + compile_kwargs=compile_kwargs, + ) + if isinstance(step, list): + step = CompoundStep(step) if var_names is not None: trace_vars = [v for v in model.unobserved_RVs if v.name in var_names] @@ -806,7 +850,7 @@ def joined_blas_limiter(): expected_length=draws + tune, step=step, trace_vars=trace_vars, - initial_point=ip, + initial_point=initial_points[0], model=model, ) @@ -954,7 +998,6 @@ def _sample_return( f"took {t_sampling:.0f} seconds." ) - idata = None if compute_convergence_checks or return_inferencedata: ikwargs: dict[str, Any] = {"model": model, "save_warmup": not discard_tuned_samples} ikwargs.update(idata_kwargs) @@ -1159,7 +1202,6 @@ def _iter_sample( diverging : bool Indicates if the draw is divergent. Only available with some samplers. """ - model = modelcontext(model) draws = int(draws) if draws < 1: @@ -1174,8 +1216,6 @@ def _iter_sample( if hasattr(step, "reset_tuning"): step.reset_tuning() for i in range(draws): - diverging = False - if i == 0 and hasattr(step, "iter_count"): step.iter_count = 0 if i == tune: @@ -1277,14 +1317,11 @@ def _mp_sample( strace = traces[draw.chain] strace.record(draw.point, draw.stats) log_warning_stats(draw.stats) - if draw.is_last: - strace.close() if callback is not None: callback(trace=strace, draw=draw) except ps.ParallelSamplingError as error: - strace = traces[error._chain] for strace in traces: strace.close() raise @@ -1301,6 +1338,7 @@ def _init_jitter( seeds: Sequence[int] | np.ndarray, jitter: bool, jitter_max_retries: int, + logp_dlogp_func=None, ) -> list[PointType]: """Apply a uniform jitter in [-1, 1] to the test value as starting point in each chain. @@ -1331,19 +1369,30 @@ def _init_jitter( if not jitter: return [ipfn(seed) for ipfn, seed in zip(ipfns, seeds)] + model_logp_fn: Callable + if logp_dlogp_func is None: + model_logp_fn = model.compile_logp() + else: + + def model_logp_fn(ip): + q, _ = DictToArrayBijection.map(ip) + return logp_dlogp_func([q], extra_vars={})[0] + initial_points = [] for ipfn, seed in zip(ipfns, seeds): - rng = np.random.RandomState(seed) + rng = np.random.default_rng(seed) for i in range(jitter_max_retries + 1): point = ipfn(seed) - if i < jitter_max_retries: - try: + point_logp = model_logp_fn(point) + if not np.isfinite(point_logp): + if i == jitter_max_retries: + # Print informative message on last attempted point model.check_start_vals(point) - except SamplingError: - # Retry with a new seed - seed = rng.randint(2**30, dtype=np.int64) - else: - break + # Retry with a new seed + seed = rng.integers(2**30, dtype=np.int64) + else: + break + initial_points.append(point) return initial_points @@ -1359,6 +1408,7 @@ def init_nuts( jitter_max_retries: int = 10, tune: int | None = None, initvals: StartDict | Sequence[StartDict | None] | None = None, + compile_kwargs: dict | None = None, **kwargs, ) -> tuple[Sequence[PointType], NUTS]: """Set up the mass matrix initialization for NUTS. @@ -1435,21 +1485,29 @@ def init_nuts( if init == "auto": init = "jitter+adapt_diag" + if compile_kwargs is None: + compile_kwargs = {} + random_seed_list = _get_seeds_per_chain(random_seed, chains) _log.info(f"Initializing NUTS using {init}...") - cb = [ - pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"), - pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"), - ] + cb = [] + if "advi" in init: + cb = [ + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"), + pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"), + ] + logp_dlogp_func = model.logp_dlogp_function(ravel_inputs=True, **compile_kwargs) + logp_dlogp_func.trust_input = True initial_points = _init_jitter( model, initvals, seeds=random_seed_list, jitter="jitter" in init, jitter_max_retries=jitter_max_retries, + logp_dlogp_func=logp_dlogp_func, ) apoints = [DictToArrayBijection.map(point) for point in initial_points] @@ -1563,7 +1621,14 @@ def init_nuts( else: raise ValueError(f"Unknown initializer: {init}.") - step = pm.NUTS(potential=potential, model=model, rng=random_seed_list[0], **kwargs) + step = pm.NUTS( + potential=potential, + model=model, + rng=random_seed_list[0], + initial_point=initial_points[0], + logp_dlogp_func=logp_dlogp_func, + **kwargs, + ) # Filter deterministics from initial_points value_var_names = [var.name for var in model.value_vars] diff --git a/pymc/sampling/parallel.py b/pymc/sampling/parallel.py index a94863738..4edc80433 100644 --- a/pymc/sampling/parallel.py +++ b/pymc/sampling/parallel.py @@ -228,15 +228,12 @@ def __init__( self._shared_point = {} self._point = {} - for name, shape, dtype in DictToArrayBijection.map(start).point_map_info: - size = 1 - for dim in shape: - size *= int(dim) - size *= dtype.itemsize - if size != ctypes.c_size_t(size).value: + for name, shape, size, dtype in DictToArrayBijection.map(start).point_map_info: + byte_size = size * dtype.itemsize + if byte_size != ctypes.c_size_t(byte_size).value: raise ValueError(f"Variable {name} is too large") - array = mp_ctx.RawArray("c", size) + array = mp_ctx.RawArray("c", byte_size) self._shared_point[name] = (array, shape, dtype) array_np = np.frombuffer(array, dtype).reshape(shape) array_np[...] = start[name] diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 608454ef3..db1b0cf5b 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -30,7 +30,7 @@ from pymc.initial_point import make_initial_point_expression from pymc.model import Point, modelcontext from pymc.pytensorf import ( - compile_pymc, + compile, floatX, join_nonshared_inputs, make_shared_replacements, @@ -636,6 +636,6 @@ def _logp_forw(point, out_vars, in_vars, shared): out_list, inarray0 = join_nonshared_inputs( point=point, outputs=out_vars, inputs=in_vars, shared_inputs=shared ) - f = compile_pymc([inarray0], out_list[0]) + f = compile([inarray0], out_list[0]) f.trust_input = True return f diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index a81d34d55..b1ae52e03 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -35,6 +35,9 @@ from pymc.backends.arviz import dict_to_dataset, to_inference_data from pymc.backends.base import MultiTrace +from pymc.distributions.custom import CustomDistRV, CustomSymbolicDistRV +from pymc.distributions.distribution import _support_point +from pymc.logprob.abstract import _icdf, _logcdf, _logprob from pymc.model import Model, modelcontext from pymc.sampling.parallel import _cpu_count from pymc.smc.kernels import IMH @@ -346,11 +349,18 @@ def run_chains(chains, progressbar, params, random_seed, kernel_kwargs, cores): # main process and our worker functions _progress = manager.dict() + # check if model contains CustomDistributions defined without dist argument + custom_methods = _find_custom_dist_dispatch_methods(params[3]) + # "manually" (de)serialize params before/after multiprocessing params = tuple(cloudpickle.dumps(p) for p in params) kernel_kwargs = {key: cloudpickle.dumps(value) for key, value in kernel_kwargs.items()} - with ProcessPoolExecutor(max_workers=cores) as executor: + with ProcessPoolExecutor( + max_workers=cores, + initializer=_register_custom_methods, + initargs=(custom_methods,), + ) as executor: for c in range(chains): # iterate over the jobs we need to run # set visible false so we don't have a lot of bars all at once: task_id = progress.add_task(f"Chain {c}", status="Stage: 0 Beta: 0") @@ -383,3 +393,32 @@ def run_chains(chains, progressbar, params, random_seed, kernel_kwargs, cores): ) return tuple(cloudpickle.loads(r.result()) for r in done) + + +def _find_custom_dist_dispatch_methods(model): + custom_methods = {} + for rv in model.basic_RVs: + rv_type = rv.owner.op + cls = type(rv_type) + if isinstance(rv_type, CustomDistRV | CustomSymbolicDistRV): + custom_methods[cloudpickle.dumps(cls)] = ( + cloudpickle.dumps(_logprob.registry.get(cls, None)), + cloudpickle.dumps(_logcdf.registry.get(cls, None)), + cloudpickle.dumps(_icdf.registry.get(cls, None)), + cloudpickle.dumps(_support_point.registry.get(cls, None)), + ) + + return custom_methods + + +def _register_custom_methods(custom_methods): + for cls, (logprob, logcdf, icdf, support_point) in custom_methods.items(): + cls = cloudpickle.loads(cls) + if logprob is not None: + _logprob.register(cls, cloudpickle.loads(logprob)) + if logcdf is not None: + _logcdf.register(cls, cloudpickle.loads(logcdf)) + if icdf is not None: + _icdf.register(cls, cloudpickle.loads(icdf)) + if support_point is not None: + _support_point.register(cls, cloudpickle.loads(support_point)) diff --git a/pymc/step_methods/arraystep.py b/pymc/step_methods/arraystep.py index b7da80aee..d15b14499 100644 --- a/pymc/step_methods/arraystep.py +++ b/pymc/step_methods/arraystep.py @@ -99,26 +99,27 @@ def __init__(self, vars, shared, blocked=True, rng: RandomGenerator = None): :py:func:`pymc.util.get_random_generator` for more information. """ self.vars = vars + self.var_names = tuple(cast(str, var.name) for var in vars) self.shared = {get_var_name(var): shared for var, shared in shared.items()} self.blocked = blocked self.rng = get_random_generator(rng) def step(self, point: PointType) -> tuple[PointType, StatsType]: - for name, shared_var in self.shared.items(): - shared_var.set_value(point[name]) - - var_dict = {cast(str, v.name): point[cast(str, v.name)] for v in self.vars} - q = DictToArrayBijection.map(var_dict) - + full_point = None + if self.shared: + for name, shared_var in self.shared.items(): + shared_var.set_value(point[name], borrow=True) + full_point = point + point = {name: point[name] for name in self.var_names} + + q = DictToArrayBijection.map(point) apoint, stats = self.astep(q) if not isinstance(apoint, RaveledVars): # We assume that the mapping has stayed the same apoint = RaveledVars(apoint, q.point_map_info) - new_point = DictToArrayBijection.rmap(apoint, start_point=point) - - return new_point, stats + return DictToArrayBijection.rmap(apoint, start_point=full_point), stats @abstractmethod def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: @@ -174,27 +175,34 @@ class GradientSharedStep(ArrayStepShared): def __init__( self, vars, + *, model=None, - blocked=True, + blocked: bool = True, dtype=None, logp_dlogp_func=None, rng: RandomGenerator = None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, **pytensor_kwargs, ): model = modelcontext(model) if logp_dlogp_func is None: - func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs) - else: - func = logp_dlogp_func - - self._logp_dlogp_func = func + if compile_kwargs is None: + compile_kwargs = {} + logp_dlogp_func = model.logp_dlogp_function( + vars, + dtype=dtype, + ravel_inputs=True, + initial_point=initial_point, + **compile_kwargs, + **pytensor_kwargs, + ) + logp_dlogp_func.trust_input = True - super().__init__(vars, func._extra_vars_shared, blocked, rng=rng) + self._logp_dlogp_func = logp_dlogp_func - def step(self, point) -> tuple[PointType, StatsType]: - self._logp_dlogp_func._extra_are_set = True - return super().step(point) + super().__init__(vars, logp_dlogp_func._extra_vars_shared, blocked, rng=rng) def metrop_select( diff --git a/pymc/step_methods/hmc/base_hmc.py b/pymc/step_methods/hmc/base_hmc.py index 87daff649..564daebed 100644 --- a/pymc/step_methods/hmc/base_hmc.py +++ b/pymc/step_methods/hmc/base_hmc.py @@ -22,7 +22,7 @@ import numpy as np -from pymc.blocking import DictToArrayBijection, RaveledVars, StatsType +from pymc.blocking import DictToArrayBijection, PointType, RaveledVars, StatsType from pymc.exceptions import SamplingError from pymc.model import Point, modelcontext from pymc.pytensorf import floatX @@ -82,11 +82,12 @@ class BaseHMC(GradientSharedStep): def __init__( self, vars=None, + *, scaling=None, step_scale=0.25, is_cov=False, model=None, - blocked=True, + blocked: bool = True, potential=None, dtype=None, Emax=1000, @@ -97,6 +98,7 @@ def __init__( adapt_step_size=True, step_rand=None, rng=None, + initial_point: PointType | None = None, **pytensor_kwargs, ): """Set up Hamiltonian samplers with common structures. @@ -137,20 +139,23 @@ def __init__( else: vars = get_value_vars_from_user_vars(vars, self._model) super().__init__( - vars, blocked=blocked, model=self._model, dtype=dtype, rng=rng, **pytensor_kwargs + vars, + blocked=blocked, + model=self._model, + dtype=dtype, + rng=rng, + initial_point=initial_point, + **pytensor_kwargs, ) self.adapt_step_size = adapt_step_size self.Emax = Emax self.iter_count = 0 - # We're using the initial/test point to determine the (initial) step - # size. - # XXX: If the dimensions of these terms change, the step size - # dimension-scaling should change as well, no? - test_point = self._model.initial_point() + if initial_point is None: + initial_point = self._model.initial_point() - nuts_vars = [test_point[v.name] for v in vars] + nuts_vars = [initial_point[v.name] for v in vars] size = sum(v.size for v in nuts_vars) self.step_size = step_scale / (size**0.25) @@ -193,8 +198,6 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: process_start = time.process_time() p0 = self.potential.random() - p0 = RaveledVars(p0, q0.point_map_info) - start = self.integrator.compute_state(q0, p0) warning: SamplerWarning | None = None @@ -208,7 +211,8 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: self.potential.raise_ok(q0.point_map_info) message_energy = ( "Bad initial energy, check any log probabilities that " - f"are inf or -inf, nan or very small:\n{error_logp}" + f"are inf or -inf, nan or very small:\n{error_logp}\n." + f"Try model.debug() to identify parametrization problems." ) warning = SamplerWarning( WarningType.BAD_ENERGY, @@ -225,13 +229,13 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: if self._step_rand is not None: step_size = self._step_rand(step_size, rng=self.rng) - hmc_step = self._hamiltonian_step(start, p0.data, step_size) + hmc_step = self._hamiltonian_step(start, p0, step_size) perf_end = time.perf_counter() process_end = time.process_time() self.step_adapt.update(hmc_step.accept_stat, adapt_step) - self.potential.update(hmc_step.end.q, hmc_step.end.q_grad, self.tune) + self.potential.update(hmc_step.end.q.data, hmc_step.end.q_grad, self.tune) if hmc_step.divergence_info: info = hmc_step.divergence_info point = None diff --git a/pymc/step_methods/hmc/integration.py b/pymc/step_methods/hmc/integration.py index 2d1e725cd..067cd239f 100644 --- a/pymc/step_methods/hmc/integration.py +++ b/pymc/step_methods/hmc/integration.py @@ -18,13 +18,13 @@ from scipy import linalg -from pymc.blocking import RaveledVars +from pymc.blocking import DictToArrayBijection, RaveledVars from pymc.step_methods.hmc.quadpotential import QuadPotential class State(NamedTuple): q: RaveledVars - p: RaveledVars + p: np.ndarray v: np.ndarray q_grad: np.ndarray energy: float @@ -40,23 +40,35 @@ class CpuLeapfrogIntegrator: def __init__(self, potential: QuadPotential, logp_dlogp_func): """Leapfrog integrator using CPU.""" self._potential = potential - self._logp_dlogp_func = logp_dlogp_func - self._dtype = self._logp_dlogp_func.dtype + # Sidestep logp_dlogp_function.__call__ + pytensor_function = logp_dlogp_func._pytensor_function + # Create some wrappers for backwards compatibility during transition + # When raveled_inputs=False is forbidden, func = pytensor_function + if logp_dlogp_func._raveled_inputs: + + def func(q, _): + return pytensor_function(q) + + else: + + def func(q, point_map_info): + unraveled_q = DictToArrayBijection.rmap(RaveledVars(q, point_map_info)).values() + return pytensor_function(*unraveled_q) + + self._logp_dlogp_func = func + self._dtype = logp_dlogp_func.dtype if self._potential.dtype != self._dtype: raise ValueError( f"dtypes of potential ({self._potential.dtype}) and logp function ({self._dtype})" "don't match." ) - def compute_state(self, q: RaveledVars, p: RaveledVars): + def compute_state(self, q: RaveledVars, p: np.ndarray): """Compute Hamiltonian functions using a position and momentum.""" - if q.data.dtype != self._dtype or p.data.dtype != self._dtype: - raise ValueError(f"Invalid dtype. Must be {self._dtype}") - - logp, dlogp = self._logp_dlogp_func(q) + logp, dlogp = self._logp_dlogp_func(q.data, q.point_map_info) - v = self._potential.velocity(p.data, out=None) - kinetic = self._potential.energy(p.data, velocity=v) + v = self._potential.velocity(p, out=None) + kinetic = self._potential.energy(p, velocity=v) energy = kinetic - logp return State(q, p, v, dlogp, energy, logp, 0) @@ -96,10 +108,10 @@ def _step(self, epsilon, state): axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype) pot = self._potential - q_new = state.q.data.copy() - p_new = state.p.data.copy() + q = state.q + q_new = q.data.copy() + p_new = state.p.copy() v_new = np.empty_like(q_new) - q_new_grad = np.empty_like(q_new) dt = 0.5 * epsilon @@ -112,19 +124,16 @@ def _step(self, epsilon, state): # q_new = q + epsilon * v_new axpy(v_new, q_new, a=epsilon) - p_new = RaveledVars(p_new, state.p.point_map_info) - q_new = RaveledVars(q_new, state.q.point_map_info) - - logp = self._logp_dlogp_func(q_new, grad_out=q_new_grad) + logp, q_new_grad = self._logp_dlogp_func(q_new, q.point_map_info) # p_new = p_new + dt * q_new_grad - axpy(q_new_grad, p_new.data, a=dt) + axpy(q_new_grad, p_new, a=dt) - kinetic = pot.velocity_energy(p_new.data, v_new) + kinetic = pot.velocity_energy(p_new, v_new) energy = kinetic - logp return State( - q_new, + RaveledVars(q_new, state.q.point_map_info), p_new, v_new, q_new_grad, diff --git a/pymc/step_methods/hmc/nuts.py b/pymc/step_methods/hmc/nuts.py index fb816954b..770605f4b 100644 --- a/pymc/step_methods/hmc/nuts.py +++ b/pymc/step_methods/hmc/nuts.py @@ -19,8 +19,8 @@ import numpy as np -from pymc.math import logbern -from pymc.pytensorf import floatX +from pytensor import config + from pymc.stats.convergence import SamplerWarning from pymc.step_methods.compound import Competence from pymc.step_methods.hmc import integration @@ -205,11 +205,12 @@ def _hamiltonian_step(self, start, p0, step_size): else: max_treedepth = self.max_treedepth - tree = _Tree(len(p0), self.integrator, start, step_size, self.Emax, rng=self.rng) + rng = self.rng + tree = _Tree(len(p0), self.integrator, start, step_size, self.Emax, rng=rng) reached_max_treedepth = False for _ in range(max_treedepth): - direction = logbern(np.log(0.5), rng=self.rng) * 2 - 1 + direction = (rng.random() < 0.5) * 2 - 1 divergence_info, turning = tree.extend(direction) if divergence_info or turning: @@ -218,9 +219,8 @@ def _hamiltonian_step(self, start, p0, step_size): reached_max_treedepth = not self.tune stats = tree.stats() - accept_stat = stats["mean_tree_accept"] stats["reached_max_treedepth"] = reached_max_treedepth - return HMCStepData(tree.proposal, accept_stat, divergence_info, stats) + return HMCStepData(tree.proposal, stats["mean_tree_accept"], divergence_info, stats) @staticmethod def competence(var, has_grad): @@ -241,6 +241,27 @@ def competence(var, has_grad): class _Tree: + __slots__ = ( + "Emax", + "depth", + "floatX", + "integrator", + "left", + "log_accept_sum", + "log_size", + "max_energy_change", + "mean_tree_accept", + "n_proposals", + "ndim", + "p_sum", + "proposal", + "right", + "rng", + "start", + "start_energy", + "step_size", + ) + def __init__( self, ndim: int, @@ -273,14 +294,15 @@ def __init__( self.rng = rng self.left = self.right = start - self.proposal = Proposal(start.q.data, start.q_grad, start.energy, start.model_logp, 0) + self.proposal = Proposal(start.q, start.q_grad, start.energy, start.model_logp, 0) self.depth = 0 self.log_size = 0.0 self.log_accept_sum = -np.inf self.mean_tree_accept = 0.0 self.n_proposals = 0 - self.p_sum = start.p.data.copy() + self.p_sum = start.p.copy() self.max_energy_change = 0.0 + self.floatX = config.floatX def extend(self, direction): """Double the treesize by extending the tree in the given direction. @@ -296,7 +318,7 @@ def extend(self, direction): """ if direction > 0: tree, diverging, turning = self._build_subtree( - self.right, self.depth, floatX(np.asarray(self.step_size)) + self.right, self.depth, np.asarray(self.step_size, dtype=self.floatX) ) leftmost_begin, leftmost_end = self.left, self.right rightmost_begin, rightmost_end = tree.left, tree.right @@ -305,7 +327,7 @@ def extend(self, direction): self.right = tree.right else: tree, diverging, turning = self._build_subtree( - self.left, self.depth, floatX(np.asarray(-self.step_size)) + self.left, self.depth, np.asarray(-self.step_size, dtype=self.floatX) ) leftmost_begin, leftmost_end = tree.right, tree.left rightmost_begin, rightmost_end = self.left, self.right @@ -318,23 +340,27 @@ def extend(self, direction): if diverging or turning: return diverging, turning - size1, size2 = self.log_size, tree.log_size - if logbern(size2 - size1, rng=self.rng): + self_log_size, tree_log_size = self.log_size, tree.log_size + if np.log(self.rng.random()) < (tree_log_size - self_log_size): self.proposal = tree.proposal - self.log_size = np.logaddexp(self.log_size, tree.log_size) - self.p_sum[:] += tree.p_sum + self.log_size = np.logaddexp(tree_log_size, self_log_size) + + p_sum = self.p_sum + p_sum[:] += tree.p_sum # Additional turning check only when tree depth > 0 to avoid redundant work if self.depth > 0: left, right = self.left, self.right - p_sum = self.p_sum turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0) - p_sum1 = leftmost_p_sum + rightmost_begin.p.data - turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0) - p_sum2 = leftmost_end.p.data + rightmost_p_sum - turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0) - turning = turning | turning1 | turning2 + if not turning: + p_sum1 = leftmost_p_sum + rightmost_begin.p + turning = (p_sum1.dot(leftmost_begin.v) <= 0) or ( + p_sum1.dot(rightmost_begin.v) <= 0 + ) + if not turning: + p_sum2 = leftmost_end.p + rightmost_p_sum + turning = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0) return diverging, turning @@ -356,7 +382,10 @@ def _single_step(self, left: State, epsilon: float): if np.isnan(energy_change): energy_change = np.inf - self.log_accept_sum = np.logaddexp(self.log_accept_sum, min(0, -energy_change)) + self.log_accept_sum = np.logaddexp( + self.log_accept_sum, (-energy_change if energy_change > 0 else 0) + ) + # self.log_accept_sum = np.logaddexp(self.log_accept_sum, min(0, -energy_change)) if np.abs(energy_change) > np.abs(self.max_energy_change): self.max_energy_change = energy_change @@ -366,13 +395,13 @@ def _single_step(self, left: State, epsilon: float): # Saturated Metropolis accept probability with Boltzmann weight log_size = -energy_change proposal = Proposal( - right.q.data, + right.q, right.q_grad, right.energy, right.model_logp, right.index_in_trajectory, ) - tree = Subtree(right, right, right.p.data, proposal, log_size) + tree = Subtree(right, right, right.p, proposal, log_size) return tree, None, False else: error_msg = f"Energy change in leapfrog step is too large: {energy_change}." @@ -399,15 +428,15 @@ def _build_subtree(self, left, depth, epsilon): p_sum = tree1.p_sum + tree2.p_sum turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0) # Additional U turn check only when depth > 1 to avoid redundant work. - if depth - 1 > 0: - p_sum1 = tree1.p_sum + tree2.left.p.data - turning1 = (p_sum1.dot(tree1.left.v) <= 0) or (p_sum1.dot(tree2.left.v) <= 0) - p_sum2 = tree1.right.p.data + tree2.p_sum - turning2 = (p_sum2.dot(tree1.right.v) <= 0) or (p_sum2.dot(tree2.right.v) <= 0) - turning = turning | turning1 | turning2 + if (not turning) and (depth - 1 > 0): + p_sum1 = tree1.p_sum + tree2.left.p + turning = (p_sum1.dot(tree1.left.v) <= 0) or (p_sum1.dot(tree2.left.v) <= 0) + if not turning: + p_sum2 = tree1.right.p + tree2.p_sum + turning = (p_sum2.dot(tree1.right.v) <= 0) or (p_sum2.dot(tree2.right.v) <= 0) log_size = np.logaddexp(tree1.log_size, tree2.log_size) - if logbern(tree2.log_size - log_size, rng=self.rng): + if np.log(self.rng.random()) < (tree2.log_size - log_size): proposal = tree2.proposal else: proposal = tree1.proposal diff --git a/pymc/step_methods/hmc/quadpotential.py b/pymc/step_methods/hmc/quadpotential.py index 53185bbb8..33f3571ed 100644 --- a/pymc/step_methods/hmc/quadpotential.py +++ b/pymc/step_methods/hmc/quadpotential.py @@ -30,13 +30,13 @@ from pymc.util import RandomGenerator, get_random_generator __all__ = [ - "quad_potential", "QuadPotentialDiag", - "QuadPotentialFull", - "QuadPotentialFullInv", "QuadPotentialDiagAdapt", + "QuadPotentialFull", "QuadPotentialFullAdapt", + "QuadPotentialFullInv", "isquadpotential", + "quad_potential", ] @@ -363,11 +363,11 @@ def raise_ok(self, map_info): if np.any(self._stds == 0): errmsg = ["Mass matrix contains zeros on the diagonal. "] last_idx = 0 - for name, shape, dtype in map_info: - arr_len = np.prod(shape, dtype=int) - index = np.where(self._stds[last_idx : last_idx + arr_len] == 0)[0] + for name, shape, size, dtype in map_info: + end = last_idx + size + index = np.where(self._stds[last_idx:end] == 0)[0] errmsg.append(f"The derivative of RV `{name}`.ravel()[{index}] is zero.") - last_idx += arr_len + last_idx += end raise ValueError("\n".join(errmsg)) @@ -375,11 +375,11 @@ def raise_ok(self, map_info): errmsg = ["Mass matrix contains non-finite values on the diagonal. "] last_idx = 0 - for name, shape, dtype in map_info: - arr_len = np.prod(shape, dtype=int) - index = np.where(~np.isfinite(self._stds[last_idx : last_idx + arr_len]))[0] + for name, shape, size, dtype in map_info: + end = last_idx + size + index = np.where(~np.isfinite(self._stds[last_idx:end]))[0] errmsg.append(f"The derivative of RV `{name}`.ravel()[{index}] is non-finite.") - last_idx += arr_len + last_idx = end raise ValueError("\n".join(errmsg)) diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index d825c8857..64455c893 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -28,9 +28,10 @@ import pymc as pm from pymc.blocking import DictToArrayBijection, RaveledVars +from pymc.initial_point import PointType from pymc.pytensorf import ( CallableTensor, - compile_pymc, + compile, floatX, join_nonshared_inputs, replace_rng_nodes, @@ -46,20 +47,20 @@ from pymc.step_methods.state import dataclass_state __all__ = [ - "Metropolis", - "DEMetropolis", - "DEMetropolisZ", - "BinaryMetropolis", "BinaryGibbsMetropolis", + "BinaryMetropolis", "CategoricalGibbsMetropolis", - "NormalProposal", "CauchyProposal", + "DEMetropolis", + "DEMetropolisZ", "LaplaceProposal", - "PoissonProposal", + "Metropolis", "MultivariateNormalProposal", + "NormalProposal", + "PoissonProposal", ] -from pymc.util import get_value_vars_from_user_vars +from pymc.util import RandomGenerator, get_value_vars_from_user_vars # Available proposal distributions for Metropolis @@ -151,6 +152,7 @@ class Metropolis(ArrayStepShared): def __init__( self, vars=None, + *, S=None, proposal_dist=None, scaling=1.0, @@ -159,7 +161,9 @@ def __init__( model=None, mode=None, rng=None, - **kwargs, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = False, ): """Create an instance of a Metropolis stepper. @@ -188,14 +192,15 @@ def __init__( :py:func:`pymc.util.get_random_generator` for more information. """ model = pm.modelcontext(model) - initial_values = model.initial_point() + if initial_point is None: + initial_point = model.initial_point() if vars is None: vars = model.value_vars else: vars = get_value_vars_from_user_vars(vars, model) - initial_values_shape = [initial_values[v.name].shape for v in vars] + initial_values_shape = [initial_point[v.name].shape for v in vars] if S is None: S = np.ones(int(sum(np.prod(ivs) for ivs in initial_values_shape))) @@ -215,7 +220,7 @@ def __init__( # Determine type of variables self.discrete = np.concatenate( - [[v.dtype in pm.discrete_types] * (initial_values[v.name].size or 1) for v in vars] + [[v.dtype in pm.discrete_types] * (initial_point[v.name].size or 1) for v in vars] ) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() @@ -249,9 +254,9 @@ def __init__( # TODO: This is not being used when compiling the logp function! self.mode = mode - shared = pm.make_shared_replacements(initial_values, vars, model) - self.delta_logp = delta_logp(initial_values, model.logp(), vars, shared) - super().__init__(vars, shared, rng=rng) + shared = pm.make_shared_replacements(initial_point, vars, model) + self.delta_logp = delta_logp(initial_point, model.logp(), vars, shared, compile_kwargs) + super().__init__(vars, shared, blocked=blocked, rng=rng) def reset_tuning(self): """Reset the tuned sampler parameters to their initial values.""" @@ -302,7 +307,7 @@ def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: accept_rate = self.delta_logp(q, q0d) q, accepted = metrop_select(accept_rate, q, q0d, rng=self.rng) self.accept_rate_iter = accept_rate - self.accepted_iter = accepted + self.accepted_iter[0] = accepted self.accepted_sum += accepted self.steps_until_tune -= 1 @@ -383,6 +388,13 @@ class BinaryMetropolisState(StepMethodState): class BinaryMetropolis(ArrayStep): """Metropolis-Hastings optimized for binary variables. + Unlike BinaryGibbsMetropolis, this step sampler proposes an update for all variable dimensions at once. + + This will perform a single logp evaluation per step, at the expense of a lower acceptance rate when + the posteriors of the binary variables are highly correlated. + + The BinaryGibbsMetropolis (not this one) is the default step sampler for binary variables + Parameters ---------- vars: list @@ -411,7 +423,19 @@ class BinaryMetropolis(ArrayStep): _state_class = BinaryMetropolisState - def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None, rng=None): + def __init__( + self, + vars, + *, + scaling=1.0, + tune=True, + tune_interval=100, + model=None, + rng=None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = True, + ): model = pm.modelcontext(model) self.scaling = scaling @@ -425,7 +449,9 @@ def __init__(self, vars, scaling=1.0, tune=True, tune_interval=100, model=None, if not all(v.dtype in pm.discrete_types for v in vars): raise ValueError("All variables must be Bernoulli for BinaryMetropolis") - super().__init__(vars, [model.compile_logp()], rng=rng) + if compile_kwargs is None: + compile_kwargs = {} + super().__init__(vars, [model.compile_logp(**compile_kwargs)], blocked=blocked, rng=rng) def astep(self, apoint: RaveledVars, *args) -> tuple[RaveledVars, StatsType]: logp = args[0] @@ -489,6 +515,14 @@ class BinaryGibbsMetropolisState(StepMethodState): class BinaryGibbsMetropolis(ArrayStep): """A Metropolis-within-Gibbs step method optimized for binary variables. + Unlike BinaryMetropolis, this step sampler proposes a variable dimension update at a time. + + This will increase acceptance rate when the posteriors of the binary variables are highly correlated, + at the expense of doing more logp evaluations per step. + + This is the default step sampler for binary variables. + + Parameters ---------- vars: list @@ -515,7 +549,18 @@ class BinaryGibbsMetropolis(ArrayStep): _state_class = BinaryGibbsMetropolisState - def __init__(self, vars, order="random", transit_p=0.8, model=None, rng=None): + def __init__( + self, + vars, + *, + order="random", + transit_p=0.8, + model=None, + rng=None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = True, + ): model = pm.modelcontext(model) # Doesn't actually tune, but it's required to emit a sampler stat @@ -526,7 +571,8 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None, rng=None): vars = get_value_vars_from_user_vars(vars, model) - initial_point = model.initial_point() + if initial_point is None: + initial_point = model.initial_point() self.dim = sum(initial_point[v.name].size for v in vars) if order == "random": @@ -541,7 +587,10 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None, rng=None): if not all(v.dtype in pm.discrete_types for v in vars): raise ValueError("All variables must be binary for BinaryGibbsMetropolis") - super().__init__(vars, [model.compile_logp()], rng=rng) + if compile_kwargs is None: + compile_kwargs = {} + + super().__init__(vars, [model.compile_logp(**compile_kwargs)], blocked=blocked, rng=rng) def reset_tuning(self): # There are no tuning parameters in this step method. @@ -622,14 +671,26 @@ class CategoricalGibbsMetropolis(ArrayStep): _state_class = CategoricalGibbsMetropolisState - def __init__(self, vars, proposal="uniform", order="random", model=None, rng=None): + def __init__( + self, + vars, + *, + proposal="uniform", + order="random", + model=None, + rng: RandomGenerator = None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = True, + ): model = pm.modelcontext(model) vars = get_value_vars_from_user_vars(vars, model) - initial_point = model.initial_point() + if initial_point is None: + initial_point = model.initial_point() - dimcats = [] + dimcats: list[tuple[int, int]] = [] # The above variable is a list of pairs (aggregate dimension, number # of categories). For example, if vars = [x, y] with x being a 2-D # variable with M categories and y being a 3-D variable with N @@ -665,10 +726,10 @@ def __init__(self, vars, proposal="uniform", order="random", model=None, rng=Non self.dimcats = [dimcats[j] for j in order] if proposal == "uniform": - self.astep = self.astep_unif + self.astep = self.astep_unif # type: ignore[assignment] elif proposal == "proportional": # Use the optimized "Metropolized Gibbs Sampler" described in Liu96. - self.astep = self.astep_prop + self.astep = self.astep_prop # type: ignore[assignment] else: raise ValueError("Argument 'proposal' should either be 'uniform' or 'proportional'") @@ -676,7 +737,9 @@ def __init__(self, vars, proposal="uniform", order="random", model=None, rng=Non # that indicates whether a draw was done in a tuning phase. self.tune = True - super().__init__(vars, [model.compile_logp()], rng=rng) + if compile_kwargs is None: + compile_kwargs = {} + super().__init__(vars, [model.compile_logp(**compile_kwargs)], blocked=blocked, rng=rng) def reset_tuning(self): # There are no tuning parameters in this step method. @@ -841,6 +904,7 @@ class DEMetropolis(PopulationArrayStepShared): def __init__( self, vars=None, + *, S=None, proposal_dist=None, lamb=None, @@ -850,11 +914,14 @@ def __init__( model=None, mode=None, rng=None, - **kwargs, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = True, ): model = pm.modelcontext(model) - initial_values = model.initial_point() - initial_values_size = sum(initial_values[n.name].size for n in model.value_vars) + if initial_point is None: + initial_point = model.initial_point() + initial_values_size = sum(initial_point[n.name].size for n in model.value_vars) if vars is None: vars = model.continuous_value_vars @@ -883,9 +950,9 @@ def __init__( self.mode = mode - shared = pm.make_shared_replacements(initial_values, vars, model) - self.delta_logp = delta_logp(initial_values, model.logp(), vars, shared) - super().__init__(vars, shared, rng=rng) + shared = pm.make_shared_replacements(initial_point, vars, model) + self.delta_logp = delta_logp(initial_point, model.logp(), vars, shared, compile_kwargs) + super().__init__(vars, shared, blocked=blocked, rng=rng) def astep(self, q0: RaveledVars) -> tuple[RaveledVars, StatsType]: point_map_info = q0.point_map_info @@ -1008,6 +1075,7 @@ class DEMetropolisZ(ArrayStepShared): def __init__( self, vars=None, + *, S=None, proposal_dist=None, lamb=None, @@ -1016,13 +1084,16 @@ def __init__( tune_interval=100, tune_drop_fraction: float = 0.9, model=None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, mode=None, rng=None, - **kwargs, + blocked: bool = True, ): model = pm.modelcontext(model) - initial_values = model.initial_point() - initial_values_size = sum(initial_values[n.name].size for n in model.value_vars) + if initial_point is None: + initial_point = model.initial_point() + initial_values_size = sum(initial_point[n.name].size for n in model.value_vars) if vars is None: vars = model.continuous_value_vars @@ -1063,9 +1134,9 @@ def __init__( self.mode = mode - shared = pm.make_shared_replacements(initial_values, vars, model) - self.delta_logp = delta_logp(initial_values, model.logp(), vars, shared) - super().__init__(vars, shared, rng=rng) + shared = pm.make_shared_replacements(initial_point, vars, model) + self.delta_logp = delta_logp(initial_point, model.logp(), vars, shared, compile_kwargs) + super().__init__(vars, shared, blocked=blocked, rng=rng) def reset_tuning(self): """Reset the tuned sampler parameters and history to their initial values.""" @@ -1155,6 +1226,7 @@ def delta_logp( logp: pt.TensorVariable, vars: list[pt.TensorVariable], shared: dict[pt.TensorVariable, pt.sharedvar.TensorSharedVariable], + compile_kwargs: dict | None, ) -> pytensor.compile.Function: [logp0], inarray0 = join_nonshared_inputs( point=point, outputs=[logp], inputs=vars, shared_inputs=shared @@ -1167,6 +1239,8 @@ def delta_logp( # Replace any potential duplicated RNG nodes (logp1,) = replace_rng_nodes((logp1,)) - f = compile_pymc([inarray1, inarray0], logp1 - logp0) + if compile_kwargs is None: + compile_kwargs = {} + f = compile([inarray1, inarray0], logp1 - logp0, **compile_kwargs) f.trust_input = True return f diff --git a/pymc/step_methods/slicer.py b/pymc/step_methods/slicer.py index 2ea4b1f55..73574c025 100644 --- a/pymc/step_methods/slicer.py +++ b/pymc/step_methods/slicer.py @@ -18,8 +18,9 @@ import numpy as np from pymc.blocking import RaveledVars, StatsType +from pymc.initial_point import PointType from pymc.model import modelcontext -from pymc.pytensorf import compile_pymc, join_nonshared_inputs, make_shared_replacements +from pymc.pytensorf import compile, join_nonshared_inputs, make_shared_replacements from pymc.step_methods.arraystep import ArrayStepShared from pymc.step_methods.compound import Competence, StepMethodState from pymc.step_methods.state import dataclass_state @@ -76,7 +77,17 @@ class Slice(ArrayStepShared): _state_class = SliceState def __init__( - self, vars=None, w=1.0, tune=True, model=None, iter_limit=np.inf, rng=None, **kwargs + self, + vars=None, + *, + w=1.0, + tune=True, + model=None, + iter_limit=np.inf, + rng=None, + initial_point: PointType | None = None, + compile_kwargs: dict | None = None, + blocked: bool = False, # Could be true since tuning is independent across dims? ): model = modelcontext(model) self.w = np.asarray(w).copy() @@ -89,15 +100,19 @@ def __init__( else: vars = get_value_vars_from_user_vars(vars, model) - point = model.initial_point() - shared = make_shared_replacements(point, vars, model) + if initial_point is None: + initial_point = model.initial_point() + + shared = make_shared_replacements(initial_point, vars, model) [logp], raveled_inp = join_nonshared_inputs( - point=point, outputs=[model.logp()], inputs=vars, shared_inputs=shared + point=initial_point, outputs=[model.logp()], inputs=vars, shared_inputs=shared ) - self.logp = compile_pymc([raveled_inp], logp) + if compile_kwargs is None: + compile_kwargs = {} + self.logp = compile([raveled_inp], logp, **compile_kwargs) self.logp.trust_input = True - super().__init__(vars, shared, rng=rng) + super().__init__(vars, shared, blocked=blocked, rng=rng) def astep(self, apoint: RaveledVars) -> tuple[RaveledVars, StatsType]: # The arguments are determined by the list passed via `super().__init__(..., fs, ...)` diff --git a/pymc/testing.py b/pymc/testing.py index 3970e9125..cc7433980 100644 --- a/pymc/testing.py +++ b/pymc/testing.py @@ -43,7 +43,7 @@ local_check_parameter_to_ninf_switch, rvs_in_graph, ) -from pymc.pytensorf import compile_pymc, floatX, inputvars +from pymc.pytensorf import compile, floatX, inputvars # This mode can be used for tests where model compilations takes the bulk of the runtime # AND where we don't care about posterior numerical or sampling stability (e.g., when @@ -645,7 +645,7 @@ def check_selfconsistency_discrete_logcdf( dist_logp_fn = pytensor.function(list(inputvars(dist_logp)), dist_logp) dist_logcdf = logcdf(dist, value) - dist_logcdf_fn = compile_pymc(list(inputvars(dist_logcdf)), dist_logcdf) + dist_logcdf_fn = compile(list(inputvars(dist_logcdf)), dist_logcdf) domains = paramdomains.copy() domains["value"] = domain @@ -721,7 +721,7 @@ def continuous_random_tester( model, param_vars = build_model(dist, valuedomain, paramdomains, extra_args) model_dist = change_dist_size(model.named_vars["value"], size, expand=True) - pymc_rand = compile_pymc([], model_dist) + pymc_rand = compile([], model_dist) domains = paramdomains.copy() for point in product(domains, n_samples=100): @@ -760,7 +760,7 @@ def discrete_random_tester( model, param_vars = build_model(dist, valuedomain, paramdomains) model_dist = change_dist_size(model.named_vars["value"], size, expand=True) - pymc_rand = compile_pymc([], model_dist) + pymc_rand = compile([], model_dist) domains = paramdomains.copy() for point in product(domains, n_samples=100): diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index a1d151025..d07f8c864 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -21,7 +21,7 @@ from pymc.pytensorf import hessian_diag from pymc.util import get_var_name -__all__ = ["find_hessian", "trace_cov", "guess_scaling"] +__all__ = ["find_hessian", "guess_scaling", "trace_cov"] def fixed_hessian(point, model=None): diff --git a/pymc/tuning/starting.py b/pymc/tuning/starting.py index c085af5d2..22d3ffb41 100644 --- a/pymc/tuning/starting.py +++ b/pymc/tuning/starting.py @@ -143,7 +143,7 @@ def find_MAP( compiled_logp_func = DictToArrayBijection.mapf(model.compile_logp(jacobian=False), start) logp_func = lambda x: compiled_logp_func(RaveledVars(x, x0.point_map_info)) # noqa: E731 - rvs = [model.values_to_rvs[vars_dict[name]] for name, _, _ in x0.point_map_info] + rvs = [model.values_to_rvs[vars_dict[name]] for name, _, _, _ in x0.point_map_info] try: # This might be needed for calls to `dlogp_func` # start_map_info = tuple((v.name, v.shape, v.dtype) for v in vars) diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index 61940418b..6fad3c10b 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -35,7 +35,7 @@ node_property, ) -__all__ = ["MeanField", "FullRank", "Empirical", "sample_approx"] +__all__ = ["Empirical", "FullRank", "MeanField", "sample_approx"] @Group.register diff --git a/pymc/variational/callbacks.py b/pymc/variational/callbacks.py index faac7e367..caf6c89cc 100644 --- a/pymc/variational/callbacks.py +++ b/pymc/variational/callbacks.py @@ -84,7 +84,7 @@ def __call__(self, approx, _, i) -> None: self.prev = current norm = np.linalg.norm(delta, self.ord) if norm < self.tolerance: - raise StopIteration("Convergence achieved at %d" % i) + raise StopIteration(f"Convergence achieved at {i}") @staticmethod def flatten_shared(shared_list): diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index 3dcb59b59..3e2c07788 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -32,11 +32,11 @@ __all__ = [ "ADVI", - "FullRankADVI", - "SVGD", "ASVGD", - "Inference", + "SVGD", + "FullRankADVI", "ImplicitGradient", + "Inference", "KLqp", "fit", ] diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index b07b9ded8..9829ea2c3 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -72,7 +72,7 @@ from pymc.model import modelcontext from pymc.pytensorf import ( SeedSequenceSeed, - compile_pymc, + compile, find_rng_nodes, identity, reseed_rngs, @@ -88,7 +88,7 @@ from pymc.variational.updates import adagrad_window from pymc.vartypes import discrete_types -__all__ = ["ObjectiveFunction", "Operator", "TestFunction", "Group", "Approximation"] +__all__ = ["Approximation", "Group", "ObjectiveFunction", "Operator", "TestFunction"] class VariationalInferenceError(Exception): @@ -388,9 +388,9 @@ def step_function( ) seed = self.approx.rng.randint(2**30, dtype=np.int64) if score: - step_fn = compile_pymc([], updates.loss, updates=updates, random_seed=seed, **fn_kwargs) + step_fn = compile([], updates.loss, updates=updates, random_seed=seed, **fn_kwargs) else: - step_fn = compile_pymc([], [], updates=updates, random_seed=seed, **fn_kwargs) + step_fn = compile([], [], updates=updates, random_seed=seed, **fn_kwargs) return step_fn @pytensor.config.change_flags(compute_test_value="off") @@ -420,7 +420,7 @@ def score_function( more_replacements = {} loss = self(sc_n_mc, more_replacements=more_replacements) seed = self.approx.rng.randint(2**30, dtype=np.int64) - return compile_pymc([], loss, random_seed=seed, **fn_kwargs) + return compile([], loss, random_seed=seed, **fn_kwargs) @pytensor.config.change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): @@ -1450,7 +1450,7 @@ def get_optimization_replacements(self, s, d): """ repl = collections.OrderedDict() # avoid scan if size is constant and equal to one - if isinstance(s, int) and (s == 1) or s is None: + if (isinstance(s, int) and (s == 1)) or s is None: repl[self.varlogp] = self.single_symbolic_varlogp repl[self.datalogp] = self.single_symbolic_datalogp return repl @@ -1517,7 +1517,7 @@ def sample_dict_fn(self): names = [self.model.rvs_to_values[v].name for v in self.model.free_RVs] sampled = [self.rslice(name) for name in names] sampled = self.set_size_and_deterministic(sampled, s, 0) - sample_fn = compile_pymc([s], sampled) + sample_fn = compile([s], sampled) rng_nodes = find_rng_nodes(sampled) def inner(draws=100, *, random_seed: SeedSequenceSeed = None): @@ -1554,7 +1554,10 @@ def sample( if random_seed is not None: (random_seed,) = _get_seeds_per_chain(random_seed, 1) samples: dict = self.sample_dict_fn(draws, random_seed=random_seed) - points = ({name: records[i] for name, records in samples.items()} for i in range(draws)) + points = ( + {name: np.asarray(records[i]) for name, records in samples.items()} + for i in range(draws) + ) trace = NDArray( model=self.model, diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index 656dbd042..234d30750 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -119,18 +119,18 @@ import pymc as pm __all__ = [ - "sgd", - "apply_momentum", - "momentum", - "apply_nesterov_momentum", - "nesterov_momentum", + "adadelta", "adagrad", "adagrad_window", - "rmsprop", - "adadelta", "adam", "adamax", + "apply_momentum", + "apply_nesterov_momentum", + "momentum", + "nesterov_momentum", "norm_constraint", + "rmsprop", + "sgd", "total_norm_constraint", ] @@ -170,7 +170,7 @@ def get_or_compute_grads(loss_or_grads, params): if isinstance(loss_or_grads, list): if not len(loss_or_grads) == len(params): raise ValueError( - "Got %d gradient expressions for %d parameters" % (len(loss_or_grads), len(params)) + f"Got {len(loss_or_grads)} gradient expressions for {len(params)} parameters" ) return loss_or_grads else: diff --git a/pymc/vartypes.py b/pymc/vartypes.py index 018467458..2f145aa9b 100644 --- a/pymc/vartypes.py +++ b/pymc/vartypes.py @@ -14,13 +14,13 @@ __all__ = [ "bool_types", - "int_types", - "float_types", "complex_types", "continuous_types", "discrete_types", - "typefilter", + "float_types", + "int_types", "isgenerator", + "typefilter", ] bool_types = {"int8"} diff --git a/pyproject.toml b/pyproject.toml index 3674e2272..a8ffb06ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ ignore = [ "D101", # Missing docstring in public class "D102", # Missing docstring in public method "D103", # Missing docstring in public function + "D105", # Missing docstring in magic method ] [tool.ruff.lint.pydocstyle] diff --git a/requirements-dev.txt b/requirements-dev.txt index 082eab73c..56f7f964f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,12 +12,12 @@ mcbackend>=0.4.0 mypy==1.5.1 myst-nb<=1.0.0 numdifftools>=0.9.40 -numpy>=1.15.0 +numpy>=1.25.0 numpydoc pandas>=0.24.0 polyagamma pre-commit>=2.8.0 -pytensor>=2.25.1,<2.26 +pytensor>=2.26.2,<2.27 pytest-cov>=2.5 pytest>=3.0 rich>=13.7.1 diff --git a/requirements.txt b/requirements.txt index b59ca2912..28c9456b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ arviz>=0.13.0 cachetools>=4.2.1 cloudpickle -numpy>=1.15.0 +numpy>=1.25.0 pandas>=0.24.0 -pytensor>=2.25.1,<2.26 +pytensor>=2.26.1,<2.27 rich>=13.7.1 scipy>=1.4.1 threadpoolctl>=3.1.0,<4.0.0 diff --git a/scripts/run_mypy.py b/scripts/run_mypy.py index 842fb0a13..ccce06736 100755 --- a/scripts/run_mypy.py +++ b/scripts/run_mypy.py @@ -32,8 +32,6 @@ pymc/distributions/timeseries.py pymc/distributions/truncated.py pymc/initial_point.py -pymc/logprob/binary.py -pymc/logprob/censoring.py pymc/logprob/basic.py pymc/logprob/mixture.py pymc/logprob/rewriting.py diff --git a/tests/backends/fixtures.py b/tests/backends/fixtures.py index 1b02eb8bc..c7a3bdcec 100644 --- a/tests/backends/fixtures.py +++ b/tests/backends/fixtures.py @@ -238,7 +238,10 @@ class SamplingTestCase(ModelBackendSetupTestCase): """ def record_point(self, val): - point = {varname: np.tile(val, value.shape) for varname, value in self.test_point.items()} + point = { + varname: np.tile(val, value.shape).astype(value.dtype) + for varname, value in self.test_point.items() + } if self.sampler_vars is not None: stats = [{key: dtype(val) for key, dtype in vars.items()} for vars in self.sampler_vars] self.strace.record(point=point, sampler_stats=stats) diff --git a/tests/distributions/test_continuous.py b/tests/distributions/test_continuous.py index 41504816a..2864335e3 100644 --- a/tests/distributions/test_continuous.py +++ b/tests/distributions/test_continuous.py @@ -461,15 +461,6 @@ def test_exponential(self): lambda q, lam: st.expon.ppf(q, loc=0, scale=1 / lam), ) - def test_exponential_wrong_arguments(self): - msg = "Incompatible parametrization. Can't specify both lam and scale" - with pytest.raises(ValueError, match=msg): - pm.Exponential.dist(lam=0.5, scale=5) - - msg = "Incompatible parametrization. Must specify either lam or scale" - with pytest.raises(ValueError, match=msg): - pm.Exponential.dist() - def test_laplace(self): check_logp( pm.Laplace, @@ -2274,8 +2265,20 @@ class TestExponential(BaseTestDistributionRandom): checks_to_run = [ "check_pymc_params_match_rv_op", "check_pymc_draws_match_reference", + "check_both_lam_scale_raises", + "check_default_scale", ] + def check_both_lam_scale_raises(self): + msg = "Incompatible parametrization. Can't specify both lam and scale" + with pytest.raises(ValueError, match=msg): + pm.Exponential.dist(lam=0.5, scale=5) + + def check_default_scale(self): + rv = self.pymc_dist.dist() + [scale] = rv.owner.op.dist_params(rv.owner) + assert scale.data == 1.0 + class TestExponentialScale(BaseTestDistributionRandom): pymc_dist = pm.Exponential diff --git a/tests/distributions/test_custom.py b/tests/distributions/test_custom.py index 5b1de1617..5f2cef54f 100644 --- a/tests/distributions/test_custom.py +++ b/tests/distributions/test_custom.py @@ -287,7 +287,7 @@ def test_multivariate_insufficient_signature(self): with pytest.raises( NotImplementedError, match="signature is not sufficient to infer the support shape" ): - CustomDist.dist(signature="(n)->(m)") + CustomDist.dist([0], signature="(n)->(m)") class TestCustomSymbolicDist: diff --git a/tests/distributions/test_distribution.py b/tests/distributions/test_distribution.py index 74716081b..cd45b54d4 100644 --- a/tests/distributions/test_distribution.py +++ b/tests/distributions/test_distribution.py @@ -43,7 +43,7 @@ ) from pymc.distributions.shape_utils import change_dist_size from pymc.logprob.basic import conditional_logp, logp -from pymc.pytensorf import compile_pymc +from pymc.pytensorf import compile from pymc.testing import ( BaseTestDistributionRandom, I, @@ -169,7 +169,7 @@ def update(self, node): outputs=[dummy_next_rng, dummy_x], ndim_supp=0, )(rng) - fn = compile_pymc(inputs=[], outputs=x, random_seed=431) + fn = compile(inputs=[], outputs=x, random_seed=431) assert fn() != fn() # Check that custom updates are respected, by using one that's broken @@ -182,7 +182,7 @@ def update(self, node): ValueError, match="No update found for at least one RNG used in SymbolicRandomVariable Op SymbolicRVCustomUpdates", ): - compile_pymc(inputs=[], outputs=x, random_seed=431) + compile(inputs=[], outputs=x, random_seed=431) def test_recreate_with_different_rng_inputs(self): """Test that we can recreate a SymbolicRandomVariable with new RNG inputs. diff --git a/tests/distributions/test_multivariate.py b/tests/distributions/test_multivariate.py index 1fd1b7e6d..cfd50fdd7 100644 --- a/tests/distributions/test_multivariate.py +++ b/tests/distributions/test_multivariate.py @@ -45,7 +45,7 @@ from pymc.logprob.basic import logp from pymc.logprob.utils import ParameterValueError from pymc.math import kronecker -from pymc.pytensorf import compile_pymc, floatX +from pymc.pytensorf import compile, floatX from pymc.sampling.forward import draw from pymc.testing import ( BaseTestDistributionRandom, @@ -168,7 +168,7 @@ def stickbreakingweights_logpdf(): _alpha = pt.scalar() _k = pt.iscalar() _logp = logp(pm.StickBreakingWeights.dist(_alpha, _k), _value) - core_fn = compile_pymc([_value, _alpha, _k], _logp) + core_fn = compile([_value, _alpha, _k], _logp) return np.vectorize(core_fn, signature="(n),(),()->()") @@ -2395,7 +2395,7 @@ def test_mvnormal_no_cholesky_in_model_logp(): d2logp = m.compile_d2logp() assert not contains_cholesky_op(d2logp.f.maker.fgraph) - logp_dlogp = m.logp_dlogp_function() + logp_dlogp = m.logp_dlogp_function(ravel_inputs=True) assert not contains_cholesky_op(logp_dlogp._pytensor_function.maker.fgraph) diff --git a/tests/distributions/test_shape_utils.py b/tests/distributions/test_shape_utils.py index 493d7cb8c..f381d6db4 100644 --- a/tests/distributions/test_shape_utils.py +++ b/tests/distributions/test_shape_utils.py @@ -35,9 +35,11 @@ convert_size, get_support_shape, get_support_shape_1d, + implicit_size_from_params, rv_size_is_none, ) from pymc.model import Model +from pymc.pytensorf import constant_fold test_shapes = [ ((), (1,), (4,), (5, 4)), @@ -324,7 +326,7 @@ def test_size_from_dims_rng_update(self): with pm.Model(coords={"x_dim": range(2)}): x = pm.Normal("x", dims=("x_dim",)) - fn = pm.pytensorf.compile_pymc([], x) + fn = pm.pytensorf.compile([], x) # Check that both function outputs (rng and draws) come from the same Apply node assert fn.maker.fgraph.outputs[0].owner is fn.maker.fgraph.outputs[1].owner @@ -339,7 +341,7 @@ def test_size_from_observed_rng_update(self): with pm.Model(): x = pm.Normal("x", observed=[0, 1]) - fn = pm.pytensorf.compile_pymc([], x) + fn = pm.pytensorf.compile([], x) # Check that both function outputs (rng and draws) come from the same Apply node assert fn.maker.fgraph.outputs[0].owner is fn.maker.fgraph.outputs[1].owner @@ -630,3 +632,10 @@ def test_get_support_shape( assert (f() == expected_support_shape).all() with pytest.raises(AssertionError, match="support_shape does not match"): inferred_support_shape.eval() + + +def test_implicit_size_from_params(): + x = pt.tensor(shape=(5, 1)) + y = pt.tensor(shape=(3, 3)) + res = implicit_size_from_params(x, y, ndims_params=[1, 2]) + assert constant_fold([res]) == (5,) diff --git a/tests/distributions/test_simulator.py b/tests/distributions/test_simulator.py index bddf440a1..873261386 100644 --- a/tests/distributions/test_simulator.py +++ b/tests/distributions/test_simulator.py @@ -28,7 +28,7 @@ from pymc import floatX from pymc.initial_point import make_initial_point_fn -from pymc.pytensorf import compile_pymc +from pymc.pytensorf import compile from pymc.smc.kernels import IMH @@ -357,12 +357,12 @@ def test_dist(self, seeded_test): x = cloudpickle.loads(cloudpickle.dumps(x)) x_logp = pm.logp(x, [0, 1, 2]) - x_logp_fn = compile_pymc([], x_logp, random_seed=1) + x_logp_fn = compile([], x_logp, random_seed=1) res1, res2 = x_logp_fn(), x_logp_fn() assert res1.shape == (3,) assert np.all(res1 != res2) - x_logp_fn = compile_pymc([], x_logp, random_seed=1) + x_logp_fn = compile([], x_logp, random_seed=1) res3, res4 = x_logp_fn(), x_logp_fn() assert np.all(res1 == res3) assert np.all(res2 == res4) diff --git a/tests/gp/test_gp.py b/tests/gp/test_gp.py index 9265dd415..3d620610f 100644 --- a/tests/gp/test_gp.py +++ b/tests/gp/test_gp.py @@ -409,9 +409,9 @@ def testLatentMultioutput(self): == numpy_rv_logp.shape.eval() ) - npt.assert_allclose(latent_rv_logp.eval(), marginal_rv_logp.eval(), atol=5) - npt.assert_allclose(latent_rv_logp.eval(), numpy_rv_logp.eval(), atol=5) - npt.assert_allclose(marginal_rv_logp.eval(), numpy_rv_logp.eval(), atol=5) + npt.assert_allclose(latent_rv_logp.eval({"f": y}), marginal_rv_logp.eval(), rtol=1e-4) + npt.assert_allclose(latent_rv_logp.eval({"f": y}), numpy_rv_logp.eval(), rtol=1e-4) + npt.assert_allclose(marginal_rv_logp.eval(), numpy_rv_logp.eval(), rtol=1e-4) class TestTP: diff --git a/tests/helpers.py b/tests/helpers.py index ae62d72d5..e4b624893 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -26,6 +26,7 @@ import pytensor from numpy.testing import assert_array_less +from pytensor.compile.mode import Mode from pytensor.gradient import verify_grad as at_verify_grad import pymc as pm @@ -198,11 +199,16 @@ def continuous_steps(self, step, step_kwargs): c1 = pm.HalfNormal("c1") c2 = pm.HalfNormal("c2") + # Test methods can handle initial_point and compile_kwargs + step_kwargs.setdefault( + "initial_point", {"c1_log__": np.array(0.5), "c2_log__": np.array(0.9)} + ) + step_kwargs.setdefault("compile_kwargs", {"mode": Mode(linker="py", optimizer=None)}) with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): assert [m.rvs_to_values[c1]] == step([c1], **step_kwargs).vars - assert {m.rvs_to_values[c1], m.rvs_to_values[c2]} == set( - step([c1, c2], **step_kwargs).vars - ) + assert {m.rvs_to_values[c1], m.rvs_to_values[c2]} == set( + step([c1, c2], **step_kwargs).vars + ) def equal_sampling_states(this, other): diff --git a/tests/logprob/test_linalg.py b/tests/logprob/test_linalg.py new file mode 100644 index 000000000..047a0312b --- /dev/null +++ b/tests/logprob/test_linalg.py @@ -0,0 +1,85 @@ +# Copyright 2024 The PyMC Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest + +from pytensor.tensor.type import tensor + +from pymc.distributions import MatrixNormal, MvNormal, Normal +from pymc.logprob.basic import logp + + +@pytest.mark.parametrize("univariate", [True, False]) +@pytest.mark.parametrize("batch_shape", [(), (3,)]) +def test_matrix_vector_transform(univariate, batch_shape): + rng = np.random.default_rng(755) + + μ = rng.normal(size=(*batch_shape, 2)) + if univariate: + σ = np.abs(rng.normal(size=(*batch_shape, 2))) + Σ = np.eye(2) * (σ**2)[..., None] + x = Normal.dist(mu=μ, sigma=σ) + else: + A = rng.normal(size=(*batch_shape, 2, 2)) + Σ = np.swapaxes(A, -1, -2) @ A + x = MvNormal.dist(mu=μ, cov=Σ) + + c = rng.normal(size=(*batch_shape, 2)) + B = rng.normal(size=(*batch_shape, 2, 2)) + y = c + (B @ x[..., None]).squeeze(-1) + + # An affine transformed MvNormal is still a MvNormal + # https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Affine_transformation + ref_dist = MvNormal.dist( + mu=c + (B @ μ[..., None]).squeeze(-1), cov=B @ Σ @ np.swapaxes(B, -1, -2) + ) + test_y = rng.normal(size=(*batch_shape, 2)) + np.testing.assert_allclose( + logp(y, test_y).eval(), + logp(ref_dist, test_y).eval(), + ) + + +def test_matrix_matrix_transform(): + rng = np.random.default_rng(46) + + n, p = 2, 3 + M = rng.normal(size=(n, p)) + A = rng.normal(size=(n, n)) * 0.1 + U = A.T @ A + B = rng.normal(size=(p, p)) * 0.1 + V = B.T @ B + X = MatrixNormal.dist(mu=M, rowcov=U, colcov=V) + + D = rng.normal(size=(n, n)) + C = rng.normal(size=(p, p)) + Y = D @ X @ C + + # A linearly transformed MatrixNormal is still a MatrixNormal + # https://en.wikipedia.org/wiki/Matrix_normal_distribution#Transformation + ref_dist = MatrixNormal.dist(mu=D @ M @ C, rowcov=D @ U @ D.T, colcov=C.T @ V @ C) + test_Y = rng.normal(size=(n, p)) + np.testing.assert_allclose( + logp(Y, test_Y).eval(), + logp(ref_dist, test_Y).eval(), + rtol=1e-5, + ) + + +def test_broadcasted_matmul_fails(): + x = Normal.dist(size=(3, 2)) + A = tensor("A", shape=(4, 3, 3)) + y = A @ x + with pytest.raises(NotImplementedError): + logp(y, y.type()) diff --git a/tests/logprob/test_scan.py b/tests/logprob/test_scan.py index 381eed221..17fb198ca 100644 --- a/tests/logprob/test_scan.py +++ b/tests/logprob/test_scan.py @@ -550,3 +550,25 @@ def test_scan_multiple_output_types(): test_value, [a + b for a, b in itertools.pairwise([1, 1, *test_value[:-1]])] ), ) + + +def test_generative_graph_unchanged(): + # Regression test where creating the IR would overwrite the original Scan inner fgraph + + def step(eps_tm1): + x = pt.random.normal(0, eps_tm1) + eps_t = x - 0 + return (x, eps_t), {x.owner.inputs[0]: x.owner.outputs[0]} + + [xs, _], update = pytensor.scan(step, outputs_info=[None, pt.ones(())], n_steps=5) + + before = xs.dprint(file="str") + + xs_value = np.ones(5) + expected_logp = stats.norm.logpdf(xs_value, 0, 1) + for i in range(2): + xs_logp = logp(xs, xs_value) + np.testing.assert_allclose(xs_logp.eval(), expected_logp) + + after = xs.dprint(file="str") + assert before == after diff --git a/tests/logprob/test_tensor.py b/tests/logprob/test_tensor.py index 13ac8cfff..e118ed69f 100644 --- a/tests/logprob/test_tensor.py +++ b/tests/logprob/test_tensor.py @@ -309,10 +309,7 @@ def test_join_mixed_ndim_supp(): (1, 2, 0), # Swap (0, 1, 2, "x"), # Expand ("x", 0, 1, 2), # Expand - ( - 0, - 2, - ), # Drop + (0, 2), # Drop (2, 0), # Swap and drop (2, 1, "x", 0), # Swap and expand ("x", 0, 2), # Expand and drop @@ -338,7 +335,7 @@ def test_measurable_dimshuffle(ds_order, multivariate): ref_logp = logp(base_rv, base_vv).dimshuffle(logp_ds_order) - # Disable local_dimshuffle_rv_lift to test fallback Aeppl rewrite + # Disable local_dimshuffle_rv_lift to test fallback logprob rewrite ir_rewriter = logprob_rewrites_db.query( RewriteDatabaseQuery(include=["basic"]).excluding("dimshuffle_lift") ) diff --git a/tests/model/test_core.py b/tests/model/test_core.py index 17304fedc..2d3786637 100644 --- a/tests/model/test_core.py +++ b/tests/model/test_core.py @@ -15,7 +15,6 @@ import pickle import threading import traceback -import unittest import warnings from unittest.mock import patch @@ -302,23 +301,26 @@ def test_empty_observed(): assert not hasattr(a.tag, "observations") -class TestValueGradFunction(unittest.TestCase): +class TestValueGradFunction: def test_no_extra(self): a = pt.vector("a") - a.tag.test_value = np.zeros(3, dtype=a.dtype) - f_grad = ValueGradFunction([a.sum()], [a], {}, mode="FAST_COMPILE") + a_ = np.zeros(3, dtype=a.dtype) + f_grad = ValueGradFunction( + [a.sum()], [a], {}, ravel_inputs=True, initial_point={"a": a_}, mode="FAST_COMPILE" + ) assert f_grad._extra_vars == [] def test_invalid_type(self): a = pt.ivector("a") - a.tag.test_value = np.zeros(3, dtype=a.dtype) + a_ = np.zeros(3, dtype=a.dtype) a.dshape = (3,) a.dsize = 3 - with pytest.raises(TypeError) as err: - ValueGradFunction([a.sum()], [a], {}, mode="FAST_COMPILE") - err.match("Invalid dtype") + with pytest.raises(TypeError, match="Invalid dtype"): + ValueGradFunction( + [a.sum()], [a], {}, ravel_inputs=True, initial_point={"a": a_}, mode="FAST_COMPILE" + ) - def setUp(self): + def setup_method(self, test_method): extra1 = pt.iscalar("extra1") extra1_ = np.array(0, dtype=extra1.dtype) extra1.dshape = () @@ -340,41 +342,68 @@ def setUp(self): self.cost = extra1 * val1.sum() + val2.sum() - self.f_grad = ValueGradFunction( - [self.cost], [val1, val2], {extra1: extra1_}, mode="FAST_COMPILE" + self.initial_point = { + "extra1": extra1_, + "val1": val1_, + "val2": val2_, + } + + with pytest.warns( + UserWarning, match="ValueGradFunction will become a function of raveled inputs" + ): + self.f_grad = ValueGradFunction( + [self.cost], + [val1, val2], + {extra1: extra1_}, + mode="FAST_COMPILE", + ) + + self.f_grad_raveled_inputs = ValueGradFunction( + [self.cost], + [val1, val2], + {extra1: extra1_}, + initial_point=self.initial_point, + mode="FAST_COMPILE", + ravel_inputs=True, ) + self.f_grad_raveled_inputs.trust_input = True - def test_extra_not_set(self): + @pytest.mark.parametrize("raveled_fn", (False, True)) + def test_extra_not_set(self, raveled_fn): + f_grad = self.f_grad_raveled_inputs if raveled_fn else self.f_grad with pytest.raises(ValueError) as err: - self.f_grad.get_extra_values() + f_grad.get_extra_values() err.match("Extra values are not set") with pytest.raises(ValueError) as err: size = self.val1_.size + self.val2_.size - self.f_grad(np.zeros(size, dtype=self.f_grad.dtype)) + f_grad(np.zeros(size, dtype=self.f_grad.dtype)) err.match("Extra values are not set") - def test_grad(self): - self.f_grad.set_extra_values({"extra1": 5}) + @pytest.mark.parametrize("raveled_fn", (False, True)) + def test_grad(self, raveled_fn): + f_grad = self.f_grad_raveled_inputs if raveled_fn else self.f_grad + f_grad.set_extra_values({"extra1": 5}) + size = self.val1_.size + self.val2_.size array = RaveledVars( np.ones(size, dtype=self.f_grad.dtype), ( - ("val1", self.val1_.shape, self.val1_.dtype), - ("val2", self.val2_.shape, self.val2_.dtype), + ("val1", self.val1_.shape, self.val1_.size, self.val1_.dtype), + ("val2", self.val2_.shape, self.val2_.size, self.val2_.dtype), ), ) - val, grad = self.f_grad(array) + + val, grad = f_grad(array) assert val == 21 npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) - @pytest.mark.xfail(reason="Test not refactored for v4") def test_edge_case(self): # Edge case discovered in #2948 ndim = 3 with pm.Model() as m: pm.LogNormal( - "sigma", mu=np.zeros(ndim), tau=np.ones(ndim), shape=ndim + "sigma", mu=np.zeros(ndim), tau=np.ones(ndim), initval=np.ones(ndim), shape=ndim ) # variance for the correlation matrix pm.HalfCauchy("nu", beta=10) step = pm.NUTS() @@ -382,7 +411,7 @@ def test_edge_case(self): func = step._logp_dlogp_func initial_point = m.initial_point() func.set_extra_values(initial_point) - q = func.dict_to_array(initial_point) + q = DictToArrayBijection.map(initial_point) logp, dlogp = func(q) assert logp.size == 1 assert dlogp.size == 4 @@ -398,7 +427,7 @@ def test_missing_data(self): with pytest.warns(ImputationWarning): x2 = pm.Bernoulli("x2", x1, observed=X) - gf = m.logp_dlogp_function() + gf = m.logp_dlogp_function(ravel_inputs=True) gf._extra_are_set = True assert m["x2_unobserved"].type == gf._extra_vars_shared["x2_unobserved"].type @@ -414,6 +443,8 @@ def test_missing_data(self): # Assert that all the elements of res are equal assert res[1:] == res[:-1] + +class TestPytensorRelatedLogpBugs: def test_pytensor_switch_broadcast_edge_cases_1(self): # Tests against two subtle issues related to a previous bug in Theano # where `tt.switch` would not always broadcast tensors with single @@ -460,25 +491,28 @@ def test_multiple_observed_rv(): assert model["x"] not in model.value_vars -def test_tempered_logp_dlogp(): +@pytest.mark.parametrize("ravel_inputs", (False, True)) +def test_tempered_logp_dlogp(ravel_inputs): with pm.Model() as model: pm.Normal("x") pm.Normal("y", observed=1) pm.Potential("z", pt.constant(-1.0, dtype=pytensor.config.floatX)) - func = model.logp_dlogp_function() + func = model.logp_dlogp_function(ravel_inputs=ravel_inputs) func.set_extra_values({}) - func_temp = model.logp_dlogp_function(tempered=True) + func_temp = model.logp_dlogp_function(tempered=True, ravel_inputs=ravel_inputs) func_temp.set_extra_values({}) - func_nograd = model.logp_dlogp_function(compute_grads=False) + func_nograd = model.logp_dlogp_function(compute_grads=False, ravel_inputs=ravel_inputs) func_nograd.set_extra_values({}) - func_temp_nograd = model.logp_dlogp_function(tempered=True, compute_grads=False) + func_temp_nograd = model.logp_dlogp_function( + tempered=True, compute_grads=False, ravel_inputs=ravel_inputs + ) func_temp_nograd.set_extra_values({}) - x = np.ones(1, dtype=func.dtype) + x = np.ones((1,), dtype=func.dtype) npt.assert_allclose(func(x)[0], func_temp(x)[0]) npt.assert_allclose(func(x)[1], func_temp(x)[1]) @@ -532,7 +566,7 @@ def test_model_roundtrip(self): cloudpickle.loads(s) except Exception: raise AssertionError( - "Exception while trying roundtrip with pickle protocol %d:\n" % proto + f"Exception while trying roundtrip with pickle protocol {proto}:\n" + "".join(traceback.format_exc()) ) @@ -766,10 +800,10 @@ def test_mode(self, mode): "a_interval__": model.rvs_to_transforms[a].forward(0.3, *a.owner.inputs).eval(), "b_interval__": model.rvs_to_transforms[b].forward(2.1, *b.owner.inputs).eval(), } - with patch("pymc.model.core.compile_pymc") as patched_compile_pymc: + with patch("pymc.model.core.compile") as patched_compile: model.check_start_vals(start, mode=mode) - patched_compile_pymc.assert_called_once() - assert patched_compile_pymc.call_args.kwargs["mode"] == mode + patched_compile.assert_called_once() + assert patched_compile.call_args.kwargs["mode"] == mode def test_set_initval(): diff --git a/tests/models.py b/tests/models.py index fd45fb8bd..abf461fa9 100644 --- a/tests/models.py +++ b/tests/models.py @@ -78,7 +78,7 @@ def arbitrary_det(value): def simple_init(): start, model, moments = simple_model() - step = Metropolis(model.value_vars, np.diag([1.0]), model=model) + step = Metropolis(model.value_vars, S=np.diag([1.0]), model=model) return model, start, step, moments diff --git a/tests/sampling/test_forward.py b/tests/sampling/test_forward.py index 2b5ce1265..404f74a96 100644 --- a/tests/sampling/test_forward.py +++ b/tests/sampling/test_forward.py @@ -33,7 +33,7 @@ import pymc as pm from pymc.backends.base import MultiTrace -from pymc.pytensorf import compile_pymc +from pymc.pytensorf import compile from pymc.sampling.forward import ( compile_forward_sampling_function, get_constant_coords, @@ -1195,7 +1195,7 @@ def test_layers(self): a = pm.Uniform("a", lower=0, upper=1, size=10) b = pm.Binomial("b", n=1, p=a, size=10) - b_sampler = compile_pymc([], b, mode="FAST_RUN", random_seed=232093) + b_sampler = compile([], b, mode="FAST_RUN", random_seed=232093) avg = np.stack([b_sampler() for i in range(10000)]).mean(0) npt.assert_array_almost_equal(avg, 0.5 * np.ones((10,)), decimal=2) diff --git a/tests/sampling/test_mcmc.py b/tests/sampling/test_mcmc.py index 8ba7b133b..3219d45b7 100644 --- a/tests/sampling/test_mcmc.py +++ b/tests/sampling/test_mcmc.py @@ -742,39 +742,35 @@ class TestAssignStepMethods: def test_bernoulli(self): """Test bernoulli distribution is assigned binary gibbs metropolis method""" with pm.Model() as model: - pm.Bernoulli("x", 0.5) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, BinaryGibbsMetropolis) + x = pm.Bernoulli("x", 0.5) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {BinaryGibbsMetropolis: [model.rvs_to_values[x]]} def test_normal(self): """Test normal distribution is assigned NUTS method""" with pm.Model() as model: - pm.Normal("x", 0, 1) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, NUTS) + x = pm.Normal("x", 0, 1) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {NUTS: [model.rvs_to_values[x]]} def test_categorical(self): """Test categorical distribution is assigned categorical gibbs metropolis method""" with pm.Model() as model: - pm.Categorical("x", np.array([0.25, 0.75])) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, BinaryGibbsMetropolis) + x = pm.Categorical("x", np.array([0.25, 0.75])) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {BinaryGibbsMetropolis: [model.rvs_to_values[x]]} + with pm.Model() as model: - pm.Categorical("y", np.array([0.25, 0.70, 0.05])) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, CategoricalGibbsMetropolis) + y = pm.Categorical("y", np.array([0.25, 0.70, 0.05])) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {CategoricalGibbsMetropolis: [model.rvs_to_values[y]]} def test_binomial(self): """Test binomial distribution is assigned metropolis method.""" with pm.Model() as model: - pm.Binomial("x", 10, 0.5) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, Metropolis) + x = pm.Binomial("x", 10, 0.5) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {Metropolis: [model.rvs_to_values[x]]} def test_normal_nograd_op(self): """Test normal distribution without an implemented gradient is assigned slice method""" @@ -791,11 +787,12 @@ def kill_grad(x): return x data = np.random.normal(size=(100,)) - pm.Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(pytensor.config.floatX)) + y = pm.Normal( + "y", mu=kill_grad(x), sigma=1, observed=data.astype(pytensor.config.floatX) + ) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, Slice) + _, selected_steps = assign_step_methods(model, []) + assert selected_steps == {Slice: [model.rvs_to_values[x]]} @pytest.fixture def step_methods(self): @@ -812,18 +809,18 @@ def test_modify_step_methods(self, step_methods): with pm.Model() as model: pm.Normal("x", 0, 1) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert not isinstance(steps, NUTS) + + _, selected_steps = assign_step_methods(model, []) + assert NUTS not in selected_steps # add back nuts step_methods.append(NUTS) with pm.Model() as model: pm.Normal("x", 0, 1) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - steps = assign_step_methods(model, []) - assert isinstance(steps, NUTS) + + _, selected_steps = assign_step_methods(model, []) + assert NUTS in selected_steps def test_step_vars_in_model(self): """Test if error is raised if step variable is not found in model.value_vars""" diff --git a/tests/smc/test_smc.py b/tests/smc/test_smc.py index 3aa687459..09ba48dc7 100644 --- a/tests/smc/test_smc.py +++ b/tests/smc/test_smc.py @@ -134,6 +134,21 @@ def test_unobserved_categorical(self): assert np.all(np.median(trace["mu"], axis=0) == [1, 2]) + def test_parallel_custom(self): + def _logp(value, mu): + return -((value - mu) ** 2) + + def _random(mu, rng=None, size=None): + return rng.normal(loc=mu, scale=1, size=size) + + def _dist(mu, size=None): + return pm.Normal.dist(mu, 1, size=size) + + with pm.Model(): + mu = pm.CustomDist("mu", 0, logp=_logp, dist=_dist) + pm.CustomDist("y", mu, logp=_logp, class_name="", random=_random, observed=[1, 2]) + pm.sample_smc(draws=6, cores=2) + def test_marginal_likelihood(self): """ Verifies that the log marginal likelihood function diff --git a/tests/stats/test_log_density.py b/tests/stats/test_log_density.py index 5128913e8..00ee5d499 100644 --- a/tests/stats/test_log_density.py +++ b/tests/stats/test_log_density.py @@ -187,12 +187,12 @@ def test_compilation_kwargs(self): with ( # apply_function_over_dataset fails with patched `compile_pymc` patch("pymc.stats.log_density.apply_function_over_dataset"), - patch("pymc.model.core.compile_pymc") as patched_compile_pymc, + patch("pymc.model.core.compile") as patched_compile, ): compute_log_prior(idata, compile_kwargs={"mode": "JAX"}, extend_inferencedata=False) compute_log_likelihood( idata, compile_kwargs={"mode": "NUMBA"}, extend_inferencedata=False ) - assert len(patched_compile_pymc.call_args_list) == 2 - assert patched_compile_pymc.call_args_list[0].kwargs["mode"] == "JAX" - assert patched_compile_pymc.call_args_list[1].kwargs["mode"] == "NUMBA" + assert len(patched_compile.call_args_list) == 2 + assert patched_compile.call_args_list[0].kwargs["mode"] == "JAX" + assert patched_compile.call_args_list[1].kwargs["mode"] == "NUMBA" diff --git a/tests/step_methods/hmc/test_hmc.py b/tests/step_methods/hmc/test_hmc.py index 96840eed0..d22882032 100644 --- a/tests/step_methods/hmc/test_hmc.py +++ b/tests/step_methods/hmc/test_hmc.py @@ -59,10 +59,9 @@ def _hamiltonian_step(self, *args, **kwargs): step = HMC(vars=model.value_vars, model=model, scaling=scaling) - step.integrator._logp_dlogp_func.set_extra_values({}) astart = DictToArrayBijection.map(start) p = RaveledVars(floatX(step.potential.random()), astart.point_map_info) - q = RaveledVars(floatX(np.random.randn(size)), astart.point_map_info) + q = floatX(np.random.randn(size)) start = step.integrator.compute_state(p, q) for epsilon in [0.01, 0.1]: for n_steps in [1, 2, 3, 4, 20]: diff --git a/tests/step_methods/hmc/test_nuts.py b/tests/step_methods/hmc/test_nuts.py index 2bb71b893..d37782fb7 100644 --- a/tests/step_methods/hmc/test_nuts.py +++ b/tests/step_methods/hmc/test_nuts.py @@ -204,3 +204,24 @@ class TestRVsAssignmentNUTS(RVsAssignmentStepsTester): @pytest.mark.parametrize("step, step_kwargs", [(NUTS, {})]) def test_continuous_steps(self, step, step_kwargs): self.continuous_steps(step, step_kwargs) + + +def test_nuts_step_legacy_value_grad_function(): + # This test can be removed once ravel_inputs=False is deprecated + with pm.Model() as m: + x = pm.Normal("x", shape=(2,)) + y = pm.Normal("y", x, shape=(3, 2)) + + legacy_value_grad_fn = m.logp_dlogp_function(ravel_inputs=False, mode="FAST_COMPILE") + legacy_value_grad_fn.set_extra_values({}) + nuts = NUTS(model=m, logp_dlogp_func=legacy_value_grad_fn) + + # Confirm it is a function of multiple variables + logp, dlogp = nuts._logp_dlogp_func([np.zeros((2,)), np.zeros((3, 2))]) + np.testing.assert_allclose(dlogp, np.zeros(8)) + + # Confirm we can perform a NUTS step + ip = m.initial_point() + new_ip, _ = nuts.step(ip) + assert np.all(new_ip["x"] != ip["x"]) + assert np.all(new_ip["y"] != ip["y"]) diff --git a/tests/step_methods/test_metropolis.py b/tests/step_methods/test_metropolis.py index a73538a61..a01e75506 100644 --- a/tests/step_methods/test_metropolis.py +++ b/tests/step_methods/test_metropolis.py @@ -22,6 +22,8 @@ import pytensor import pytest +from pytensor.compile.mode import Mode + import pymc as pm from pymc.step_methods.metropolis import ( @@ -356,22 +358,28 @@ def test_step_continuous(self, step_fn, draws): class TestRVsAssignmentMetropolis(RVsAssignmentStepsTester): @pytest.mark.parametrize( - "step, step_kwargs", + "step", [ - (BinaryGibbsMetropolis, {}), - (CategoricalGibbsMetropolis, {}), + BinaryMetropolis, + BinaryGibbsMetropolis, + CategoricalGibbsMetropolis, ], ) - def test_discrete_steps(self, step, step_kwargs): + def test_discrete_steps(self, step): with pm.Model() as m: d1 = pm.Bernoulli("d1", p=0.5) d2 = pm.Bernoulli("d2", p=0.5) - with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): - assert [m.rvs_to_values[d1]] == step([d1], **step_kwargs).vars - assert {m.rvs_to_values[d1], m.rvs_to_values[d2]} == set( - step([d1, d2], **step_kwargs).vars - ) + # Test it can take initial_point, and compile_kwargs as a kwarg + step_kwargs = { + "initial_point": { + "d1": np.array(0, dtype="int64"), + "d2": np.array(1, dtype="int64"), + }, + "compile_kwargs": {"mode": Mode(linker="py", optimizer=None)}, + } + assert [m.rvs_to_values[d1]] == step([d1]).vars + assert {m.rvs_to_values[d1], m.rvs_to_values[d2]} == set(step([d1, d2]).vars) @pytest.mark.parametrize( "step, step_kwargs", [(Metropolis, {}), (DEMetropolis, {}), (DEMetropolisZ, {})] diff --git a/tests/test_initial_point.py b/tests/test_initial_point.py index 6d61b4b3f..9138f37b3 100644 --- a/tests/test_initial_point.py +++ b/tests/test_initial_point.py @@ -17,11 +17,12 @@ import pytensor.tensor as pt import pytest +from pytensor.compile.builders import OpFromGraph from pytensor.tensor.random.op import RandomVariable import pymc as pm -from pymc.distributions.distribution import support_point +from pymc.distributions.distribution import _support_point, support_point from pymc.initial_point import make_initial_point_fn, make_initial_point_fns_per_chain @@ -47,12 +48,17 @@ def test_make_initial_point_fns_per_chain_checks_kwargs(self): ) pass - def test_dependent_initvals(self): + @pytest.mark.parametrize("reverse_rvs", [False, True]) + def test_dependent_initvals(self, reverse_rvs): with pm.Model() as pmodel: L = pm.Uniform("L", 0, 1, initval=0.5) U = pm.Uniform("U", lower=9, upper=10, initval=9.5) B1 = pm.Uniform("B1", lower=L, upper=U, initval=5) B2 = pm.Uniform("B2", lower=L, upper=U, initval=(L + U) / 2) + + if reverse_rvs: + pmodel.free_RVs = pmodel.free_RVs[::-1] + ip = pmodel.initial_point(random_seed=0) assert ip["L_interval__"] == 0 assert ip["U_interval__"] == 0 @@ -187,6 +193,40 @@ def test_string_overrides_work(self): assert np.isclose(iv["B_log__"], 0) assert iv["C_log__"] == 0 + @pytest.mark.parametrize("reverse_rvs", [False, True]) + def test_dependent_initval_from_OFG(self, reverse_rvs): + class MyTestOp(OpFromGraph): + pass + + @_support_point.register(MyTestOp) + def my_test_op_support_point(op, out): + out1, out2 = out.owner.outputs + if out is out1: + return out1 + else: + return out1 * 4 + + out1 = pt.zeros(()) + out2 = out1 * 2 + rv_op = MyTestOp([], [out1, out2]) + + with pm.Model() as model: + A, B = rv_op() + if reverse_rvs: + model.register_rv(B, "B") + model.register_rv(A, "A") + else: + model.register_rv(A, "A") + model.register_rv(B, "B") + + assert model.initial_point() == {"A": 0, "B": 0} + + model.set_initval(A, 1) + assert model.initial_point() == {"A": 1, "B": 4} + + model.set_initval(B, 3) + assert model.initial_point() == {"A": 1, "B": 3} + class TestSupportPoint: def test_basic(self): diff --git a/tests/test_pytensorf.py b/tests/test_pytensorf.py index b3564cac1..f8353ce9c 100644 --- a/tests/test_pytensorf.py +++ b/tests/test_pytensorf.py @@ -39,7 +39,7 @@ from pymc.pytensorf import ( GeneratorOp, collect_default_updates, - compile_pymc, + compile, constant_fold, convert_data, convert_generator_data, @@ -348,23 +348,23 @@ def test_check_bounds_flag(self): m.check_bounds = False with m: - assert np.all(compile_pymc([], bound)() == 1) + assert np.all(compile([], bound)() == 1) m.check_bounds = True with m: - assert np.all(compile_pymc([], bound)() == -np.inf) + assert np.all(compile([], bound)() == -np.inf) def test_check_parameters_can_be_replaced_by_ninf(self): expr = pt.vector("expr", shape=(3,)) cond = pt.ge(expr, 0) final_expr = check_parameters(expr, cond, can_be_replaced_by_ninf=True) - fn = compile_pymc([expr], final_expr) + fn = compile([expr], final_expr) np.testing.assert_array_equal(fn(expr=[1, 2, 3]), [1, 2, 3]) np.testing.assert_array_equal(fn(expr=[-1, 2, 3]), [-np.inf, -np.inf, -np.inf]) final_expr = check_parameters(expr, cond, msg="test", can_be_replaced_by_ninf=False) - fn = compile_pymc([expr], final_expr) + fn = compile([expr], final_expr) np.testing.assert_array_equal(fn(expr=[1, 2, 3]), [1, 2, 3]) with pytest.raises(ParameterValueError, match="test"): fn([-1, 2, 3]) @@ -373,7 +373,7 @@ def test_compile_pymc_sets_rng_updates(self): rng = pytensor.shared(np.random.default_rng(0)) x = pm.Normal.dist(rng=rng) assert x.owner.inputs[0] is rng - f = compile_pymc([], x) + f = compile([], x) assert not np.isclose(f(), f()) # Check that update was not done inplace @@ -383,7 +383,7 @@ def test_compile_pymc_sets_rng_updates(self): def test_compile_pymc_with_updates(self): x = pytensor.shared(0) - f = compile_pymc([], x, updates={x: x + 1}) + f = compile([], x, updates={x: x + 1}) assert f() == 0 assert f() == 1 @@ -392,21 +392,21 @@ def test_compile_pymc_missing_default_explicit_updates(self): x = pm.Normal.dist(rng=rng) # By default, compile_pymc should update the rng of x - f = compile_pymc([], x) + f = compile([], x) assert f() != f() # An explicit update should override the default_update, like pytensor.function does # For testing purposes, we use an update that leaves the rng unchanged - f = compile_pymc([], x, updates={rng: rng}) + f = compile([], x, updates={rng: rng}) assert f() == f() # If we specify a custom default_update directly it should use that instead. rng.default_update = rng - f = compile_pymc([], x) + f = compile([], x) assert f() == f() # And again, it should be overridden by an explicit update - f = compile_pymc([], x, updates={rng: x.owner.outputs[0]}) + f = compile([], x, updates={rng: x.owner.outputs[0]}) assert f() != f() def test_compile_pymc_updates_inputs(self): @@ -425,7 +425,7 @@ def test_compile_pymc_updates_inputs(self): ([x, y], 1), ([x, y, z], 0), ): - fn = compile_pymc(inputs, z, on_unused_input="ignore") + fn = compile(inputs, z, on_unused_input="ignore") fn_fgraph = fn.maker.fgraph # Each RV adds a shared input for its rng assert len(fn_fgraph.inputs) == len(inputs) + rvs_in_graph @@ -452,7 +452,7 @@ def update(self, node): [dummy_rng1], pt.random.normal(rng=dummy_rng1).owner.outputs, )(rng1) - fn = compile_pymc(inputs=[], outputs=dummy_x1, random_seed=433) + fn = compile(inputs=[], outputs=dummy_x1, random_seed=433) assert fn() != fn() # Now there's a problem as there is no update rule for rng2 @@ -468,7 +468,7 @@ def update(self, node): with pytest.raises( ValueError, match="No update found for at least one RNG used in SymbolicRandomVariable" ): - compile_pymc(inputs=[], outputs=[dummy_x1, dummy_x2]) + compile(inputs=[], outputs=[dummy_x1, dummy_x2]) def test_random_seed(self): seedx = pytensor.shared(np.random.default_rng(1)) @@ -482,17 +482,17 @@ def test_random_seed(self): assert x0_eval == y0_eval # The variables will be reseeded with new seeds by default - f1 = compile_pymc([], [x, y]) + f1 = compile([], [x, y]) x1_eval, y1_eval = f1() assert x1_eval != y1_eval # Check that seeding works - f2 = compile_pymc([], [x, y], random_seed=1) + f2 = compile([], [x, y], random_seed=1) x2_eval, y2_eval = f2() assert x2_eval != x1_eval assert y2_eval != y1_eval - f3 = compile_pymc([], [x, y], random_seed=1) + f3 = compile([], [x, y], random_seed=1) x3_eval, y3_eval = f3() assert x3_eval == x2_eval assert y3_eval == y2_eval @@ -504,23 +504,23 @@ def test_multiple_updates_same_variable(self): y = pt.random.normal(1, rng=rng) # No warnings if only one variable is used - assert compile_pymc([], [x]) - assert compile_pymc([], [y]) + assert compile([], [x]) + assert compile([], [y]) user_warn_msg = "RNG Variable rng has multiple distinct clients" with pytest.warns(UserWarning, match=user_warn_msg): - f = compile_pymc([], [x, y], random_seed=456) + f = compile([], [x, y], random_seed=456) assert f() == f() # The user can provide an explicit update, but we will still issue a warning with pytest.warns(UserWarning, match=user_warn_msg): - f = compile_pymc([], [x, y], updates={rng: y.owner.outputs[0]}, random_seed=456) + f = compile([], [x, y], updates={rng: y.owner.outputs[0]}, random_seed=456) assert f() != f() # Same with default update rng.default_update = x.owner.outputs[0] with pytest.warns(UserWarning, match=user_warn_msg): - f = compile_pymc([], [x, y], updates={rng: y.owner.outputs[0]}, random_seed=456) + f = compile([], [x, y], updates={rng: y.owner.outputs[0]}, random_seed=456) assert f() != f() @pytest.mark.filterwarnings("error") # This is part of the test @@ -530,7 +530,7 @@ def test_duplicated_client_nodes(self): x = pt.random.normal(rng=rng) y = x.owner.clone().default_output() - fn = compile_pymc([], [x, y], random_seed=1) + fn = compile([], [x, y], random_seed=1) res_x1, res_y1 = fn() assert res_x1 == res_y1 res_x2, res_y2 = fn() @@ -545,7 +545,7 @@ def test_nested_updates(self): collect_default_updates(inputs=[], outputs=[x, y, z]) == {rng: next_rng3} - fn = compile_pymc([], [x, y, z], random_seed=514) + fn = compile([], [x, y, z], random_seed=514) assert not set(np.array(fn())) & set(np.array(fn())) # A local myopic rule (as PyMC used before, would not work properly) @@ -600,7 +600,7 @@ def step_wo_update(x, rng): assert collect_default_updates([ys]) == {rng: next(iter(next_rng.values()))} - fn = compile_pymc([], ys, random_seed=1) + fn = compile([], ys, random_seed=1) assert not (set(fn()) & set(fn())) def test_op_from_graph_updates(self): @@ -616,7 +616,7 @@ def test_op_from_graph_updates(self): next_rng, x = OpFromGraph([], [next_rng_, x_])() assert collect_default_updates([x]) == {rng: next_rng} - fn = compile_pymc([], x, random_seed=1) + fn = compile([], x, random_seed=1) assert not (set(fn()) & set(fn())) @@ -696,6 +696,11 @@ def test_inputs_preserved(self): (out_shape,) = constant_fold((out.shape[0],), raise_not_constant=False) assert out_shape is a + def test_constant_fold_alloc(self): + # By default, Alloc outputs cannot be constant folded + x = pt.alloc(pt.arange(5), 2, 5) + np.testing.assert_allclose(constant_fold([x])[0], np.broadcast_to(np.arange(5), (2, 5))) + def test_replace_vars_in_graphs(): inp = shared(0.0, name="inp")