diff --git a/.circleci/config.yml b/.circleci/config.yml index 3eaf28ae3..1b7141124 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,22 +3,11 @@ version: 2.1 jobs: build: docker: - - image: cimg/python:3.12 + - image: cimg/python:3.13 steps: - checkout - - run: - name: Install TeX - command: | - sudo apt update - sudo apt install -y \ - dvipng \ - latexmk \ - texlive-latex-extra \ - texlive-fonts-extra \ - texlive-extra-utils - - restore_cache: keys: - pip-cache-v1 @@ -27,7 +16,7 @@ jobs: name: Install Python dependencies command: | pip install --upgrade --user pip - pip install --user -r requirements.txt + pip install --user -r build_requirements.txt pip list - save_cache: @@ -40,12 +29,7 @@ jobs: command: | # NOTE: bad interaction w/ blas multithreading on circleci export OMP_NUM_THREADS=1 - make pdf - make html # FIX: check that failing examples produce failure - cp \ - ScientificPythonLectures.pdf \ - ScientificPythonLectures-simple.pdf \ - build/html/_downloads/ + make web # FIX: check that failing examples produce failure - store_artifacts: - path: build/html + path: _build/html diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 000000000..f0f205671 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,3 @@ +[codespell] +skip = .git,*.pdf,*.svg,*.csv +ignore-words-list = trough,remainers,befores diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 11b29b7e7..05f1ce117 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -27,7 +27,7 @@ jobs: - name: Install packages run: | pip install --upgrade pip wheel setuptools - pip install -r requirements.txt + pip install -r build_requirements.txt pip list - name: Lint diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 785b1b20e..4a9d98480 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -27,26 +27,21 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip wheel 
setuptools - python -m pip install -r requirements.txt + python -m pip install -r build_requirements.txt - - name: "Build PDF & HTML" + - name: "Build HTML" run: | - make pdf make html - mv \ - ScientificPythonLectures.pdf \ - ScientificPythonLectures-simple.pdf \ - build/html/_downloads - echo -n 'lectures.scientific-python.org' > build/html/CNAME - touch build/html/.nojekyll + echo -n 'lectures.scientific-python.org' > _build/html/CNAME + touch _build/html/.nojekyll - name: Deploy uses: peaceiris/actions-gh-pages@v4 with: deploy_key: ${{ secrets.ACTIONS_DEPLOY_KEY }} - external_repository: scipy-lectures/lectures.scientific-python.org + # external_repository: scipy-lectures/lectures.scientific-python.org publish_branch: gh-pages - publish_dir: ./build/html + publish_dir: ./_build/html force_orphan: true user_name: "github-actions[bot]" user_email: "github-actions[bot]@users.noreply.github.com" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f51e285e4..5efd5cc8f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,9 +28,13 @@ jobs: - name: Install packages run: | - python -m pip install --upgrade pip wheel setuptools - python -m pip install -r requirements.txt + python -m pip install -r test_requirements.txt python -m pip list - - name: Test lectures - run: make test + - name: Test + run: | + # Avoid deprecation error. 
+ export JUPYTER_PLATFORM_DIRS=1 + jupyter --paths + + make test diff --git a/.gitignore b/.gitignore index e588efd0a..4474e2447 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,17 @@ airfares.txt wages.txt ScientificPythonLectures-simple.pdf ScientificPythonLectures.pdf +.ipynb_checkpoints/ +__pycache__/ +.ok_storage +*.ipynb +*.orig +node_modules/ +.jupyterlite.doit.db +advanced/advanced_numpy/test.png +packages/scikit-image/cat.png +advanced/advanced_numpy/test_recolored.png +advanced/advanced_numpy/test_red.png +intro/language/junk.txt +intro/language/test.pkl +joblib/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e791e90da..f4b5e59f9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,6 +45,8 @@ repos: additional_dependencies: - types-aiofiles - types-requests + - types-docutils + - types-PyYAML - pandas-stubs - types-pillow - matplotlib @@ -56,6 +58,8 @@ repos: | .*/setup.*\.py$ | .*/demo.py$ | .*/auto_examples/ + | _scripts/examples2nb.py$ + | _scripts/post_parser.py$ | advanced/mathematical_optimization/examples/plot_gradient_descent\.py$ | advanced/mathematical_optimization/examples/helper/compare_optimizers\.py$ | advanced/advanced_numpy/examples/view-colors\.py$ diff --git a/AUTHORS.rst b/AUTHORS.md similarity index 88% rename from AUTHORS.rst rename to AUTHORS.md index e86e28e6e..6fc6417e8 100644 --- a/AUTHORS.rst +++ b/AUTHORS.md @@ -1,277 +1,150 @@ +--- +orphan: true +--- -Authors -======== +# Authors -Editors --------- - -- K\. Jarrod Millman +## Editors +- K. Jarrod Millman - Stéfan van der Walt - - Gaël Varoquaux - - Emmanuelle Gouillart - - Olav Vahtras - - Pierre de Buyl +- Peter Rush +- Matthew Brett - -Chapter authors ----------------- +## Chapter authors Listed by alphabetical order. 
- Christopher Burns - - Adrian Chauve - - Robert Cimrman - - Christophe Combelles - - André Espaze - - Emmanuelle Gouillart - - Mike Müller - - Fabian Pedregosa - - Didrik Pinte - - Nicolas Rougier - - Gaël Varoquaux - - Pauli Virtanen - - Zbigniew Jędrzejewski-Szmek - - Valentin Haenel (editor from 2011 to 2015) -Additional Contributions ------------------------- +## Additional Contributions Listed by alphabetical order - Osayd Abdu - - arunpersaud - - Ross Barnowski - - Sebastian Berg - - Lilian Besson - - Matthieu Boileau - - Joris Van den Bossche - - Michael Boyle - - Matthew Brett - - BSGalvan - - Lars Buitinck - - Pierre de Buyl - - Ozan Çağlayan - - Lawrence Chan - - Adrien Chauve - - Robert Cimrman - - Christophe Combelles - - David Cournapeau - - Dave - - dogacan dugmeci - - Török Edwin - - egens - - Andre Espaze - - André Espaze - - Loïc Estève - - Corey Farwell - - Tim Gates - - Stuart Geiger - - Olivier Georg - - Daniel Gerigk - - Robert Gieseke - - Philip Gillißen - - Ralf Gommers - - Emmanuelle Gouillart - - Julia Gustavsen - - Omar Gutiérrez - - Matt Haberland - - Valentin Haenel - - Pierre Haessig - - Bruno Hanzen - - Michael Hartmann - - Jonathan Helmus - - Andreas Hilboll - - Himanshu - - Julian Hofer - - Tim Hoffmann - -- B\. Hohl - +- B. Hohl - Tarek Hoteit - - Gert-Ludwig Ingold - - Zbigniew Jędrzejewski-Szmek - - Thouis (Ray) Jones - - jorgeprietoarranz - - josephsalmon - - Greg Kiar - - kikocorreoso - - Vince Knight - - LFP6 - - Manuel López-Ibáñez - - Marco Mangan - - Nicola Masarone - - John McLaughlin - - mhemantha - - michelemaroni89 - -- K\. Jarrod Millman - +- K. Jarrod Millman - Mohammad - - Zachary Moon - - Mike Mueller - - negm - - John B Nelson - - nicoguaro - - Sergio Oller - - Theofilos Papapanagiotou - - patniharshit - - Fabian Pedregosa - - Philippe Pepiot - - Tiago M. D. Pereira - - Nicolas Pettiaux - - Didrik Pinte - - Evgeny Pogrebnyak - - reverland - - Maximilien Riehl - - Kristian Rother - - Nicolas P. 
Rougier - - Pamphile Roy - - Rutzmoser - - Sander - - João Felipe Santos - - Mark Setchell - - Helen Sherwood-Taylor - - Shoeboxam - - Simon - - solarjoe - - ssmiller - - Scott Staniewicz - - strpeter - - surfer190 - - Bartosz Telenczuk - - tommyod - - Wes Turner - - Akihiro Uchida - - Utkarsh Upadhyay - - Olav Vahtras - - Stéfan van der Walt - - Gaël Varoquaux - - Nelle Varoquaux - - Olivier Verdier - - VirgileFritsch - - Pauli Virtanen - - Yosh Wakeham - - yasutomo57jp diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 000000000..7d8c0853d --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,171 @@ +# What's new + +## Release 2024.1 (April 2024) + +- Python 3.10, 3.11, 3.12 +- Renamed Scientific Python Lectures +- Removed old content +- Major updates to support recent packages +- Updates to the SciPy and scikit-image chapters + +## Release 2022.1 (August 2022) + +- Replace scikit-learn housing example with California data (Marco Mangan) +- Fix links and typos (Zachary Moon, Tim Gates, Marco Mangan, Gert-Ludwig Ingold) +- Fix fftpack figure (Osayd Abdu) +- Update software version (Pierre de Buyl) + +## Release 2020.2 (September 2020) + +- Replace image i/o from scipy.misc by imageio (Pierre de Buyl) +- Update information on dict ordering (Bharath Saiguhan) +- Suppress warnings for mandelbrot example (Pierre de Buyl) +- Update NumPy introduction and advanced usage for changes to NumPy: wording, bytes + representation, floating point argument to np.zeros (Ross Barnowski) +- Fix links to NumPy documentation to use numpy.org (Ross Barnowski) +- Update note on transposed arrays (Ross Barnowski with Eric Wieser) +- Use generated figure file for lidar data processing (Lawrence Chan) +- Update link from PyMC2 to PyMC3 (B. 
Hohl) +- Fix transparent popup menu to have a background (Pierre de Buyl) + +## Release 2020.1 (March 2020) + +- Fix outdated URLs (Gert-Ludwig Ingold) +- Update packages (Pierre de Buyl) +- Remove Python 2 continuous integration (Olav Vahtras - EuroSciPy 2019 sprint) +- Fix chessboard size (Mark Setchell) +- Add objectives and design choices (Gert-Ludwig Ingold and Pierre de Buyl) +- Make the numpy advanced iterator example more elaborate (Sebastian Berg) +- Use empty list instead of empty tuple to deactivate ticks (Tim Hoffmann) +- Fix typos (Sander van Rijn, cydave, Michel Corne) and off by 2 errors + (Andreas Hilboll) +- Improve readability of Polynomials example code (Michel Corne) +- Replace suggestions for debugging environments (Gert-Ludwig Ingold) +- Add section on Python 2 vs Python 3 (Pierre de Buyl) + +## Release 2019.1 (May 2019) + +- Update matplotlib compatibility to version 2.2 (Mike Mueller, Joris Van den + Bossche, Pierre de Buyl) +- Make C-API example cos_module_np Python 2/3 compatible (Michael Boyle) +- Fix typos and outdated URLs (Dogacan Dugmeci, Matthieu Boileau, Stuart Geiger, Omar + Gutiérrez, Himanshu, Julian Hofer, Joseph Salmon, Manuel López-Ibáñez, + Nicola Masarone, michelemaroni89, Evgeny Pogrebnyak, tommyod) + +## Release 2018.1 (September 2018) + +- Fix wordings, typos, colours (Pierre de Buyl, Greg Kiar, Olav Vahtras + Kristian Rother) +- Fix interpolation example code (Scott Staniewicz) +- Fix CSS for high density displays (Gaël Varoquaux) +- Generate indexing figures with PyX (Gert Ingold) +- Warn clearly against the use of Python 2 (Bruno Hanzen) +- Update external links (Bruno Hanzen) +- Update versions of dependencies: sphinx-gallery, pandas, statsmodels + (Gaël Varoquaux) + +## Release 2017.1 (October 2017) + +- Update optimization chapter (Michael Hartmann, Gaël Varoquaux) +- Update SymPy chapter (Vince Knight) +- Update advanced NumPy (Bartosz Teleńczuk) +- Update scikit-learn chapter (Gaël Varoquaux) +- Update SciPy 
chapter (Gaël Varoquaux) +- Make '>>>' in the prompts unselectable (Pierre de Buyl) +- Use common package requirements for pip and conda and improve the build + instructions (Gert-Ludwig Ingold, Vince Knight, Pierre de Buyl) +- Set up Circle CI (Loïc Estève) +- Improved support for Python 3 integer divisions and calls to print (Loïc + Estève, Gert-Ludwig Ingold, Pierre de Buyl, Gaël Varoquaux) +- Change test runner to pytest (Pierre de Buyl) +- Replace the plot directive by sphinx-gallery (Gert-Ludwig Ingold) + +## Release 2016.1 (September 2016) + +- Rework of intro chapter (Gaël Varoquaux) +- Integrate sphinx-gallery: examples are now Jupyter notebooks (Gaël + Varoquaux, Gert-Ludwig Ingold, Óscar Nájera) +- Better Python 3 tests and support (Gert-Ludwig Ingold) +- Adapt examples to Matplotlib 1.5 (Gaël Varoquaux) +- Modernize numpy chapter (Bartosz Telenczuk) + +## Release 2015.3 (November 2015) + +- Collapsed sidebar can now pop up for mid-sized display (Gaël Varoquaux) +- Replaced pictures of Lena by raccoon face (Thouis Jones) + +## Release 2015.2 (October 2015) + +- Authors on cover ordered as in bibtex entry (Nicolas Rougier) +- Better rendering on mobile (Gaël Varoquaux) +- Fix restructured text markup errors (Olav Vahtras) + +## Release 2015.1 (September 2015) + +- New chapter on statistics with Python (Gaël Varoquaux) +- Better layout in PDF (Gaël Varoquaux) +- New HTML layout, simplified formatting, mobile-friendly and sidebar + (Gaël Varoquaux, Nelle Varoquaux) +- Logos on the HTML front page and on the PDF cover (Nicolas Rougier) +- Python 3 compatible code (Gaël Varoquaux, Olav Vahtras) +- Code put up to date for more recent versions of project (Pierre de + Buyl, Emmanuelle Gouillart, Gert-Ludwig Ingold, Nicolas Pettiaux, Olav + Vahtras, Gaël Varoquaux, Nelle Varoquaux) +- Matplotlib updated with removal of deprecated pylab interface (Nicolas + Rougier) + +## Release 2013.2 (21 August 2013) + +- NumPy chapter simplified (Valentin Haenel) +- New layout 
for the HTML rendering (Gaël Varoquaux) + +## Release 2013.1 (10 Feb 2013) + +- Improvements to the advanced image manipulation chapter (Emmanuelle Gouillart) +- Upgrade of the introductory language chapter (Valentin Haenel) +- Upgrade of the introductory numpy chapter (Valentin Haenel) +- New advanced chapter on interfacing with C (Valentin Haenel) +- Minor fixes and improvements in various places (Robert Gieseke, Ozan Çağlayan, + Sergio Oller, kikocorreoso, Valentin Haenel) + +## Release 2012.3 (26 Nov 2012) + +This release integrates the changes written for the Euroscipy conference: + +- Matplotlib chapter completely redone (Nicolas Rougier, Gaël Varoquaux) +- New advanced chapter on mathematical optimization (Gaël Varoquaux) +- Mayavi chapter redone (Gaël Varoquaux) +- Front page layout slightly improved: folding TOC (Gaël Varoquaux) + +## Release 2012.2 (22 Jun 2012) + +Minor release with a few clean ups (Gael Varoquaux). + +## Release 2012.1 (20 Jun 2012) + +This is a minor release with many clean ups. In particular, clean up of +the layout (Gael Varoquaux), shortening of the numpy chapters and +deduplications across the intro and advanced chapters (Gael Varoquaux) +and doctesting of all the code (Gael Varoquaux). + +## Release 2012.0 (22 Apr 2012) + +This is a minor release with a few clean ups. In particular, clean up the +scikit-learn chapter (Lars Buitinck), more informative section titles +(Gael Varoquaux), and misc fixes (Valentin Haenel, Virgile Fritsch). + +## Release 2011.1 (16 Oct 2011) + +This release is a reworked version of the Euroscipy 2011 tutorial. Layout +has been cleaned and optimized (Valentin Haenel and many others), the Traits +chapter has been merged in (Didrik Pinte). + +## Release 2011 (1 Sept 2011) + +This release is used for the Euroscipy 2011 tutorial. The numpy +introductory chapter has been revamped (Pauli Virtanen). The outline of +the introductory chapters has been simplified (Gaël Varoquaux). 
Advanced +chapters have been added: advanced Python constructs (Zbigniew +Jędrzejewski-Szmek), debugging code (Gaël Varoquaux), optimizing code +(Gaël Varoquaux), image processing (Emmanuelle Gouillart), scikit-learn +(Fabian Pedregosa). diff --git a/CHANGES.rst b/CHANGES.rst deleted file mode 100644 index a3a7cd8bd..000000000 --- a/CHANGES.rst +++ /dev/null @@ -1,261 +0,0 @@ -What's new -========== - -Release 2024.1 (April 2024) ---------------------------- - -- Python 3.10, 3.11, 3.12 - -- Renamed Scientific Python Lectures - -- Removed old content - -- Major updates to support recent packages - -- Updates to the SciPy and scikit-image chapters - - -Release 2022.1 (August 2022) ----------------------------- - -* Replace scikit-learn housing example with California data (Marco Mangan) - -* Fix links and typos (Zachary Moon, Tim Gates, Marco Mangan, Gert-Ludwig Ingold) - -* Fix fftpack figure (Osayd Abdu) - -* Update software version (Pierre de Buyl) - -Release 2020.2 (September 2020) -------------------------------- - -* Replace image i/o from scipy.misc by imageio (Pierre de Buyl) - -* Update information on dict ordering (Bharath Saiguhan) - -* Suppress warnings for mandelbrot example (Pierre de Buyl) - -* Update NumPy introduction and advanced usage for changes to NumPy: wording, bytes - representation, floating point argument to np.zeros (Ross Barnowski) - -* Fix links to NumPy documentation to use numpy.org (Ross Barnowski) - -* Update note on transposed arrays (Ross Barnowski with Eric Wieser) - -* Use generated figure file for lidar data processing (Lawrence Chan) - -* Update link from PyMC2 to PyMC3 (B. 
Hohl) - -* Fix transparent popup menu to have a background (Pierre de Buyl) - - -Release 2020.1 (March 2020) ------------------------------ - -* Fix outdated URLs (Gert-Ludwig Ingold) - -* Update packages (Pierre de Buyl) - -* Remove Python 2 continuous integration (Olav Vahtras - EuroSciPy 2019 sprint) - -* Fix chessboard size (Mark Setchell) - -* Add objectives and design choices (Gert-Ludwig Ingold and Pierre de Buyl) - -* Make the numpy advanced iterator example more elaborate (Sebastian Berg) - -* Use empty list instead of empty tuple to deactivate ticks (Tim Hoffmann) - -* Fix typos (Sander van Rijn, cydave, Michel Corne) and off by 2 errors - (Andreas Hilboll) - -* Improve readability of Polynomials example code (Michel Corne) - -* Replace suggestions for debugging environments (Gert-Ludwig Ingold) - -* Add section on Python 2 vs Python 3 (Pierre de Buyl) - - -Release 2019.1 (May 2019) -------------------------- - -* Update matplotlib compatibility to version 2.2 (Mike Mueller, Joris Van den - Bossche, Pierre de Buyl) - -* Make C-API example cos_module_np Python 2/3 compatible (Michael Boyle) - -* Fix typos and outdated URLs (Dogacan Dugmeci, Matthieu Boileau, Stuart Geiger, Omar - Gutiérrez, Himanshu, Julian Hofer, Joseph Salmon, Manuel López-Ibáñez, - Nicola Masarone, michelemaroni89, Evgeny Pogrebnyak, tommyod) - - -Release 2018.1 (September 2018) -------------------------------------- - -* Fix wordings, typos, colours (Pierre de Buyl, Greg Kiar, Olav Vahtras - Kristian Rother) - -* Fix interpolation example code (Scott Staniewicz) - -* Fix CSS for high density displays (Gaël Varoquaux) - -* Generate indexing figures with PyX (Gert Ingold) - -* Warn clearly against the use of Python 2 (Bruno Hanzen) - -* Update external links (Bruno Hanzen) - -* Update versions of dependencies: sphinx-gallery, pandas, statsmodels - (Gaël Varoquaux) - - -Release 2017.1 (October 2017) -------------------------------------- - -* Update optimization chapter (Michael Hartmann, 
Gaël Varoquaux) - -* Update SymPy chapter (Vince Knight) - -* Update advanced NumPy (Bartosz Teleńczuk) - -* Update scikit-learn chapter (Gaël Varoquaux) - -* Update SciPy chapter (Gaël Varoquaux) - -* Make '>>>' in the prompts unselectable (Pierre de Buyl) - -* Use common package requirements for pip and conda and improve the build - instructions (Gert-Ludwig Ingold, Vince Knight, Pierre de Buyl) - -* Set up Circle CI (Loïc Estève) - -* Improved support for Python 3 integer divisions and calls to print (Loïc - Estève, Gert-Ludwig Ingold, Pierre de Buyl, Gaël Varoquaux) - -* Change test runner to pytest (Pierre de Buyl) - -* Replace the plot directive by sphinx-gallery (Gert-Ludwig Ingold) - -Release 2016.1 (September 2016) -------------------------------------- - -* Rework of intro chapter (Gaël Varoquaux) - -* Integrate sphinx-gallery: examples are now Jupyter notebooks (Gaël - Varoquaux, Gert-Ludwig Ingold, Óscar Nájera) - -* Better Python 3 tests and support (Gert-Ludwig Ingold) - -* Adapt examples to Matplotlib 1.5 (Gaël Varoquaux) - -* Modernize numpy chapter (Bartosz Telenczuk) - -Release 2015.3 (November 2015) -------------------------------------- - -* Collapsed sidebar can now pop up for mid-sized display (Gaël Varoquaux) - -* Replaced pictures of Lena by raccoon face (Thouis Jones) - -Release 2015.2 (October 2015) -------------------------------------- - -* Authors on cover ordered as in bibtex entry (Nicolas Rougier) - -* Better rendering on mobile (Gaël Varoquaux) - -* Fix restructured text markup errors (Olav Vahtras) - -Release 2015.1 (September 2015) -------------------------------------- - -* New chapter on statistics with Python (Gaël Varoquaux) - -* Better layout in PDF (Gaël Varoquaux) - -* New HTML layout, simplified formatting, mobile-friendly and sidebar - (Gaël Varoquaux, Nelle Varoquaux) - -* Logos on the HTML front page and on the PDF cover (Nicolas Rougier) - -* Python 3 compatible code (Gaël Varoquaux, Olav Vahtras) - -* Code put up to 
date for more recent versions of project (Pierre de - Buyl, Emmanuelle Gouillart, Gert-Ludwig Ingold, Nicolas Pettiaux, Olav - Vahtras, Gaël Varoquaux, Nelle Varoquaux) - -* Matplotlib updated with removal of deprecated pylab interface (Nicolas - Rougier) - -Release 2013.2 (21 August 2013) -------------------------------------- - -* NumPy chapter simplified (Valentin Haenel) - -* New layout for the HTML rendering (Gaël Varoquaux) - -Release 2013.1 (10 Feb 2013) ----------------------------- - -* Improvements to the advanced image manipulation chapter (Emmanuelle Gouillart) - -* Upgrade of the introductory language chapter (Valentin Haenel) - -* Upgrade of the introductory numpy chapter (Valentin Haenel) - -* New advanced chapter on interfacing with C (Valentin Haenel) - -* Minor fixes and improvements in various places (Robert Gieseke, Ozan Çağlayan, - Sergio Oller, kikocorreo, Valentin Haenel) - - -Release 2012.3 (26 Nov 2012) ----------------------------- - -This release integrates the changes written for the Euroscipy conference: - -* Matplotlib chapter completely redone (Nicolas Rougier, Gaël Varoquaux) - -* New advanced chapter on mathematical optimization (Gaël Varoquaux) - -* Mayavi chapter redone (Gaël Varoquaux) - -* Front page layout slightly improved: folding TOC (Gaël Varoquaux) - -Release 2012.2 (22 Jun 2012) ----------------------------- - -Minor release with a few clean ups (Gael Varoquaux). - -Release 2012.1 (20 Jun 2012) ----------------------------- - -This is a minor release with many clean ups. In particular, clean up of -the layout (Gael Varoquaux), shortening of the numpy chapters and -deduplications across the intro and advanced chapters (Gael Varoquaux) -and doctesting of all the code (Gael Varoquaux). - -Release 2012.0 (22 Apr 2012) ----------------------------- - -This is a minor release with a few clean ups. 
In particular, clean up the -scikit-learn chapter (Lars Buitinck), more informative section titles -(Gael Varoquaux), and misc fixes (Valentin Haenel, Virgile Fritsch). - -Release 2011.1 (16 Oct 2011) ----------------------------- - -This release is a reworked version of the Euroscipy 2011 tutorial. Layout -has been cleaned and optimized (Valentin Haenel and many others), the Traits -chapter has been merged in (Didrik Pinte) - -Release 2011 (1 Sept 2011) ---------------------------- - -This release is used for the Euroscipy 2011 tutorial. The numpy -introductory chapter has been rewamped (Pauli Virtanen). The outline of -the introductory chapters has been simplified (Gaël Varoquaux). Advanced -chapters have been added: advanced Python constructs (Zbigniew -Jędrzejewski-Szmek), debugging code (Gaël Varoquaux), optimizing code -(Gaël Varoquaux), image processing (Emmanuelle Gouillart), scikit-learn -(Fabian Pedregosa). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..94eb0e598 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,270 @@ +# Contributing + +The Scientific Python Lectures are a community-based effort and require +constant maintenance and improvements. New contributions such as wording +improvements or inclusion of new topics are welcome. + +To propose bugfixes or straightforward improvements to the lectures, see the +contribution guide below. + +For new topics, read the objectives first and [open an issue on the GitHub +project](https://github.com/scipy-lectures/scientific-python-lectures/issues) to +discuss it with the editors. + +## Objectives and design choices for the lectures + +Contributors should keep the following objectives and design choices of +the Scientific Python Lectures in mind. + +Objectives: + +- Provide a self-contained introduction to Python and its primary computational + packages, the ”Scientific Python stack“. +- Provide tutorials for a selection of widely-used and stable computational + libraries. 
+ Currently, we cover Pandas, Statsmodels, some of Seaborn, Scikit-image, + Scikit-learn, and Sympy. +- We would like to apply automated testing to the code examples as much as + possible. + +Design choices: + +- Each chapter should provide a useful basis for a 1‒2 h tutorial. +- The code should be readable. +- An idiomatic style should be followed, e.g. `import numpy as np`, + preference for array operations, PEP8 coding conventions. + +## Contributing guide + +The directory `guide` contains instructions on how to contribute: + +[Contribution guide](guide) + +## Building instructions + +To generate the html output for on-screen display, type: + +``` +make html +``` + +The generated HTML files can be found in `_build/html`. + +The first build takes a long time, but information is cached and +subsequent builds will be faster. + +### Requirements + +Build requirements are listed in the +{download}`requirements file <requirements.txt>`: + +```{literalinclude} requirements.txt + +``` + +Ensure that you have a [virtual environment](https://docs.python.org/3/library/venv.html) or conda environment +set up, then install requirements with: + +``` +pip install -r requirements.txt +``` + +Note that you will also need the following system packages: + +- Python C development headers (the `python3-dev` package on Debian, e.g.), +- a C compiler like gcc, +- [GNU Make](https://www.gnu.org/software/make/), +- a full LaTeX distribution such as [TeX Live](https://www.tug.org/texlive/) (`texlive-latex-base`, + `texlive-latex-extra`, `texlive-fonts-extra`, and `latexmk` + on Debian/Ubuntu), +- [dvipng](http://savannah.nongnu.org/projects/dvipng/), +- [latexmk](https://personal.psu.edu/~jcc8/software/latexmk/), +- [git](https://git-scm.com/). + +### Updating the cover + +Use Inkscape to modify the cover in `images/`, then export to PDF: + +``` +inkscape --export-filename=cover-2025.pdf cover-2025.svg +``` + +Ensure that the `images/cover.pdf` symlink points to the correct +file. 
+ +## Notes and admonitions + +Use `:::` for +`
` blocks ([JupyterBook allows +this](https://jupyterbook.org/en/stable/content/content-blocks.html#markdown-friendly-directives-with)): +So, for example, prefer: + +``` +::: {note} + +My note + +::: +``` + +to the more standard Myst markup of: + +```` + +``` {note} + +My note + +``` + +```` + +Note the `region` and `endregion` markup in the second form; this helps +ensure that Jupytext does not confuse the `{note}` with a code block. One of the +advantages of the `:::` markup is that you don't need these `#region` +demarcations. + +For the same reason, prefer the `:::` form for other content blocks, such as +warnings and admonitions. For example, prefer: + +``` +::: {admonition} A custom title + +My admonition + +::: +``` + +## Exercises and solutions + +We use [sphinx-exercise](https://ebp-sphinx-exercise.readthedocs.io) for the exercises and solutions. + +Mark _all_ exercises and solutions with [gated +markers](https://ebp-sphinx-exercise.readthedocs.io/en/latest/syntax.html#alternative-gated-syntax), +like this: + +``` +::: {exercise-start} +:label: my-exercise-label +:class: dropdown +::: + +My exercise. + +::: {exercise-end} +::: + +::: {solution-start} my-exercise-label +:class: dropdown +::: + +My solution. + +::: {solution-end} +::: +``` + +The gated markers (of form `solution-start` and `solution-end` etc) allow you +to embed code cells in the exercise or solution, because this allows code cells +to be at the top level of the notebook, where Jupyter needs them to be. + +The gated markers also make it possible for the `process_notebooks.py` +script to recognize exercise and solution blocks, and to parse them correctly. + +(notebook-processing)= + +## A note on processing + +The pages are designed both as pages for pretty HTML output, and to be used as +interactive notebooks in e.g. JupyterLite. + +There is some markup that we need for the pretty HTML output that looks ugly in +a Jupyter interface such as [JupyterLite](https://jupyterlite.readthedocs.io). 
+To deal with this in part, we install the +[jupyterlab_myst](https://github.com/jupyter-book/jupyterlab-myst) extension by +default, so that Myst markup (mostly) appears as it should inside JupyterLab +when opened as a notebook. Another difference we want to see between the HTML +and the notebook version is that we want to avoid putting the solutions in the +notebook version, to allow more space for thought about the exercise. Both to +modify any ugly formatting, and to remove the exercise solutions, we +post-process the pages with a script `_scripts/process_notebooks.py` to load +the pages as text notebooks, and write out `.ipynb` files with modified markup +that looks better in a Jupyter interface. Some of the authoring advice here is +to allow that process to work smoothly, because the `process_notebooks.py` file +reads the input Myst-MD format notebooks using +[Jupytext](https://jupytext.readthedocs.io) before converting to Jupyter +`.ipynb` files. + +## Tests + +There may well be cases where you will want to put cells in the rendered +notebook that test values, as part of the exposition. For example, from the +`intro/scipy/index.md` notebook / page: + +```` + +```{code-cell} +A_upper = np.triu(A) +A_upper +``` + +```{code-cell} +np.allclose(sp.linalg.solve_triangular(A_upper, b, lower=False), + sp.linalg.solve(A_upper, b)) +``` +```` + +Notice that, in this case, we do want the reader to see that test, as part of +the exposition. + +However, there are cases where the test would be useful, to, for example, +detect changes in the output over versions of the packages being used. We want +to avoid the situation where the text says one thing, but the values contradict +it. But we may not want the reader to have to read such tests as part of the +exposition. 
Here, for example, is a test from the `intro/scipy/index.md` +notebook: + +```` + +```{code-cell} +log_a = sp.special.gammaln(500) +log_b = sp.special.gammaln(499) +log_res = log_a - log_b +res = np.exp(log_res) +res +``` + +```{code-cell} +:tags: [remove-cell, test] +assert np.allclose(res, 499) +``` + +```` + +Note that the test confirms that Scipy is still giving the output implied in +the text. Note too that we have given the testing code cell the tag +`remove-cell`. This drops the cell from the HTML output, and our +[post-processing of the notebooks](notebook-processing) also drops these cells, +so someone opening the notebook in e.g. JupyterLite will not see them. +Accordingly, please make sure you are not defining anything in these test cells +that the notebook will need in later cells. + +Be judicious — testing the output of `np.ones(3)` is probably not useful +— Numpy would have to break in order for that test to fail. + +## Development + +Run this once, in the repository directory: + +``` +pip install pre_commit +pre-commit install +``` + +Before each commit that you will push: + +``` +pre-commit run --all +``` + +Among other things, this runs the `codespell` check, also run by CI. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst deleted file mode 100644 index 7e5694afb..000000000 --- a/CONTRIBUTING.rst +++ /dev/null @@ -1,106 +0,0 @@ -Contributing -============= - -The Scientific Python Lectures are a community-based effort and require -constant maintenance and improvements. New contributions such as wording -improvements or inclusion of new topics are welcome. - -To propose bugfixes or straightforward improvements to the lectures, see the -contribution guide below. - -For new topics, read the objectives first and `open an issue on the GitHub -project `_ to -discuss it with the editors. 
- - -Objectives and design choices for the lectures ----------------------------------------------- - -Contributors should keep the following objectives and design choices of -the Scientific Python Lectures in mind. - -Objectives: - -* Provide a self-contained introduction to Python and its primary computational - packages, the ”Scientific Python stack“. -* Provide tutorials for a selection of widely-used and stable computational - libraries. - Currently, we cover pandas, statmodels, seaborn, scikit-image, - scikit-learn, and sympy. -* Automated testing is applied to the code examples as much as possible. - -Design choices: - -* Each chapter should provide a useful basis for a 1‒2 h tutorial. -* The code should be readable. -* An idiomatic style should be followed, e.g. ``import numpy as np``, - preference for array operations, PEP8 coding conventions. - - -Contributing guide ------------------- - -The directory ``guide`` contains instructions on how to contribute: - -.. topic:: **Example chapter** - - .. toctree:: - - guide/index.rst - -Building instructions ----------------------- - -To generate the html output for on-screen display, Type:: - - make html - -the generated html files can be found in ``build/html`` - -The first build takes a long time, but information is cached and -subsequent builds will be faster. - -To generate the pdf file for printing:: - - make pdf - -The pdf builder is a bit difficult and you might have some TeX errors. -Tweaking the layout in the ``*.rst`` files is usually enough to work -around these problems. - -Requirements -............ - -Build requirements are listed in the -:download:`requirements file `: - -.. 
literalinclude:: requirements.txt - -Ensure that you have a `virtual environment -`__ or conda environment -set up, then install requirements with:: - - pip install -r requirements.txt - -Note that you will also need the following system packages: - - - Python C development headers (the `python3-dev` package on Debian, e.g.), - - a C compiler like gcc, - - `GNU Make `__, - - a full LaTeX distribution such as `TeX Live - `__ (``texlive-latex-base``, - ``texlive-latex-extra``, ``texlive-fonts-extra``, and ``latexmk`` - on Debian/Ubuntu), - - `dvipng `__, - - `latexmk `__, - - `git `__. - -Updating the cover -.................. - -Use inkscape to modify the cover in ``images/``, then export to PDF:: - - inkscape --export-filename=cover-2025.pdf cover-2025.svg - -Ensure that the ``images/cover.pdf`` symlink points to the correct -file. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 000000000..7a6dc9b1a --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,9 @@ +# License + +All code and material is licensed under a + +Creative Commons Attribution 4.0 International License (CC-by) + + + +See the [AUTHORS](AUTHORS.md) file for a list of contributors. diff --git a/LICENSE.rst b/LICENSE.rst deleted file mode 100644 index c59ed1103..000000000 --- a/LICENSE.rst +++ /dev/null @@ -1,10 +0,0 @@ -License -======== - -All code and material is licensed under a - -Creative Commons Attribution 4.0 International License (CC-by) - -https://creativecommons.org/licenses/by/4.0/ - -See the AUTHORS.rst file for a list of contributors. diff --git a/Makefile b/Makefile index 2b22cc8b0..79b13fccb 100644 --- a/Makefile +++ b/Makefile @@ -1,159 +1,44 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -PYTHON = python -SPHINXOPTS = -SPHINXBUILD = $(PYTHON) -m sphinx - -ALLSPHINXOPTS = -d build/doctrees $(SPHINXOPTS) . 
- -TAG ?= HEAD - -SSH_HOST= -SSH_USER= -SSH_TARGET_DIR= - -SHELL := /bin/bash - -.PHONY: help clean html web pickle htmlhelp latex changes linkcheck zip check-rsync-env test - -all: html-noplot - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " pickle to make pickle files (usable by e.g. sphinx-web)" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " pdf to make PDF from LaTeX, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview over all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " install to upload to github the web pages" - @echo " zip to create the zip file with examples and doc" - -clean: - -rm -rf build/* - -find . -name __pycache__ -type d | xargs rm -rf - -rm -rf intro/scipy/auto_examples/ intro/matplotlib/auto_examples/ intro/scipy/summary-exercises/auto_examples advanced/mathematical_optimization/auto_examples/ advanced/advanced_numpy/auto_examples/ advanced/image_processing/auto_examples advanced/scipy_sparse/auto_examples packages/3d_plotting/auto_examples packages/statistics/auto_examples/ packages/scikit-image/auto_examples/ packages/scikit-learn/auto_examples intro/numpy/auto_examples guide/auto_examples - -rm -f data/test.png face.png face.raw file.mat fname.png local_logo.png mandelbrot.png output.txt output2.txt plot.png pop.npy pop2.txt random_00.png random_01.png random_02.png random_03.png random_04.png random_05.png random_06.png random_07.png random_08.png random_09.png red_elephant.png test.png tiny_elephant.png workfile - -rm -f ScientificPythonLectures-simple.pdf ScientificPythonLectures.pdf - -rm -f advanced/image_processing/examples/face.png - -test: - $(PYTHON) -m pytest --doctest-glob '*.rst' - -test-stop-when-failing: - $(PYTHON) -m pytest -x --doctest-glob '*.rst' - -html-noplot: - $(SPHINXBUILD) 
-D plot_gallery=0 -b html $(ALLSPHINXOPTS) build/html - @echo - @echo "Build finished. The HTML pages are in build/html." +PYTHON ?= python +PIP_INSTALL_CMD ?= $(PYTHON) -m pip install +BUILD_DIR=_build/html +JL_DIR=_build/jl html: - mkdir -p build/html build/doctrees - # This line makes the build a bit more lengthy, and the - # the embedding of images more robust - rm -rf build/html/_images - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html - @echo - @echo "Build finished. The HTML pages are in build/html." - -html-scipy: export DOMAIN=scipy-lectures.org -html-scipy: html + # Check for ipynb files in source (should all be text - .md or .Rmd); plain ls, because recipes run under /bin/sh, which may lack the bash builtin compgen. + if ls *.ipynb 2> /dev/null; then echo "ipynb files"; exit 1; fi + jupyter-book build -W . + +jl: + # Jupyter-lite files for book build. + $(PIP_INSTALL_CMD) -r jl-build-requirements.txt + rm -rf $(JL_DIR) + mkdir -p $(JL_DIR) + cp -r data images $(JL_DIR) + $(PYTHON) _scripts/process_notebooks.py $(JL_DIR) + $(PYTHON) -m jupyter lite build \ + --contents $(JL_DIR) \ + --output-dir $(BUILD_DIR)/interact \ + --lite-dir $(JL_DIR) + +lint: + pre-commit run --all-files --show-diff-on-failure --color always + +web: html jl + +github: web + ghp-import -n _build/html -p -f + +clean: rm-ipynb + rm -rf _build + -find . -name ".ipynb_checkpoints" -exec rm -rf {} \; + -find . -name "joblib" -exec rm -rf {} \; + +rm-ipynb: + find . -name "*.ipynb" -exec rm {} \; -cleandoctrees: - rm -rf build/doctrees - -pickle: - mkdir -p build/pickle build/doctrees - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle - @echo - @echo "Build finished; now you can process the pickle files or run" - @echo " sphinx-web build/pickle" - @echo "to start the sphinx-web server." - -web: pickle - -htmlhelp: - mkdir -p build/htmlhelp build/doctrees - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in build/htmlhelp."
- -latex: cleandoctrees - mkdir -p build/latex build/doctrees - cp intro/scipy/index.rst{,.bak} - sed -i '/^ solutions.rst/d' intro/scipy/index.rst - $(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) build/latex - mv intro/scipy/index.rst{.bak,} - @echo - @echo "Build finished; the LaTeX files are in build/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -latexpdf: latex - $(MAKE) -C build/latex all-pdf - -changes: - mkdir -p build/changes build/doctrees - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes - @echo - @echo "The overview file is in build/changes." - -linkcheck: - mkdir -p build/linkcheck build/doctrees - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in build/linkcheck/output.txt." - -pdf: latex - cd build/latex ; make all-pdf ; pdfjam --outfile ScientificPythonLectures-nup.pdf --nup 2x1 --landscape ScientificPythonLectures.pdf - cp build/latex/ScientificPythonLectures.pdf ScientificPythonLectures-simple.pdf - cp build/latex/ScientificPythonLectures-nup.pdf ScientificPythonLectures.pdf - -zip: clean html pdf - mkdir -p build/scipy_lecture_notes ; - cp ScientificPythonLectures.pdf ScientificPythonLectures-simple.pdf build/html/_downloads/ - cp -r data build/html/ - cd build/html ; zip -r ../scientific-python-lectures-html-$(TAG).zip . 
- cp ScientificPythonLectures.pdf build/ ; - git archive -o build/scientific-python-lectures-source-$(TAG).zip --prefix scientific-python-lectures-$(TAG)/ $(TAG) - -# This target is used to deploy to the old location: scipy-lectures.org -# The site is now hosted via Netlify at https://lectures.scientific-python.org -install: cleandoctrees html-scipy pdf - rm -rf build/scipy-lectures.github.com - cp ScientificPythonLectures.pdf ScientificPythonLectures-simple.pdf build/html/_downloads/ - cd build/ && \ - git clone --no-checkout --depth 1 git@github.com:scipy-lectures/scipy-lectures.github.com.git && \ - cp -r html/* scipy-lectures.github.com && \ - cd scipy-lectures.github.com && \ - echo -n 'scipy-lectures.org' > CNAME && \ - touch .nojekyll && \ - git add * .nojekyll && \ - git commit -a -m 'Make install' && \ - git push - -rsync_upload: check-rsync-env cleandoctrees html pdf - cp ScientificPythonLectures-simple.pdf ScientificPythonLectures.pdf build/html/_downloads/ - rsync -P -auvz --delete build/html/ $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR)/ - -check-rsync-env: -ifndef SSH_TARGET_DIR - $(error SSH_TARGET_DIR is undefined) -endif -ifndef SSH_HOST - $(error SSH_HOST is undefined) -endif - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) build/epub - @echo - @echo "Build finished. The epub file is in build/epub." +test: + pytest . -contributors: - git shortlog -sn 2>&1 | awk '{print $$NF, $$0}' | sort | cut -d ' ' -f 2- | sed "s/^ *[0-9][0-9]* /\n- /" +compare-optimizers: + ( cd advanced/mathematical_optimization/helper && \ + python compare_optimizers.py ) diff --git a/PRESENTING.txt b/PRESENTING.txt index 51d816701..66d2ff300 100644 --- a/PRESENTING.txt +++ b/PRESENTING.txt @@ -1,7 +1,7 @@ Here is the way I (Gael) tend to present these course is to use the html -output created by Sphinx and display it in a fullscreen browser. 
On top -of that I use 'yeahconsole' to type in the examples (it stays nicely on -top of the browser, in an area where everybody can see it, even in a -crowded room). I use the accompanying shell script to start yeahconsole: -it defines the right font size, and 'Ctr-Alt-Y' to show/hide the console. +output from the book build and display it in a fullscreen browser. On top of +that I use 'yeahconsole' to type in the examples (it stays nicely on top of the +browser, in an area where everybody can see it, even in a crowded room). I use +the accompanying shell script to start yeahconsole: it defines the right font +size, and 'Ctrl-Alt-Y' to show/hide the console. diff --git a/README.md b/README.md new file mode 100644 index 000000000..2c0be2694 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.594102.svg)](https://dx.doi.org/10.5281/zenodo.594102) + +[![test](https://github.com/scipy-lectures/scientific-python-lectures/workflows/test/badge.svg?branch=main)](https://github.com/scipy-lectures/scientific-python-lectures/actions?query=workflow%3A%22test%22) + +# Scientific Python Lectures + +This repository gathers some lectures on the scientific Python +ecosystem that can be used for a full course of scientific computing with +Python. + +These documents are written in Markdown and built using [Jupyter Book version +1](https://jupyterbook.org/en/stable/intro.html), which, in turn, uses the +[Sphinx](https://www.sphinx-doc.org) engine. + +You can view the online version at: <https://lectures.scientific-python.org/> + +## Reusing and distributing + +As stated in the `LICENSE.md` file, this material comes with no strings +attached. Feel free to reuse and modify for your own teaching purposes. + +However, we would like this reference material to be improved over time, +thus we encourage people to contribute back changes. These will be +reviewed and edited by the original authors and the editors.
+ +## Building and contributing + +The file `CONTRIBUTING.md` contains instructions to build from source +and to contribute. diff --git a/README.rst b/README.rst deleted file mode 100644 index 78cf9b5fb..000000000 --- a/README.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. image:: https://zenodo.org/badge/doi/10.5281/zenodo.594102.svg - :target: https://dx.doi.org/10.5281/zenodo.594102 - -.. image:: https://github.com/scipy-lectures/scientific-python-lectures/workflows/test/badge.svg?branch=main - :target: https://github.com/scipy-lectures/scientific-python-lectures/actions?query=workflow%3A%22test%22 - -========================== -Scientific Python Lectures -========================== - -This repository gathers some lectures on the scientific Python -ecosystem that can be used for a full course of scientific computing with -Python. - -These documents are written with the rest markup language (``.rst`` -extension) and built using `Sphinx `_. - -You can view the online version at: https://lectures.scientific-python.org/ - -Reusing and distributing -------------------------- - -As stated in the ``LICENSE.rst`` file, this material comes with no strings -attached. Feel free to reuse and modify for your own teaching purposes. - -However, we would like this reference material to be improved over time, -thus we encourage people to contribute back changes. These will be -reviewed and edited by the original authors and the editors. - -Building and contributing --------------------------- - -The file ``CONTRIBUTING.rst`` contains instructions to build from source -and to contribute. 
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 000000000..4dd0a2a23
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,154 @@
+# Book settings
+title: "Scientific Python Lectures"
+author: Scientific Python developers
+copyright: "2025"
+logo: images/sp_lectures.png
+email: matthew.brett@gmail.com
+# `>-` starts a folded multi-line string: newlines are replaced by spaces,
+# and trailing newlines are stripped.
+description: >-
+  One document to learn numerics, science, and data with Python
+
+execute:
+  # 'cache' attempts to cache the results.
+  # 'auto' appears to be safer.
+  execute_notebooks: cache
+  timeout: 180
+
+exclude_patterns:
+  - README.md
+  - CHANGES.md
+  - LICENSE.md
+  - CONTRIBUTING.md
+  - todo.md
+  - _scripts/*
+  - _notes/*
+  - _to_ignore.md
+  - data/LICENSE.txt
+  - .pytest_cache/*
+  - .ipynb_checkpoints/*
+
+html:
+  favicon: images/sp_lectures.png
+  home_page_in_navbar: false
+  use_edit_page_button: true
+  use_repository_button: true
+  use_issues_button: true
+  baseurl: https://lectures.scientific-python.org
+
+repository:
+  url: https://github.com/scipy-lectures/scientific-python-lectures
+  branch: main
+
+launch_buttons:
+  # The interface interactive links will activate ["classic", "jupyterlab"]
+  notebook_interface: "jupyterlab"
+  # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu)
+  # jupyterhub_url: "https://ds.lis.2i2c.cloud"
+  # Example jupyterhub link:
+  # https://ds.lis.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A//github.com/lisds/textbook&urlpath=lab/tree/textbook/code-basics/variables_intro.Rmd&branch=main
+  # The URL of the BinderHub (e.g., https://mybinder.org)
+  # binderhub_url: "https://mybinder.org"
+  # Jupyterlite URL
+  # NOTE(review): the Makefile `jl` target writes JupyterLite to
+  # `_build/html/interact`; confirm the site really is served under
+  # `/scipy-lecture-notes/` rather than at the root of `baseurl`.
+  jupyterlite_url: "/scipy-lecture-notes/interact/lab/index.html"
+  # Extension (if different from source file).
+  jupyterlite_ext: ".ipynb"
+  # Example jupyterlite link:
+  # https://pxr687.github.io/ASPP_pandas_tutorials/interact/lab/index.html?path=variables_intro.ipynb
+  # The URL of Google Colab (e.g., https://colab.research.google.com)
+  # colab_url: "https://colab.research.google.com"
+  # thebe: true
+
+sphinx:
+  recursive_update: true
+  config:
+    nb_custom_formats:
+      .md:
+        - jupytext.reads
+        - fmt: md:myst
+    intersphinx_mapping:
+      python:
+        - "https://docs.python.org/3/"
+        - null
+      numpy:
+        - "https://numpy.org/doc/stable/"
+        - null
+      scipy:
+        - "https://docs.scipy.org/doc/scipy/"
+        - null
+      matplotlib:
+        - "https://matplotlib.org/stable/"
+        - null
+      sklearn:
+        - "https://scikit-learn.org/stable/"
+        - null
+      sphinx:
+        - "https://www.sphinx-doc.org/en/master/"
+        - null
+      pandas:
+        - "https://pandas.pydata.org/pandas-docs/stable/"
+        - null
+      seaborn:
+        - "https://seaborn.pydata.org/"
+        - null
+      skimage:
+        - "https://scikit-image.org/docs/stable/"
+        - null
+      statsmodels:
+        - "https://www.statsmodels.org/stable/"
+        - null
+      imageio:
+        - "https://imageio.readthedocs.io/en/stable/"
+        - null
+
+  extra_extensions:
+    # For documenting 'click' Python CLIs
+    # - sphinx_click.ext
+    # Directive for creating tab panels in pages.
+    # https://github.com/djungelorm/sphinx-tabs
+    # - sphinx_tabs.tabs
+    # A sphinx extension for creating panels in a grid layout or as
+    # drop-downs.
+    # - sphinx_panels
+    # Needed as of 5 Dec 2022 - release of IPython 8.7.0
+    # https://github.com/ipython/ipython/issues/13845
+    # Fix from:
+    # https://github.com/spatialaudio/nbsphinx/issues/24#issuecomment-267687633
+    # Alternative is to pin install to !=8.7.0
+    - IPython.sphinxext.ipython_console_highlighting
+    - sphinx_exercise
+
+latex:
+  latex_documents:
+    targetname: scientific_python_lectures.tex
+
+bibtex_bibfiles:
+  - sp_lectures.bib
+
+# HTML redirection
+# Pages linked, but then renamed.
+redirection:
+  builddir: _build/html
+  # NOTE(review): with the only entry commented out, `redirects` parses as
+  # null, not an empty mapping — confirm the consumer accepts that.
+  redirects:
+    # data-types/Ranges: ../arrays/Ranges
+
+jupyterlite:
+  in_nb_ext: .md
+  out_nb_ext: .ipynb
+  in_nb_fmt: "myst"
+  remove_remove: true
+  proc_admonitions: false
+
+parse:
+  myst_substitutions:
+    release: "2025.2rc0.dev0"
+    # NOTE(review): the literal block below is empty in the text under review;
+    # confirm the intended HTML snippet is present in the committed file.
+    clear_floats: |
+
+
diff --git a/_scripts/examples2nb.py b/_scripts/examples2nb.py new file mode 100755 index 000000000..12f708dcf --- /dev/null +++ b/_scripts/examples2nb.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +"""Process sphinx-gallery examples in notebook""" + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +import ast +from copy import deepcopy +from functools import reduce +import operator +import re +from pathlib import Path + +import jupytext +import nbformat + + +HEADER = jupytext.reads( + """\ +--- +jupyter: + orphan: true + jupytext: + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.17.1 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +""", + fmt="Rmd", +) + +# New Markdown cell function +NMC = nbformat.versions[HEADER["nbformat"]].new_markdown_cell + +# Default encoding for notebooks and examples. +NB_ENCODING = "utf-8" + + +def get_ref_targets(root_path, nb_ext=".Rmd", excludes=()): + refs = [] + for nb_path in root_path.glob("**/*" + nb_ext): + if nb_path in excludes: + continue + refs += re.findall( + r"^\s*\(\s*([a-zA-Z0-9-_]+)\s*\)=\s*$", + nb_path.read_text(NB_ENCODING), + flags=re.MULTILINE, + ) + return refs + + +FIG_EG_RE = re.compile( + r""" +^(\s*:::+|```)\s*\{(?:figure|image)\}\s* +auto_examples/.*?images/sphx_glr_(?P\w+?)_\d{3}\.png +.*? 
+\s*\1""", + flags=re.MULTILINE | re.VERBOSE | re.DOTALL, +) + + +def get_eg_stems(nb_path): + """Analyze notebook for references to example output""" + refs = [] + nb = jupytext.read(nb_path) + for cell in nb.cells: + if cell["cell_type"] != "markdown": + continue + for ref in [m.groupdict()["stem"] for m in FIG_EG_RE.finditer(cell["source"])]: + if ref not in refs: + refs.append(ref) + return refs + + +def proc_str(s): + s = s.strip() + lines = s.splitlines() + title = None + if len(lines) > 2 and re.match(r"^[=-]{2,}\s*$", lines[1]): + title = lines[0].strip() + lines = lines[2:] + if len(lines) and lines[0].strip() == "": + lines = lines[1:] + return "\n".join(lines), title + + +def process_example(eg_path, import_lines=None): + import_lines = [] if import_lines is None else import_lines + txt = eg_path.read_text(NB_ENCODING) + nb = jupytext.reads(txt, "py:nomarker") + title = None + # Convert standalone multiline strings to Markdown cells. + out_cells = [] + for cell in nb.cells: + if cell["cell_type"] != "code": + out_cells.append(cell) + continue + body = ast.parse(cell.source).body + # Multiline string. 
+ if ( + len(body) == 1 + and isinstance(body[0], ast.Expr) + and isinstance(body[0].value, ast.Constant) + and isinstance(body[0].value.value, str) + ): + src, cell_title = proc_str(body[0].value.value) + cell["cell_type"] = "markdown" + cell["source"] = src + title = cell_title if title is None else title + out_cells.append(cell) + continue + out_lines = [] + show_cell = False + for L in cell["source"].splitlines(): + sL = L.strip() + if sL.startswith("plt.show"): + show_cell = True + continue + if sL.startswith("import "): + if sL in import_lines: + continue + import_lines.append(sL) + out_lines.append(L) + if out_lines: + cell["source"] = "\n".join(out_lines) + if show_cell: + cell["metadata"] = cell.get("metadata", {}) + cell["metadata"]["tags"] = list( + set(cell["metadata"].get("tags", [])).union(["hide-input"]) + ) + out_cells.append(cell) + nb.cells = out_cells + # Get title from filename if not already found. + if title is None and (m := re.match(r"plot_(.+)\.py", eg_path.name)): + title = m.groups()[0] + return nb, title + + +def get_example_paths(eg_dirs): + return reduce(operator.add, [sorted(Path(d).glob("**/plot_*.py")) for d in eg_dirs]) + + +def process_nb_examples(root_path, nb_path, eg_paths, check_refs=True): + # Get all references (something)= + ref_defs = get_ref_targets(root_path) + # Get all examples. + examples = {} + nb_imp_lines = [] + # Analyze notebook for references to examples + eg_stems = get_eg_stems(nb_path) + + def eg_sorter(pth): + return [eg_stems.index(pth.stem) if pth.stem in eg_stems else len(eg_stems)] + + # Sort examples in notebook order. + eg_paths = sorted(eg_paths, key=eg_sorter) # Relies on stable sort. 
+ + for eg_path in eg_paths: + nb, title = process_example(eg_path, nb_imp_lines) + eg_stem = eg_path.stem + ref = ( + eg_stem + if title is None + else re.sub(r"[^a-zA-Z0-9]+", "-", title).lower().strip("-") + ) + if check_refs and ref in ref_defs: + raise ValueError(f"Reference {ref} already used in project") + examples[eg_stem] = nb, title, ref + # Try to detect possible titles for each reference. + # Run through examples in notebook order + nb_out = deepcopy(HEADER) + cells = nb_out.cells + cells.append(NMC(f"# Examples for {nb_path}")) + for eg_stem in eg_stems: + cells += output_example(eg_stem, examples, header_level=2) + remaining = [s for s in examples if s not in eg_stems] + if remaining: + cells.append(NMC("## Other examples")) + for eg_stem in remaining: + cells += output_example(eg_stem, examples, header_level=3) + return nb_out + + +def output_example(eg_stem, examples, header_level=2): + nb, title, ref = examples[eg_stem] + title = ref.replace("-", " ").title() if title is None else title + return [ + NMC(f"({ref})=\n\n{'#' * header_level} {title}\n\n") + ] + nb.cells + + +def get_parser(): + parser = ArgumentParser( + description=__doc__, # Usage from docstring + formatter_class=RawDescriptionHelpFormatter, + ) + parser.add_argument("nb_file", help="notebook file") + parser.add_argument("--eg-dir", help="path to examples", nargs="*") + parser.add_argument("--root-dir", help="root path to book", default=".") + parser.add_argument("--eg-nb", help="Output notebook filename") + parser.add_argument( + "--no-check-refs", + action="store_true", + help="Do not check if example refs are unique", + ) + return parser + + +def main(): + args = get_parser().parse_args() + # Process inputs and set defaults. 
+ nb_pth = Path(args.nb_file) + if not nb_pth.is_file(): + raise RuntimeError(f"Notebook {nb_pth} is not a file") + if args.eg_dir: + eg_dirs = [Path(f) for f in args.eg_dir] + elif (eg_dir := nb_pth.parent / "examples").is_dir() or ( + eg_dir := nb_pth.parent.parent / "examples" + ).is_dir(): + eg_dirs = [eg_dir] + else: + raise RuntimeError("Cannot find examples directory") + if not (eg_pths := get_example_paths(eg_dirs)): + raise RuntimeError(f"No examples in {eg_dirs}") + eg_nb = ( + Path(args.eg_nb) + if args.eg_nb is not None + else (nb_pth.parent / (nb_pth.stem + "_examples" + nb_pth.suffix)) + ) + # Generate, write examples notebook. + out_nb = process_nb_examples( + Path(args.root_dir), nb_pth, eg_pths, not args.no_check_refs + ) + jupytext.write(out_nb, eg_nb, fmt="rmarkdown") + + +if __name__ == "__main__": + main() diff --git a/_scripts/post_parser.py b/_scripts/post_parser.py new file mode 100755 index 000000000..7e6d1ac2b --- /dev/null +++ b/_scripts/post_parser.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +"""Post-ReST to Myst parser""" + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +from pathlib import Path +import re +import textwrap + + +RMD_HEADER = """\ +--- +jupyter: + jupytext: + formats: ipynb,Rmd + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.17.1 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- +""" + + +def process_python_block(lines, tags=()): + if [L.strip().startswith(">>> ") for L in lines if L.strip()][0]: + return process_doctest_block(lines) + return [get_hdr(tags)] + lines[:] + ["```"] + + +_PY_BLOCK = """\ +>>> 7 * 3. 
+21.0 +>>> 2**10 +1024 +>>> 8 % 3 +2 +""".splitlines() + + +_EXP_PY_BLOCK = [ + "```{python}", + "7 * 3.", + "```", + "", + "```{python}", + "2**10", + "```", + "", + "```{python}", + "8 % 3", + "```", +] + + +def test_process_python_block(): + assert process_python_block(_PY_BLOCK) == _EXP_PY_BLOCK + assert process_doctest_block(_PY_BLOCK) == _EXP_PY_BLOCK + + +IPY_IN = re.compile(r"In \[\d+\]: (.*)$") +IPY_OUT = re.compile(r"Out\[\d+\]: (.*)$")  # IPython prints "Out[n]:" with no space before "[" + + +def process_verbatim_block(lines): + out_lines = [] + for line in lines: + if line.strip() in ("@verbatim", ":verbatim:"): + continue + line = IPY_IN.sub(r"\1", line) + line = IPY_OUT.sub(r"\1", line) + out_lines.append(line) + return ["```python", ""] + out_lines + ["```"] + + +_IPY_BLOCK = """\ + In [53]: a = "hello, world!" + In [54]: a[2] = 'z' + --------------------------------------------------------------------------- + Traceback (most recent call last): + File "", line 1, in + TypeError: 'str' object does not support item assignment + + In [55]: a.replace('l', 'z', 1) + Out[55]: 'hezlo, world!' + In [56]: a.replace('l', 'z') + Out[56]: 'hezzo, worzd!' +""".splitlines() + + +_IPY_CONT_RE = re.compile(r"\s*\.{3,}: (.*)$") + + +def process_ipython_block(lines): + text = textwrap.dedent("\n".join(lines)) + if "@verbatim" in text or ":verbatim:" in text: + return process_verbatim_block(text.splitlines()) + out_lines = ["```{python}"] + state = "start" + last_i = len(lines) - 1 + for i, line in enumerate(text.splitlines()): + if state == "start" and line.strip() == "": + continue + if m := IPY_IN.match(line): + if state == "output" and i != last_i: + out_lines += ["```", "", "```{python}"] + state = "code" + out_lines.append(m.groups()[0]) + continue + if state == "code" and (m := _IPY_CONT_RE.match(line)): + out_lines.append(m.groups()[0]) + continue + # In code, but no code input line.
+ if line.strip(): + state = "output" + return out_lines + ["```"] + + +def test_ipython_block(): + assert process_ipython_block(_IPY_BLOCK) == [ + "```{python}", + 'a = "hello, world!"', + "a[2] = 'z'", + "```", + "", + "```{python}", + "a.replace('l', 'z', 1)", + "```", + "", + "```{python}", + "a.replace('l', 'z')", + "```", + ] + + +_DOCTEST_BLOCK = r""" +>>> a = "hello, world!" +>>> a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5 +'lo,' +>>> a[2:10:2] # Syntax: a[start:stop:step] +'lo o' +>>> a[::3] # every three characters, from beginning to end +'hl r!' +""".splitlines() + + +def get_hdr(tags): + if not tags: + return "```{python}" + joined_tags = ", ".join(f'"{t}"' for t in tags) + return f"```{{python tags=c({joined_tags})}}" + + +def process_doctest_block(lines, tags=()): + if not any(L.strip().startswith(">>> ") for L in lines): + return process_python_block(lines, tags) + lines = textwrap.dedent("\n".join(lines)).splitlines() + cell_hdr = get_hdr(tags) + out_lines = [cell_hdr] + state = "start" + last_i = len(lines) - 1 + for i, line in enumerate(lines): + if state == "start" and line.strip() == "": + continue + if line.startswith(">>> "): + if state == "output" and i != last_i: + out_lines += ["```", "", cell_hdr] + state = "code" + out_lines.append(line[4:]) + continue + if state == "code" and line.startswith("... "): + out_lines.append(line[4:]) + continue + state = "output" + return out_lines + ["```"] + + +def test_doctest_block(): + assert process_doctest_block(_DOCTEST_BLOCK) == [ + "```{python}", + 'a = "hello, world!"', + "a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5", + "```", + "", + "```{python}", + "a[2:10:2] # Syntax: a[start:stop:step]", + "```", + "", + "```{python}", + "a[::3] # every three characters, from beginning to end", + "```", + ] + + +def process_eval_rst_block(lines): + return [textwrap.dedent("\n".join(lines))] + + +_EVAL_RST_BLOCK = """\ +```{eval-rst} +.. 
ipython:: + + In [1]: a = [1, 2, 3] + + In [2]: b = a + + In [3]: a + Out[3]: [1, 2, 3] + + In [4]: b + Out[4]: [1, 2, 3] + + In [5]: a is b + Out[5]: True + + In [6]: b[1] = 'hi!' + + In [7]: a + Out[7]: [1, 'hi!', 3] +``` +""".splitlines() + + +def test_ipython_block_in_rst(): + assert parse_lines(_EVAL_RST_BLOCK) == [ + "```{python}", + "a = [1, 2, 3]", + "b = a", + "a", + "```", + "", + "```{python}", + "b", + "```", + "", + "```{python}", + "a is b", + "```", + "", + "```{python}", + "b[1] = 'hi!'", + "a", + "```", + ] + + +STATE_PROCESSOR = { + "python-block": process_python_block, + "ipython-block": process_ipython_block, + "doctest-block": process_doctest_block, + "eval-rst-block": process_eval_rst_block, +} + + +def parse_lines(lines): + parsed_lines = [] + state = "default" + block_lines = [] + for i, line in enumerate(lines): + if state == "default": + if re.match(r"```\s*\{eval-rst\}\s*$", line): + if re.match(r"\.\.\s+ipython::", lines[i + 1]): + state = "ipython-block-header" + else: + state = "eval-rst-block" + # Remove all eval-rst blocks. 
+ continue + LS = line.strip() + if LS == "```": + state = "python-block" + continue + if LS == "```pycon": + state = "doctest-block" + continue + if LS.startswith("```"): + state = "other-block" + directive = line + continue + if state == "ipython-block-header": + # Drop ipython line + state = "ipython-block" + continue + if state.endswith("block"): + if line.strip() != "```": + block_lines.append(line) + continue + parsed_lines += ( + STATE_PROCESSOR[state](block_lines) + if state in STATE_PROCESSOR + else [directive] + block_lines + [line] + ) + block_lines = [] + state = "default" + continue + parsed_lines.append(line) + + return parsed_lines + + +def strip_content(lines): + text = "\n".join(lines) + text = re.sub(r"^\.\.\s+currentmodule:: .*\n", "", text, flags=re.MULTILINE) + text = re.sub(r"\s+#\s*doctest:.*$", "", text, flags=re.MULTILINE) + text = re.sub( + r"^:::\s*\{topic\}\s*\**(.*?)\**$", + r":::{admonition} \1", + text, + flags=re.MULTILINE, + ) + text = re.sub( + r"^:::\s*\{seealso\}$\n*(.*?)^:::\s*$", + ":::{admonition} See also\n\n\\1:::\n", + text, + flags=re.MULTILINE | re.DOTALL, + ) + return re.sub( + r"\`\`\`\s*\{contents\}.*?^\`\`\`\s*\n", + "", + text, + flags=re.MULTILINE | re.DOTALL, + ).splitlines() + + +def process_percent_block(lines): + # The first one or more lines should be considered comments. 
+ for i, line in enumerate(lines): + if line.strip().startswith(">>> "): + head_lines = [ + ">>> # " + L + for L in lines[:i] + if (L.strip() and "for doctest" not in L.lower()) + ] + return process_doctest_block(head_lines + lines[i:], tags=("hide-input",)) + return [""] + + +def process_percent(lines): + out_lines = [] + block_lines = [] + state = "default" + for line in lines: + pct_line = line.startswith("% ") + if state == "default": + if not pct_line: + out_lines.append(line) + continue + state = "percent-lines" + if state == "percent-lines": + if line.startswith("%"): + block_lines.append(line[2:]) + else: # End of block + out_lines += process_percent_block(block_lines) + assert not line.strip() + state = "default" + block_lines = [] + return out_lines + + +def process_md(fname): + fpath = Path(fname) + out_lines = fpath.read_text().splitlines()[:] + for parser in [parse_lines, strip_content, process_percent]: + out_lines = parser(out_lines) + content = "\n".join(out_lines) + out_path = fpath + if fpath.suffix == ".md" and "```{python}" in content: + out_path = fpath.with_suffix(".Rmd") + fpath.unlink() + content = f"{RMD_HEADER}\n{content}" + out_path.write_text(content) + + +def get_parser(): + parser = ArgumentParser( + description=__doc__, # Usage from docstring + formatter_class=RawDescriptionHelpFormatter, + ) + parser.add_argument("in_md", nargs="+", help="Input Markdown files") + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + for fname in args.in_md: + process_md(fname) + + +if __name__ == "__main__": + main() diff --git a/_scripts/process_notebooks.py b/_scripts/process_notebooks.py new file mode 100755 index 000000000..66549a534 --- /dev/null +++ b/_scripts/process_notebooks.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python3 +"""Process notebooks + +* Replace local kernel with Pyodide kernel in metadata. +* Filter: + * Note and admonition markers. + * Exercise markers. + * Solution blocks. 
+* Write notebooks to output directory. +* Write JSON jupyterlite file. +""" + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +from copy import deepcopy +from pathlib import Path +import re +from urllib.parse import quote as urlquote, urlparse + +import docutils.core as duc +import docutils.nodes as dun +from docutils.utils import Reporter +from sphinx.util.matching import get_matching_files +from myst_parser.docutils_ import Parser +import yaml + +_END_DIV_RE = re.compile(r"^\s*(:::+|```+|~~~+)\s*$") +import jupytext + +_JL_JSON_FMT = r"""\ +{{ + "jupyter-lite-schema-version": 0, + "jupyter-config-data": {{ + "contentsStorageName": "rss-{language}" + }} +}} +""" + +_DIV_RE = r"\s*(:::+|```+|~~~+)\s*" + + +_ADM_HEADER = re.compile( + rf""" + ^{_DIV_RE} + \{{\s*(?P\S+)\s*\}}\s* + (?P.*)\s*$ + """, + flags=re.VERBOSE, +) + + +_EX_SOL_MARKER = re.compile( + rf""" + (?P\n*) + {_DIV_RE} + \{{\s* + (?Pexercise|solution)- + (?Pstart|end) + \s*\}} + \s* + (?P\S+)?\s* + \n + (?P\s*:\S+: \s* \S+\s*\n)* + \n* + \s*(\2)\s* + \n + """, + flags=re.VERBOSE, +) + + +_SOL_MARKED = re.compile( + r""" + \n? + \n + .*? + \n? 
+ """, + flags=re.VERBOSE | re.MULTILINE | re.DOTALL, +) + + +_END_DIV_RE = re.compile(rf"^{_DIV_RE}$") + + +# https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#syntax-extensions +MYST_EXTENSIONS = [ + "amsmath", + "attrs_inline", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + "linkify", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", +] + + +DEF_JUPYTERLITE_CONFIG = { + "in_nb_ext": ".md", + "out_nb_ext": ".ipynb", + "in_nb_fmt": "myst", + "remove_remove": True, + "proc_admonitions": True, +} + + +def _replace_markers(m): + st_end = m["st_end"] + if m["ex_sol"] == "exercise": + return f"{m['newlines']}**{st_end.capitalize()} of exercise**\n\n" + return f"\n\n" + + +def get_admonition_lines(nb_text, nb_path): + parser = Parser() + doc = duc.publish_doctree( + source=nb_text, + source_path=str(nb_path), + settings_overrides={ + "myst_enable_extensions": MYST_EXTENSIONS, + "report_level": Reporter.SEVERE_LEVEL, + }, + parser=parser, + ) + lines = nb_text.splitlines() + n_lines = len(lines) + admonition_lines = [] + for admonition in doc.findall(dun.Admonition): + start_line = admonition.line - 1 + # Find first node of subsequent doctree. + node0 = next( + admonition.findall(include_self=False, descend=False, ascend=True), None + ) + # There can be a system_message as next node, in which case the correct + # line is in the 'line' attribute. 
+ last_line = node0.get("line", node0.line) - 2 if node0 else n_lines - 1 + for end_line in range(last_line, start_line + 1, -1): + if _END_DIV_RE.match(lines[end_line]): + break + else: + raise ValueError("Could not find end div") + admonition_lines.append((start_line, end_line)) + return admonition_lines + + +_ADM_HEADER = re.compile( + r""" + ^\s*(:::+|```+|~~~+)\s* + \{\s*(?P\S+)\s*\}\s* + (?P.*)\s*$ + """, + flags=re.VERBOSE, +) + + +_DIR_OPTION = re.compile(r"^\s*:\w+:") + + +def process_admonitions(nb_text, nb_path): + lines = nb_text.splitlines() + out_lines = [] + start_i = last = 0 + for first, last in get_admonition_lines(nb_text, nb_path): + m = _ADM_HEADER.match(lines[first]) + if not m: + raise ValueError(f"Cannot get match from {lines[first]}") + out_lines += lines[start_i:first] + start_i = last + 1 + ad_type, ad_title = m["ad_type"], m["ad_title"] + suffix = f": {ad_title}" if ad_title else "" + in_i = first + 1 + while _DIR_OPTION.match(lines[in_i]): + in_i += 1 + adm_txt = "\n".join(lines[in_i:last]).strip("\n") + out_lines.append( + f"**Start of {ad_type}{suffix}**\n\n{adm_txt}\n\n**End of {ad_type}**" + ) + return "\n".join(out_lines + lines[start_i:]) + + +def process_cells(nb, processors): + """Process cells in notebooks. + + Parameters + ---------- + nb : dict + processors : sequence + Sequences of callables, taking a cell as input, and returning a cell as + output. If None returned, delete this cell. 
+ + Returns + ------- + out_nb : dict + """ + out_nb = deepcopy(nb) + out_cells = [] + for cell in out_nb["cells"]: + for processor in processors: + cell = processor(cell) + if cell is None: + break + if cell: + out_cells.append(cell) + out_nb["cells"] = out_cells + return out_nb + + +_LABEL = re.compile(r"^\s*\(\s*\S+\s*\)\=\s*\n", flags=re.MULTILINE) + +_GLUE_DIR = re.compile( + r""" + (:::+|```+)\s* + \{\s*glue:*\s*\}\s+ + (\w+)\n + (?:\s*:doc: .*?)* + \n\s*\1\s*\n + """, + flags=re.MULTILINE | re.DOTALL | re.VERBOSE, +) + + +_GLUE_ROLE = re.compile( + r""" + \{\s*glue:{0,1}\s*\}\s*`(.*)?` + """, + flags=re.MULTILINE | re.DOTALL | re.VERBOSE, +) + + +def label_processor(cell): + if cell["cell_type"] == "markdown": + cell["source"] = _LABEL.sub("", cell["source"]) + return cell + + +def remove_processor(cell): + tags = cell.get("metadata", {}).get("tags", {}) + if "remove-cell" in tags: + return None + return cell + + +_GLUE_DIR = re.compile( + r""" + (:::+|```+)\s* + \{\s*glue:*\s*\}\s+ + (?P\w+)\n + (\s*:doc:\s*(?P.*?)$){0,1} + \n\s*\1\s*\n + """, + flags=re.MULTILINE | re.DOTALL | re.VERBOSE, +) + + +_GLUE_ROLE = re.compile( + r""" + \{\s*glue:{0,1}\s*\}\s*`(.*?)` + """, + flags=re.MULTILINE | re.DOTALL | re.VERBOSE, +) + + +def _glue_replacer(m): + d = m.groupdict() + ref, doc = d["ref"], d["doc"] + doc_msg = f' in "{doc}"' if doc else "" + return f"(Ref to `{ref}`{doc_msg})\n" + + +def glue_processor(cell): + if cell["cell_type"] != "markdown": + return cell + cell["source"] = _GLUE_DIR.sub(_glue_replacer, cell["source"]) + cell["source"] = _GLUE_ROLE.sub(r"(Ref to `\1`)", cell["source"]) + return cell + + +def load_process_nb(nb_path, fmt="myst", url=None, proc_admonitions=True): + """Load and process notebook + + Deal with: + + * Note and admonition markers. + * Exercise markers. + * Solution blocks. 
+ + Parameters + ---------- + nb_path : file-like + Path to notebook + fmt : str, optional + Format of notebook (for Jupytext) + url : str, optional + URL for output page. + proc_admonitions : {True, False}, optional + If True, process admonition blocks to plain paragraphs. + + Returns + ------- + nb : dict + Notebook as loaded and parsed. + """ + link_txt = "corresponding page" + page_link = f"[{link_txt}]({url})" if url else link_txt + nb_path = Path(nb_path) + nb_text = nb_path.read_text() + nbt1 = _EX_SOL_MARKER.sub(_replace_markers, nb_text) + nbt2 = _SOL_MARKED.sub(f"\n**See the {page_link} for solution**\n\n", nbt1) + if proc_admonitions: + nbt2 = process_admonitions(nbt2, nb_path) + nb = jupytext.reads(nbt2, fmt={"format_name": fmt, "extension": nb_path.suffix}) + return process_cells(nb, [label_processor, glue_processor]) + + +def process_notebooks( + config, output_dir, kernel_name="python", kernel_dname="Python (Pyodide)" +): + # Get processing params from jupyterlite config section. + jl_config = config["jupyterlite"] + input_dir = Path(config["input_dir"]) + # Use sphinx utility to find not-excluded files. 
+ for fn in get_matching_files( + input_dir, exclude_patterns=config["exclude_patterns"] + ): + rel_path = Path(fn) + if rel_path.suffix != jl_config["in_nb_ext"]: + continue + print(f"Processing {rel_path}") + nb_url = ( + config["base_path"] + + "/" + + urlquote(rel_path.with_suffix(".html").as_posix()) + ) + nb = load_process_nb( + input_dir / rel_path, + jl_config["in_nb_fmt"], + nb_url, + jl_config["proc_admonitions"], + ) + if jl_config["remove_remove"]: + nb = process_cells(nb, [remove_processor]) + nb["metadata"]["kernelspec"] = { + "name": kernel_name, + "display_name": kernel_dname, + } + out_path = (output_dir / rel_path).with_suffix(jl_config["out_nb_ext"]) + out_path.parent.mkdir(exist_ok=True, parents=True) + jupytext.write(nb, out_path) + + +def get_parser(): + parser = ArgumentParser( + description=__doc__, # Usage from docstring + formatter_class=RawDescriptionHelpFormatter, + ) + parser.add_argument( + "output_dir", help="Directory to which we will output notebooks" + ) + parser.add_argument( + "--config-dir", default=".", help="Directory containing `_config.yml` file" + ) + return parser + + +def load_config(config_path): + config_path = Path(config_path).resolve() + with (config_path / "_config.yml").open("rt") as fobj: + config = yaml.safe_load(fobj) + # Post-processing. 
+ config["input_dir"] = Path( + config.get("repository", {}).get("path_to_book", config_path) + ) + config["base_path"] = urlparse(config.get("html", {}).get("baseurl", "")).path + config["exclude_patterns"] = config.get("exclude_patterns", []) + config["exclude_patterns"].append("_build") + config["jupyterlite"] = dict( + DEF_JUPYTERLITE_CONFIG, **config.get("jupyterlite", {}) + ) + return config + + +def main(): + parser = get_parser() + args = parser.parse_args() + config = load_config(Path(args.config_dir)) + out_path = Path(args.output_dir) + out_path.mkdir(parents=True, exist_ok=True) + process_notebooks(config, out_path) + (out_path / "jupyter-lite.json").write_text(_JL_JSON_FMT.format(language="python")) + + +if __name__ == "__main__": + main() diff --git a/_scripts/run_regex.py b/_scripts/run_regex.py new file mode 100755 index 000000000..a6a59428d --- /dev/null +++ b/_scripts/run_regex.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Run a regex over a file""" + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +from pathlib import Path +import re + +IMAGE_NOT_EXAMPLE = re.compile( + r""" +^```{image} +\s+(?!auto_examples) +(?P\S+)$ +.*? 
+```""", + flags=re.DOTALL | re.MULTILINE | re.VERBOSE, +) + + +REPLACER = r"![](\1)" + + +def run_regexp(fname, regex, replacer): + pth = Path(fname) + in_contents = pth.read_text() + out_contents = regex.sub(replacer, in_contents) + pth.write_text(out_contents) + + +def get_parser(): + parser = ArgumentParser( + description=__doc__, # Usage from docstring + formatter_class=RawDescriptionHelpFormatter, + ) + parser.add_argument("fname", nargs="+", help="Files on which to run regexp") + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + for fname in args.fname: + run_regexp(fname, IMAGE_NOT_EXAMPLE, REPLACER) + + +if __name__ == "__main__": + main() diff --git a/_scripts/tests/eg.Rmd b/_scripts/tests/eg.Rmd new file mode 100644 index 000000000..68f59f5b9 --- /dev/null +++ b/_scripts/tests/eg.Rmd @@ -0,0 +1,188 @@ +--- +jupyter: + jupytext: + formats: ipynb,Rmd + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.17.1 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Pandas from Numpy + +## What is Pandas? + +Pandas is an open-source python library for data manipulation and analysis. + + +``` {note} + +**Why is Pandas called Pandas?** + +The “Pandas” name is short for “panel data”. The library was named after the +type of econometrics panel data that it was designed to analyse. [Panel +data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where +the same observational units (e.g. countries) are observed over multiple +instances across time. + +``` + + +The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. 
For more information about Pandas see [this page](https://Pandas.pydata.org/about/). + +The standard way to make a new Data Frame is to ask Pandas to read a data file +(like a `.csv` file) into a Data Frame. Before we do that however, we will +build our own Data Frame from scratch, beginning with the fundamental building +block for Data Frames: Numpy arrays. + +```{python} +# import the libraries needed for this page +import numpy as np +import pandas as pd +``` + +## Numpy arrays + +Let's say we have some data that applies to a set of countries, and we have some countries in mind: + +```{python} +country_names_array = np.array(['Australia', 'Brazil', 'Canada', + 'China', 'Germany', 'Spain', + 'France', 'United Kingdom', 'India', + 'Italy', 'Japan', 'South Korea', + 'Mexico', 'Russia', 'United States']) +country_names_array +``` + +For compactness, we'll also want to use the corresponding [standard +three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each +country, like so: + +Both Data Frames contain the same data, and the same labels. In fact, we can +use the `.equals` method of Data Frames to ask Pandas whether it agrees the +Data Frames are equivalent: + +```{python} +df.equals(loaded_labeled_df) +``` + +They are equivalent. + + +```{exercise-start} +:label: index-in-display +:class: dropdown +``` + + +In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. +If you look very carefully at the notebook output for the two data frames, you +may be able to spot the difference. Pandas `.equals` does not care about this +difference, but let's imagine we did. Try to work out how to change the `df` +Data Frame to give *exactly* the same display as we see for +`loaded_labeled_df`. + + +```{exercise-end} +``` + + + +```{solution-start} index-in-display +:class: dropdown +``` + + +You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. 
You can also see this displaying the `.index` on its own: + +```{python} +loaded_labeled_df.index +``` + +compared to: + +```{python} +df.index +``` + +We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. + +The simplest way to do that is: + +```{python} +# Make a copy of the `df` Data Frame. This step is unnecessary to solving +# the problem, it is just to be neat. +df_copy = df.copy() +``` + +```{python} +# Set the Index name. +df_copy.index.name = 'Code' +df_copy +``` + + +```{solution-end} +``` + + + +``` {admonition} My title + +Some interesting information. + +``` + + +Some more text. + + +``` {exercise-start} +:label: differing-indices +:class: dropdown +``` + + +```{python} +# df5 +``` + +After these examples, what is your final working theory about the algorithm +Pandas uses to match the Indices of Series, when creating Data Frames? + + +``` {exercise-end} +``` + + + +``` {solution-start} differing-indices +:class: dropdown +``` + + +Here's our hypothesis of the algorithm: + +* First check if the Series Indices are the same. If so, use the Index of any + Series. +* If they are not the same, first sort all Series by their Index values, and + use the resulting sorted Index. + +What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? + + +``` {solution-end} +``` + + +(plot-frames)= +## Convenient Plotting with Data Frames + +Remember earlier we imported Matplotlib to plot some of our data? 
diff --git a/_scripts/tests/eg.md b/_scripts/tests/eg.md new file mode 100644 index 000000000..43234c66e --- /dev/null +++ b/_scripts/tests/eg.md @@ -0,0 +1,203 @@ +--- +jupytext: + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Pandas from Numpy + ++++ + +## What is Pandas? + +Pandas is an open-source python library for data manipulation and analysis. + ++++ + +```{note} + +**Why is Pandas called Pandas?** + +The “Pandas” name is short for “panel data”. The library was named after the +type of econometrics panel data that it was designed to analyse. [Panel +data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where +the same observational units (e.g. countries) are observed over multiple +instances across time. + +``` + ++++ + +The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). + +The standard way to make a new Data Frame is to ask Pandas to read a data file +(like a `.csv` file) into a Data Frame. Before we do that however, we will +build our own Data Frame from scratch, beginning with the fundamental building +block for Data Frames: Numpy arrays. 
+ +```{code-cell} +# import the libraries needed for this page +import numpy as np +import pandas as pd +``` + +## Numpy arrays + +Let's say we have some data that applies to a set of countries, and we have some countries in mind: + +```{code-cell} +country_names_array = np.array(['Australia', 'Brazil', 'Canada', + 'China', 'Germany', 'Spain', + 'France', 'United Kingdom', 'India', + 'Italy', 'Japan', 'South Korea', + 'Mexico', 'Russia', 'United States']) +country_names_array +``` + +For compactness, we'll also want to use the corresponding [standard +three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each +country, like so: + +Both Data Frames contain the same data, and the same labels. In fact, we can +use the `.equals` method of Data Frames to ask Pandas whether it agrees the +Data Frames are equivalent: + +```{code-cell} +df.equals(loaded_labeled_df) +``` + +They are equivalent. + ++++ + +```{exercise-start} +:label: index-in-display +:class: dropdown +``` + ++++ + +In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. +If you look very carefully at the notebook output for the two data frames, you +may be able to spot the difference. Pandas `.equals` does not care about this +difference, but let's imagine we did. Try to work out how to change the `df` +Data Frame to give _exactly_ the same display as we see for +`loaded_labeled_df`. + ++++ + +```{exercise-end} + +``` + ++++ + +```{solution-start} index-in-display +:class: dropdown +``` + ++++ + +You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: + +```{code-cell} +loaded_labeled_df.index +``` + +compared to: + +```{code-cell} +df.index +``` + +We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. 
+ +The simplest way to do that is: + +```{code-cell} +# Make a copy of the `df` Data Frame. This step is unnecessary to solving +# the problem, it is just to be neat. +df_copy = df.copy() +``` + +```{code-cell} +# Set the Index name. +df_copy.index.name = 'Code' +df_copy +``` + +```{solution-end} + +``` + ++++ + +```{admonition} My title + +Some interesting information. + +``` + ++++ + +Some more text. + ++++ + +```{exercise-start} +:label: differing-indices +:class: dropdown +``` + +```{code-cell} +# df5 +``` + +After these examples, what is your final working theory about the algorithm +Pandas uses to match the Indices of Series, when creating Data Frames? + ++++ + +```{exercise-end} + +``` + ++++ + +```{solution-start} differing-indices +:class: dropdown +``` + ++++ + +Here's our hypothesis of the algorithm: + +- First check if the Series Indices are the same. If so, use the Index of any + Series. +- If they are not the same, first sort all Series by their Index values, and + use the resulting sorted Index. + +What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? + ++++ + +```{solution-end} + +``` + ++++ + +(plot-frames)= + +## Convenient Plotting with Data Frames + +Remember earlier we imported Matplotlib to plot some of our data? diff --git a/_scripts/tests/eg2.Rmd b/_scripts/tests/eg2.Rmd new file mode 100644 index 000000000..c2896b3dc --- /dev/null +++ b/_scripts/tests/eg2.Rmd @@ -0,0 +1,169 @@ +--- +jupyter: + jupytext: + formats: ipynb,Rmd + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .Rmd + format_name: rmarkdown + format_version: '1.2' + jupytext_version: 1.17.1 + kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 + orphan: true +--- + +# Pandas from Numpy + +## What is Pandas? 
+ +Pandas is an open-source python library for data manipulation and analysis. + +::: {note} + +**Why is Pandas called Pandas?** + +The “Pandas” name is short for “panel data”. The library was named after the +type of econometrics panel data that it was designed to analyse. [Panel +data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where +the same observational units (e.g. countries) are observed over multiple +instances across time. + +::: + +The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). + +The standard way to make a new Data Frame is to ask Pandas to read a data file +(like a `.csv` file) into a Data Frame. Before we do that however, we will +build our own Data Frame from scratch, beginning with the fundamental building +block for Data Frames: Numpy arrays. + +```{python} +# import the libraries needed for this page +import numpy as np +import pandas as pd +``` + +## Numpy arrays + +Let's say we have some data that applies to a set of countries, and we have some countries in mind: + +```{python} +country_names_array = np.array(['Australia', 'Brazil', 'Canada', + 'China', 'Germany', 'Spain', + 'France', 'United Kingdom', 'India', + 'Italy', 'Japan', 'South Korea', + 'Mexico', 'Russia', 'United States']) +country_names_array +``` + +For compactness, we'll also want to use the corresponding [standard +three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each +country, like so: + +Both Data Frames contain the same data, and the same labels. In fact, we can +use the `.equals` method of Data Frames to ask Pandas whether it agrees the +Data Frames are equivalent: + +```{python} +A = 2 +B = 3 +C = A + B +C +``` + +They are equivalent. 
+ +::: {exercise-start} +:label: a-first-exercise +:class: dropdown +::: + +In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. +If you look very carefully at the notebook output for the two data frames, you +may be able to spot the difference. Pandas `.equals` does not care about this +difference, but let's imagine we did. Try to work out how to change the `df` +Data Frame to give *exactly* the same display as we see for +`loaded_labeled_df`. + +::: {exercise-end} +::: + +::: {solution-start} a-first-exercise +:class: dropdown +::: + +You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: + +```{python} +B +``` + +compared to: + +```{python} +C +``` + +We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. + +The simplest way to do that is: + +```{python} +D = C * 4 +``` + +```{python} +E = D + 10 +``` + +::: {solution-end} +::: + +::: {admonition} My title + +Some interesting information. + +::: + +Some more text. + +::: {exercise-start} +:label: differing-indices +:class: dropdown +::: + + +```{python} +# df5 +``` + +After these examples, what is your final working theory about the algorithm +Pandas uses to match the Indices of Series, when creating Data Frames? + +::: {exercise-end} +::: + +::: {solution-start} differing-indices +:class: dropdown +::: + +Here's our hypothesis of the algorithm: + +* First check if the Series Indices are the same. If so, use the Index of any + Series. +* If they are not the same, first sort all Series by their Index values, and + use the resulting sorted Index. + +What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? 
+ +::: {solution-end} +::: + +(plot-frames)= +## Convenient Plotting with Data Frames + +Remember earlier we imported Matplotlib to plot some of our data? diff --git a/_scripts/tests/eg2.md b/_scripts/tests/eg2.md new file mode 100644 index 000000000..4d0136bc1 --- /dev/null +++ b/_scripts/tests/eg2.md @@ -0,0 +1,169 @@ +--- +jupytext: + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +# Pandas from Numpy + ++++ + +## What is Pandas? + +Pandas is an open-source python library for data manipulation and analysis. + +::: {note} + +**Why is Pandas called Pandas?** + +The “Pandas” name is short for “panel data”. The library was named after the +type of econometrics panel data that it was designed to analyse. [Panel +data](https://en.wikipedia.org/wiki/Panel_data) are longitudinal data where +the same observational units (e.g. countries) are observed over multiple +instances across time. + +::: + +The Pandas Data Frame is the most important feature of the Pandas library. Data Frames, as the name suggests, contain not only the data for an analysis, but a toolkit of methods for cleaning, plotting and interacting with the data in flexible ways. For more information about Pandas see [this page](https://Pandas.pydata.org/about/). + +The standard way to make a new Data Frame is to ask Pandas to read a data file +(like a `.csv` file) into a Data Frame. Before we do that however, we will +build our own Data Frame from scratch, beginning with the fundamental building +block for Data Frames: Numpy arrays. 
+ +```{code-cell} +# import the libraries needed for this page +import numpy as np +import pandas as pd +``` + +## Numpy arrays + +Let's say we have some data that applies to a set of countries, and we have some countries in mind: + +```{code-cell} +country_names_array = np.array(['Australia', 'Brazil', 'Canada', + 'China', 'Germany', 'Spain', + 'France', 'United Kingdom', 'India', + 'Italy', 'Japan', 'South Korea', + 'Mexico', 'Russia', 'United States']) +country_names_array +``` + +For compactness, we'll also want to use the corresponding [standard +three-letter code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) for each +country, like so: + +Both Data Frames contain the same data, and the same labels. In fact, we can +use the `.equals` method of Data Frames to ask Pandas whether it agrees the +Data Frames are equivalent: + +```{code-cell} +A = 2 +B = 3 +C = A + B +C +``` + +They are equivalent. + +::: {exercise-start} +:label: a-first-exercise +:class: dropdown +::: + +In fact the `df` and `loaded_labeled_df` data frames are not exactly the same. +If you look very carefully at the notebook output for the two data frames, you +may be able to spot the difference. Pandas `.equals` does not care about this +difference, but let's imagine we did. Try to work out how to change the `df` +Data Frame to give _exactly_ the same display as we see for +`loaded_labeled_df`. + +::: {exercise-end} +::: + +::: {solution-start} a-first-exercise +:class: dropdown +::: + +You probably spotted that the `loaded_labeled_df` displays a `name` for the Index. You can also see this displaying the `.index` on its own: + +```{code-cell} +B +``` + +compared to: + +```{code-cell} +C +``` + +We see that the `.name` attribute differs for the two Indices; to make the Data Frame displays match, we should set the `.name` on the `df` Data Frame. 
+ +The simplest way to do that is: + +```{code-cell} +D = C * 4 +``` + +```{code-cell} +E = D + 10 +``` + +::: {solution-end} +::: + +::: {admonition} My title + +Some interesting information. + +::: + +Some more text. + +::: {exercise-start} +:label: differing-indices +:class: dropdown +::: + +```{code-cell} +# df5 +``` + +After these examples, what is your final working theory about the algorithm +Pandas uses to match the Indices of Series, when creating Data Frames? + +::: {exercise-end} +::: + +::: {solution-start} differing-indices +:class: dropdown +::: + +Here's our hypothesis of the algorithm: + +- First check if the Series Indices are the same. If so, use the Index of any + Series. +- If they are not the same, first sort all Series by their Index values, and + use the resulting sorted Index. + +What was your hypothesis? If it was different from ours, why do you think yours fits the results better? What tests would you do to test your theory against our theory? + +::: {solution-end} +::: + +(plot-frames)= + +## Convenient Plotting with Data Frames + +Remember earlier we imported Matplotlib to plot some of our data? 
diff --git a/_scripts/tests/test_process.py b/_scripts/tests/test_process.py new file mode 100644 index 000000000..92b729fb6 --- /dev/null +++ b/_scripts/tests/test_process.py @@ -0,0 +1,142 @@ +"""Test notebook parsing""" + +from copy import deepcopy +import re +import sys +from pathlib import Path + +import jupytext + +import pytest + +HERE = Path(__file__).parent +THERE = HERE.parent +EG1_NB_PATH = HERE / "eg.Rmd" +EG2_NB_PATH = HERE / "eg2.Rmd" + +sys.path.append(str(THERE)) + +import process_notebooks as pn + + +def nb2rmd(nb, fmt="myst", ext=".Rmd"): + return jupytext.writes(nb, fmt) + + +@pytest.mark.parametrize("nb_path", (EG1_NB_PATH, EG2_NB_PATH)) +def test_process_nbs(nb_path): + url = f"foo/{nb_path.stem}.html" + out_nb = pn.load_process_nb(nb_path, fmt="myst", url=url) + out_txt = nb2rmd(out_nb) + out_lines = out_txt.splitlines() + assert out_lines.count("**Start of exercise**") == 2 + assert out_lines.count("**End of exercise**") == 2 + assert out_lines.count(f"**See the [corresponding page]({url}) for solution**") == 2 + # A bit of solution text, should not be there after processing. + assert "You probably spotted that" not in out_txt + assert "Here's our hypothesis of the algorithm:" not in out_txt + # Admonitions + assert out_lines.count("**Start of note**") == 1 + assert out_lines.count("**End of note**") == 1 + assert out_lines.count("**Start of admonition: My title**") == 1 + assert out_lines.count("**End of admonition**") == 1 + # Labels + assert "plot-frames" not in out_txt + + +@pytest.mark.parametrize("nb_path", (EG1_NB_PATH, EG2_NB_PATH)) +def test_admonition_finding(nb_path): + nb_text = nb_path.read_text() + nb_lines = nb_text.splitlines() + ad_lines = pn.get_admonition_lines(nb_text, nb_path) + for first, last in ad_lines: + assert pn._ADM_HEADER.match(nb_lines[first]) + assert pn._END_DIV_RE.match(nb_lines[last]) + + +def test_cell_processors(): + nb = jupytext.read(EG1_NB_PATH) + # Code cell at index 6, Markdown at index 7. 
+ nb_cp = deepcopy(nb) + + def null_processor(cell): + return cell + + out = pn.process_cells(nb_cp, [null_processor]) + assert out["cells"] is not nb_cp["cells"] + assert out["cells"] == nb_cp["cells"] + + # Label processor. + # There is a label in the example notebook. + labeled_indices = [i for i, c in enumerate(nb["cells"]) if ")=\n" in c["source"]] + assert len(labeled_indices) == 1 + out = pn.process_cells(nb_cp, [pn.label_processor]) + other_in_cell = nb_cp["cells"].pop(labeled_indices[0]) + other_out_cell = out["cells"].pop(labeled_indices[0]) + # With these cells removed, the other cells compare equal. + assert out["cells"] == nb_cp["cells"] + # Label removed. + assert pn._LABEL.match(other_in_cell["source"]) + assert not pn._LABEL.match(other_out_cell["source"]) + + # remove-cell processor. + nb_cp = deepcopy(nb) + # No tagged cells in original notebook. + out = pn.process_cells(nb_cp, [pn.remove_processor]) + assert out["cells"] == nb_cp["cells"] + # An example code and Markdown cell. + eg_cells = [6, 7] + for eg_i in eg_cells: + nb_cp["cells"][eg_i]["metadata"]["tags"] = ["remove-cell"] + out = pn.process_cells(nb_cp, [pn.remove_processor]) + assert out["cells"] != nb_cp["cells"] + assert len(out["cells"]) == len(nb_cp["cells"]) - len(eg_cells) + # The two cells have been dropped. + assert out["cells"][eg_cells[0]] == nb_cp["cells"][eg_cells[-1] + 1] + + +def test_admonition_processing(): + src = """ +## Signal processing: {mod}`scipy.signal` + +::: {note} +:class: dropdown + +{mod}`scipy.signal` is for typical signal processing: 1D, +regularly-sampled signals. 
+ +**End of note** + +**Resampling** {func}`scipy.signal.resample`: resample a signal to `n` +points using FFT. + +**Start of admonition: Another thought** + +Some text. + +**End of admonition** + +More text.""" + assert exp == out diff --git a/_toc.yml b/_toc.yml new file mode 100644 index 000000000..74606675c --- /dev/null +++ b/_toc.yml @@ -0,0 +1,50 @@ +format: jb-book +root: index +parts: + - caption: Getting started with Python for Science + chapters: + - file: intro/intro + - file: intro/language/python_language + sections: + - file: intro/language/first_steps + - file: intro/language/basic_types + - file: intro/language/control_flow + - file: intro/language/functions + - file: intro/language/reusing_code + - file: intro/language/io + - file: intro/language/standard_library + - file: intro/language/exceptions + - file: intro/language/oop + - file: intro/numpy/index + sections: + - file: intro/numpy/array_object + - file: intro/numpy/operations + - file: intro/numpy/elaborate_arrays + - file: intro/numpy/advanced_operations + - file: intro/numpy/exercises + - file: intro/matplotlib/index + - file: intro/scipy/index + - file: intro/help/help + - caption: Advanced topics + chapters: + - file: advanced/advanced_python/index + - file: advanced/advanced_numpy/index + - file: advanced/debugging/index + - file: advanced/optimizing/index + - file: advanced/scipy_sparse/introduction + sections: + - file: advanced/scipy_sparse/storage_schemes + - file: advanced/scipy_sparse/solvers + - file: advanced/scipy_sparse/other_packages + - file: advanced/image_processing/index + - file: advanced/mathematical_optimization/index + - file: advanced/interfacing_with_c/interfacing_with_c + - caption: Packages and applications + chapters: + - file: packages/statistics/index + - file: packages/sympy + - file: packages/scikit-image/index + - file: packages/scikit-learn/index + - caption: About + chapters: + - file: about diff --git a/about.md b/about.md new file mode 100644 index 
000000000..7664b5f2c --- /dev/null +++ b/about.md @@ -0,0 +1,20 @@ +# About the Scientific Python Lecture notes + +Release: {{ release }} + +The lectures are archived on Zenodo: + +![http://dx.doi.org/10.5281/zenodo.594102](https://zenodo.org/badge/doi/10.5281/zenodo.594102.svg) + +::: {include} AUTHORS.md +:start-line: 4 +::: + +::: {include} CHANGES.md +::: + +::: {include} LICENSE.md +::: + +::: {include} CONTRIBUTING.md +::: diff --git a/about.rst b/about.rst deleted file mode 100644 index 908062e31..000000000 --- a/about.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. only:: latex - - ==================================== - About the Scientific Python Lectures - ==================================== - - - About the Scientific Python Lectures - ==================================== - - Release: |release| - - The lectures are archived on zenodo: http://dx.doi.org/10.5281/zenodo.594102 - - All code and material is licensed under a - Creative Commons Attribution 4.0 International License (CC-by) - http://creativecommons.org/licenses/by/4.0/ - - .. raw:: latex - - \begin{multicols}{2} - - .. toctree:: - - AUTHORS.rst - - .. 
raw:: latex - - \end{multicols} diff --git a/advanced/advanced_numpy/data b/advanced/advanced_numpy/data new file mode 120000 index 000000000..e67b45590 --- /dev/null +++ b/advanced/advanced_numpy/data @@ -0,0 +1 @@ +../../data \ No newline at end of file diff --git a/advanced/advanced_numpy/examples/plots/plot_maskedstats.py b/advanced/advanced_numpy/examples/plots/plot_maskedstats.py index 8b015217d..e6c4198d5 100644 --- a/advanced/advanced_numpy/examples/plots/plot_maskedstats.py +++ b/advanced/advanced_numpy/examples/plots/plot_maskedstats.py @@ -10,7 +10,7 @@ import matplotlib.pyplot as plt data = np.loadtxt("../../../../data/populations.txt") -populations = np.ma.masked_array(data[:, 1:]) # type: ignore[var-annotated] +populations = np.ma.masked_array(data[:, 1:]) year = data[:, 0] bad_years = ((year >= 1903) & (year <= 1910)) | ((year >= 1917) & (year <= 1918)) diff --git a/advanced/advanced_numpy/index.md b/advanced/advanced_numpy/index.md new file mode 100644 index 000000000..527b60f13 --- /dev/null +++ b/advanced/advanced_numpy/index.md @@ -0,0 +1,1804 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(advanced-numpy)= + +# Advanced NumPy + +**Author**: _Pauli Virtanen_ + +NumPy is at the base of Python's scientific stack of tools. Its purpose +is to implement efficient operations on many items in a block of memory. +Understanding how it works in detail helps in making efficient use of its +flexibility, taking useful shortcuts. + +This section covers: + +- Anatomy of NumPy arrays, and its consequences. Tips and + tricks. +- Universal functions: what, why, and what to do if you want + a new one. +- Integration with other tools: NumPy offers several ways to + wrap any data in an ndarray, without unnecessary copies. 
+- Recently added features, and what's in them: PEP + 3118 buffers, generalized ufuncs, ... + +:::{admonition} Prerequisites + +- NumPy +- Cython +- Pillow (Python imaging library, used in a couple of examples) + ::: + +```{code-cell} +# Import Numpy module. +import numpy as np +# Import Matplotlib (for later). +import matplotlib.pyplot as plt +``` + +## Life of ndarray + +### It's... + +::: {admonition} What is an **ndarray** + +An **ndarray** is: + +- A block of memory and +- an indexing scheme and +- a data type descriptor. + ::: + +Put another way, an ndarray has **raw data**, and algorithms to: + +- locate an element +- interpret an element + +::: {image} threefundamental.png +::: + +```c +typedef struct PyArrayObject { + PyObject_HEAD + + /* Block of memory */ + char *data; + + /* Data type descriptor */ + PyArray_Descr *descr; + + /* Indexing scheme */ + int nd; + npy_intp *dimensions; + npy_intp *strides; + + /* Other stuff */ + PyObject *base; + int flags; + PyObject *weakreflist; +} PyArrayObject; +``` + +### Block of memory + +```{code-cell} +x = np.array([1, 2, 3], dtype=np.int32) +x.data +``` + +```{code-cell} +bytes(x.data) +``` + +Memory address of the data: + +```{code-cell} +x.__array_interface__['data'][0] +``` + +The whole `__array_interface__`: + +```{code-cell} +x.__array_interface__ +``` + +Reminder: two {class}`ndarrays ` may share the same memory: + +```{code-cell} +x = np.array([1, 2, 3, 4]) +y = x[:-1] +x[0] = 9 +y +``` + +Memory does not need to be owned by an {class}`ndarray`: + +```{code-cell} +x = b'1234' +``` + +x is a string (in Python 3 a bytes), we can represent its data as an +array of ints: + +```{code-cell} +y = np.frombuffer(x, dtype=np.int8) +y.data +``` + +```{code-cell} +y.base is x +``` + +```{code-cell} +y.flags +``` + +The `owndata` and `writeable` flags indicate status of the memory +block. 
+ +:::{admonition} See also + +[array interface](https://numpy.org/doc/stable/reference/arrays.interface.html) +::: + +### Data types + +#### The descriptor + +{class}`dtype` describes a single item in the array: + +::: {list-table} **Dtypes** + +- - type + - **scalar type** of the data, one of: + - int8, int16, float64, _et al._ (fixed size) + - str, unicode, void (flexible size) +- - itemsize + - **size** of the data block +- - byteorder + - **byte order**: + - big-endian `>` + - little-endian `<` + - not applicable `|` +- - fields + - sub-dtypes, if it's a **structured data type** +- - shape + - shape of the array, if it's a **sub-array** + +::: + +```{code-cell} +np.dtype(int).type +``` + +```{code-cell} +np.dtype(int).itemsize +``` + +```{code-cell} +np.dtype(int).byteorder +``` + +#### Example: reading `.wav` files + +The `.wav` file header: + +| | | +| --------------- | ------------------------------------- | +| chunk_id | `"RIFF"` | +| chunk_size | 4-byte unsigned little-endian integer | +| format | `"WAVE"` | +| fmt_id | `"fmt "` | +| fmt_size | 4-byte unsigned little-endian integer | +| audio_fmt | 2-byte unsigned little-endian integer | +| num_channels | 2-byte unsigned little-endian integer | +| sample_rate | 4-byte unsigned little-endian integer | +| byte_rate | 4-byte unsigned little-endian integer | +| block_align | 2-byte unsigned little-endian integer | +| bits_per_sample | 2-byte unsigned little-endian integer | +| data_id | `"data"` | +| data_size | 4-byte unsigned little-endian integer | + +- 44-byte block of raw data (in the beginning of the file) +- ... followed by `data_size` bytes of actual sound data. + +The `.wav` file header as a NumPy _structured_ data type: + +```{code-cell} +wav_header_dtype = np.dtype([ + ("chunk_id", (bytes, 4)), # flexible-sized scalar type, item size 4 + ("chunk_size", " 1000`. +Use it to determine which `c` are in the Mandelbrot set. + +Our function is a simple one, so make use of the `PyUFunc_*` helpers. 
+ +Write it in Cython + +:::{admonition} See also + +mandel.pyx, mandelplot.py +::: + +:::{only} latex + +```{literalinclude} examples/mandel.pyx + +``` + +::: + +**Reminder**: some pre-made Ufunc loops: + +| | | +| -------------- | --------------------------------------------------------------------------------- | +| `PyUfunc_f_f` | `float elementwise_func(float input_1)` | +| `PyUfunc_ff_f` | `float elementwise_func(float input_1, float input_2)` | +| `PyUfunc_d_d` | `double elementwise_func(double input_1)` | +| `PyUfunc_dd_d` | `double elementwise_func(double input_1, double input_2)` | +| `PyUfunc_D_D` | `elementwise_func(complex_double *input, complex_double* output)` | +| `PyUfunc_DD_D` | `elementwise_func(complex_double *in1, complex_double *in2, complex_double* out)` | + +Type codes: + +``` +NPY_BOOL, NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, +NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, NPY_FLOAT, NPY_DOUBLE, +NPY_LONGDOUBLE, NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE, NPY_DATETIME, +NPY_TIMEDELTA, NPY_OBJECT, NPY_STRING, NPY_UNICODE, NPY_VOID +``` + +::: {exercise-end} +::: + +::: {solution-start} mandelbrot-ufunc +:class: dropdown +::: + +```{literalinclude} examples/mandel-answer.pyx +:language: python +``` + +```{literalinclude} examples/mandelplot.py +:language: python +``` + +::: {image} mandelbrot.png +::: + +:::{note} + +Most of the boilerplate could be automated by these Cython modules: + + + +::: + +**Several accepted input types** + +E.g. supporting both single- and double-precision versions + +```cython +cdef void mandel_single_point(double complex *z_in, + double complex *c_in, + double complex *z_out) nogil: + ... + +cdef void mandel_single_point_singleprec(float complex *z_in, + float complex *c_in, + float complex *z_out) nogil: + ... 
+ +cdef PyUFuncGenericFunction loop_funcs[2] +cdef char input_output_types[3*2] +cdef void *elementwise_funcs[1*2] + +loop_funcs[0] = PyUFunc_DD_D +input_output_types[0] = NPY_CDOUBLE +input_output_types[1] = NPY_CDOUBLE +input_output_types[2] = NPY_CDOUBLE +elementwise_funcs[0] = mandel_single_point + +loop_funcs[1] = PyUFunc_FF_F +input_output_types[3] = NPY_CFLOAT +input_output_types[4] = NPY_CFLOAT +input_output_types[5] = NPY_CFLOAT +elementwise_funcs[1] = mandel_single_point_singleprec + +mandel = PyUFunc_FromFuncAndData( + loop_funcs, + elementwise_funcs, + input_output_types, + 2, # number of supported input types <---------------- + 2, # number of input args + 1, # number of output args + 0, # `identity` element, never mind this + "mandel", # function name + "mandel(z, c) -> computes iterated z*z + c", # docstring + 0 # unused + ) +``` + +::: {solution-end} +::: + +### Generalized ufuncs + +**ufunc** + +`output = elementwise_function(input)` + +Both `output` and `input` can be a single array element only. + +**generalized ufunc** + +`output` and `input` can be arrays with a fixed number of dimensions + +For example, matrix trace (sum of diag elements): + +```text +input shape = (n, n) +output shape = () # i.e. scalar + +(n, n) -> () +``` + +Matrix product: + +```text +input_1 shape = (m, n) +input_2 shape = (n, p) +output shape = (m, p) + +(m, n), (n, p) -> (m, p) +``` + +- This is called the _"signature"_ of the generalized ufunc +- The dimensions on which the g-ufunc acts, are _"core dimensions"_ + +**Status in NumPy** + +- g-ufuncs are in NumPy already ... 
+- new ones can be created with `PyUFunc_FromFuncAndDataAndSignature` +- most linear-algebra functions are implemented as g-ufuncs to enable working + with stacked arrays: + +```{code-cell} +import numpy as np +rng = np.random.default_rng(27446968) +np.linalg.det(rng.random((3, 5, 5))) +``` + +```{code-cell} +np.linalg._umath_linalg.det.signature +``` + +- matrix multiplication this way could be useful for operating on + many small matrices at once +- Also see `tensordot` and `einsum` + + + +**Generalized ufunc loop** + +Matrix multiplication `(m,n),(n,p) -> (m,p)` + +```c +void gufunc_loop(void **args, int *dimensions, int *steps, void *data) +{ + char *input_1 = (char*)args[0]; /* these are as previously */ + char *input_2 = (char*)args[1]; + char *output = (char*)args[2]; + + int input_1_stride_m = steps[3]; /* strides for the core dimensions */ + int input_1_stride_n = steps[4]; /* are added after the non-core */ + int input_2_strides_n = steps[5]; /* steps */ + int input_2_strides_p = steps[6]; + int output_strides_n = steps[7]; + int output_strides_p = steps[8]; + + int m = dimensions[1]; /* core dimensions are added after */ + int n = dimensions[2]; /* the main dimension; order as in */ + int p = dimensions[3]; /* signature */ + + int i; + + for (i = 0; i < dimensions[0]; ++i) { + matmul_for_strided_matrices(input_1, input_2, output, + strides for each array...); + + input_1 += steps[0]; + input_2 += steps[1]; + output += steps[2]; + } +} +``` + +## Interoperability features + +### Sharing multidimensional, typed data + +Suppose you + +1. Write a library that handles (multidimensional) binary data, +2. Want to make it easy to manipulate the data with NumPy, or whatever + other library, +3. ... but would **not** like to have NumPy as a dependency. + +Currently, 3 solutions: + +1. the "old" buffer interface +2. the array interface +3. 
the "new" buffer interface ({pep}`3118`) + +### The old buffer protocol + +- Only 1-D buffers +- No data type information +- C-level interface; `PyBufferProcs tp_as_buffer` in the type object +- But it's integrated into Python (e.g. strings support it) + +Mini-exercise using [Pillow](https://python-pillow.org/) (Python +Imaging Library): + +:::{admonition} See also + +pilbuffer.py +::: + +::: {exercise-start} +:label: pil-buffer +:class: dropdown +::: + +```{code-cell} +from PIL import Image +data = np.zeros((200, 200, 4), dtype=np.uint8) +data[:, :] = [255, 0, 0, 255] # Red +# In PIL, RGBA images consist of 32-bit integers whose bytes are [RR,GG,BB,AA] +data = data.view(np.int32).squeeze() +img = Image.frombuffer("RGBA", (200, 200), data, "raw", "RGBA", 0, 1) +img.save('test.png') +``` + +**The question** + +What happens if `data` is now modified, and `img` saved again? + +::: {exercise-end} +::: + +### The old buffer protocol + +Show how to exchange data between numpy and a library that only knows +the buffer interface: + +```{code-cell} +# Make a sample image, RGBA format +x = np.zeros((200, 200, 4), dtype=np.uint8) +x[:, :, 0] = 255 # red +x[:, :, 3] = 255 # opaque + +data_i32 = x.view(np.int32) # Check that you understand why this is OK! + +img = Image.frombuffer("RGBA", (200, 200), data_i32) +img.save("test_red.png") + +# Modify the original data, and save again. 
+x[:, :, 1] = 255 +img.save("test_recolored.png") +``` + +::: {image} test_red.png +::: + +::: {image} test_recolored.png +::: + +### Array interface protocol + +- Multidimensional buffers +- Data type information present +- NumPy-specific approach; slowly deprecated (but not going away) +- Not integrated in Python otherwise + +:::{admonition} See also + +Documentation: + +::: + +```{code-cell} +x = np.array([[1, 2], [3, 4]]) +x.__array_interface__ +``` + +```{code-cell} +:tags: [hide-input] + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import os +if not os.path.exists('data'): os.mkdir('data') +plt.imsave('data/test.png', data) +``` + +```{code-cell} +from PIL import Image +img = Image.open('data/test.png') +img.__array_interface__ +``` + +```{code-cell} +x = np.asarray(img) +x.shape +``` + +:::{note} + +A more C-friendly variant of the array interface is also defined. + +::: + +(array-siblings)= + +## Array siblings: {class}`chararray`, {class}`MaskedArray` + +### {class}`chararray `: vectorized string operations + +```{code-cell} +x = np.char.asarray(['a', ' bbb', ' ccc']) +x +``` + +```{code-cell} +x.upper() +``` + +### {class}`MaskedArray ` missing data + +Masked arrays are arrays that may have missing or invalid entries. + +For example, suppose we have an array where the fourth entry is invalid: + +```{code-cell} +x = np.array([1, 2, 3, -99, 5]) +``` + +One way to describe this is to create a masked array: + +```{code-cell} +mx = np.ma.MaskedArray(x, mask=[0, 0, 0, 1, 0]) +mx +``` + +Masked mean ignores masked data: + +```{code-cell} +mx.mean() +``` + +```{code-cell} +np.mean(mx) +``` + +:::{warning} +Not all NumPy functions respect masks, for instance +`np.dot`, so check the return types. 
+::: + +The `MaskedArray` returns a **view** to the original array: + +```{code-cell} +mx[1] = 9 +x +``` + +#### The mask + +You can modify the mask by assigning: + +```{code-cell} +mx[1] = np.ma.masked +mx +``` + +The mask is cleared on assignment: + +```{code-cell} +mx[1] = 9 +mx +``` + +The mask is also available directly: + +```{code-cell} +mx.mask +``` + +The masked entries can be filled with a given value to get a usual +array back: + +```{code-cell} +x2 = mx.filled(-1) +x2 +``` + +The mask can also be cleared: + +```{code-cell} +mx.mask = np.ma.nomask +mx +``` + +#### Domain-aware functions + +The masked array package also contains domain-aware functions: + +```{code-cell} +np.ma.log(np.array([1, 2, -1, -2, 3, -5])) +``` + +:::{note} + +Streamlined and more seamless support for dealing with missing data +in arrays is making its way into NumPy 1.7. Stay tuned! + +::: + +**Example: Masked statistics** + +Canadian rangers were distracted when counting hares and lynxes in +1903-1910 and 1917-1918, and got the numbers wrong. (Carrot +farmers stayed alert, though.) Compute the mean populations over +time, ignoring the invalid numbers. + +```{code-cell} +data = np.loadtxt('data/populations.txt') +populations = np.ma.MaskedArray(data[:,1:]) +year = data[:, 0] +``` + +```{code-cell} +bad_years = (((year >= 1903) & (year <= 1910)) + | ((year >= 1917) & (year <= 1918))) +# '&' means 'and' and '|' means 'or' +populations[bad_years, 0] = np.ma.masked +populations[bad_years, 1] = np.ma.masked +``` + +```{code-cell} +populations.mean(axis=0) +``` + +```{code-cell} +populations.std(axis=0) +``` + +Note that Matplotlib knows about masked arrays: + +```{code-cell} +plt.plot(year, populations, 'o-') +``` + +### `np.recarray`: purely convenience + +```{code-cell} +arr = np.array([('a', 1), ('b', 2)], dtype=[('x', 'S1'), ('y', int)]) +arr2 = arr.view(np.recarray) +arr2.x +``` + +```{code-cell} +arr2.y +``` + +## Summary + +- Anatomy of the ndarray: data, dtype, strides. 
+- Universal functions: elementwise operations, how to make new ones +- Ndarray subclasses +- Various buffer interfaces for integration with other tools +- Recent additions: PEP 3118, generalized ufuncs + +## Contributing to NumPy/SciPy + +Get this tutorial: + +### Why + +- "There's a bug?" +- "I don't understand what this is supposed to do?" +- "I have this fancy code. Would you like to have it?" +- "I'd like to help! What can I do?" + +### Reporting bugs + +- Bug tracker (prefer **this**) + + - + - + - Click the "Sign up" link to get an account + +- Mailing lists () + + - If you're unsure + - No replies in a week or so? Just file a bug ticket. + +#### Good bug report + +```text +Title: numpy.random.permutations fails for non-integer arguments + +I'm trying to generate random permutations, using numpy.random.permutations + +When calling numpy.random.permutation with non-integer arguments +it fails with a cryptic error message:: + + >>> rng.permutation(12) + array([ 2, 6, 4, 1, 8, 11, 10, 5, 9, 3, 7, 0]) + >>> rng.permutation(12.) + Traceback (most recent call last): + File "", line 1, in + File "_generator.pyx", line 4844, in numpy.random._generator.Generator.permutation + numpy.exceptions.AxisError: axis 0 is out of bounds for array of dimension 0 + +This also happens with long arguments, and so +np.random.permutation(X.shape[0]) where X is an array fails on 64 +bit windows (where shape is a tuple of longs). + +It would be great if it could cast to integer or at least raise a +proper error for non-integer types. + +I'm using NumPy 1.4.1, built from the official tarball, on Windows +64 with Visual studio 2008, on Python.org 64-bit Python. +``` + +0. What are you trying to do? +1. **Small code snippet reproducing the bug** (if possible) + + - What actually happens + - What you'd expect + +2. Platform (Windows / Linux / OSX, 32/64 bits, x86/PPC, ...) +3. 
Version of NumPy/SciPy + +```{code-cell} +print(np.__version__) +``` + +**Check that the following is what you expect** + +```{code-cell} +print(np.__file__) +``` + +In case you have old/broken NumPy installations lying around. + +If unsure, try to remove existing NumPy installations, and reinstall... + +### Contributing to documentation + +1. Documentation editor + + - + + - Registration + + - Register an account + + - Subscribe to `scipy-dev` mailing list (subscribers-only) + + - Problem with mailing lists: you get mail + + - But: **you can turn mail delivery off** + + - "change your subscription options", at the bottom of + + + + - Send a mail @ `scipy-dev` mailing list; ask for activation: + + ```text + To: scipy-dev@scipy.org + + Hi, + + I'd like to edit NumPy/SciPy docstrings. My account is XXXXX + + Cheers, + N. N. + ``` + + - Check the style guide: + + - + - Don't be intimidated; to fix a small thing, just fix it + + - Edit + +2. Edit sources and send patches (as for bugs) + +3. Complain on the mailing list + +### Contributing features + +The contribution of features is documented on + +### How to help, in general + +- Bug fixes always welcome! + + - What irks you most + - Browse the tracker + +- Documentation work + + - API docs: improvements to docstrings + + - Know some SciPy module well? + + - _User guide_ + + - + +- Ask on communication channels: + + - `numpy-discussion` list + - `scipy-dev` list diff --git a/advanced/advanced_numpy/index.rst b/advanced/advanced_numpy/index.rst deleted file mode 100644 index d7e1d11ff..000000000 --- a/advanced/advanced_numpy/index.rst +++ /dev/null @@ -1,1669 +0,0 @@ -.. For doctests - >>> import numpy as np - >>> rng = np.random.default_rng(27446968) - >>> # For doctest on headless environments - >>> import matplotlib.pyplot as plt - -.. _advanced_numpy: - -============== -Advanced NumPy -============== - -**Author**: *Pauli Virtanen* - -NumPy is at the base of Python's scientific stack of tools. 
Its purpose -to implement efficient operations on many items in a block of memory. -Understanding how it works in detail helps in making efficient use of its -flexibility, taking useful shortcuts. - -This section covers: - -- Anatomy of NumPy arrays, and its consequences. Tips and - tricks. - -- Universal functions: what, why, and what to do if you want - a new one. - -- Integration with other tools: NumPy offers several ways to - wrap any data in an ndarray, without unnecessary copies. - -- Recently added features, and what's in them: PEP - 3118 buffers, generalized ufuncs, ... - -.. currentmodule:: numpy - -.. topic:: Prerequisites - - * NumPy - * Cython - * Pillow (Python imaging library, used in a couple of examples) - -.. contents:: Chapter contents - :local: - :depth: 2 - -.. tip:: - - In this section, NumPy will be imported as follows:: - - >>> import numpy as np - - -Life of ndarray -=============== - -It's... -------- - -**ndarray** = - - block of memory + indexing scheme + data type descriptor - - - raw data - - how to locate an element - - how to interpret an element - -.. image:: threefundamental.png - -.. code-block:: c - - typedef struct PyArrayObject { - PyObject_HEAD - - /* Block of memory */ - char *data; - - /* Data type descriptor */ - PyArray_Descr *descr; - - /* Indexing scheme */ - int nd; - npy_intp *dimensions; - npy_intp *strides; - - /* Other stuff */ - PyObject *base; - int flags; - PyObject *weakreflist; - } PyArrayObject; - - -Block of memory ---------------- - ->>> x = np.array([1, 2, 3], dtype=np.int32) ->>> x.data -<... 
at ...> ->>> bytes(x.data) -b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00' - -Memory address of the data: - ->>> x.__array_interface__['data'][0] # doctest: +SKIP -64803824 - -The whole ``__array_interface__``: - ->>> x.__array_interface__ -{'data': (..., False), 'strides': None, 'descr': [('', '` may share the same memory:: - - >>> x = np.array([1, 2, 3, 4]) - >>> y = x[:-1] - >>> x[0] = 9 - >>> y - array([9, 2, 3]) - -Memory does not need to be owned by an :class:`ndarray`:: - - >>> x = b'1234' - -x is a string (in Python 3 a bytes), we can represent its data as an -array of ints:: - - >>> y = np.frombuffer(x, dtype=np.int8) - >>> y.data - <... at ...> - >>> y.base is x - True - - >>> y.flags - C_CONTIGUOUS : True - F_CONTIGUOUS : True - OWNDATA : False - WRITEABLE : False - ALIGNED : True - WRITEBACKIFCOPY : False - - -The ``owndata`` and ``writeable`` flags indicate status of the memory -block. - -.. seealso:: `array interface `_ - -Data types ----------- - -The descriptor -^^^^^^^^^^^^^^ - -:class:`dtype` describes a single item in the array: - -========= =================================================== -type **scalar type** of the data, one of: - - int8, int16, float64, *et al.* (fixed size) - - str, unicode, void (flexible size) - -itemsize **size** of the data block -byteorder **byte order**: big-endian ``>`` / little-endian ``<`` / not applicable ``|`` -fields sub-dtypes, if it's a **structured data type** -shape shape of the array, if it's a **sub-array** -========= =================================================== - ->>> np.dtype(int).type - ->>> np.dtype(int).itemsize -8 ->>> np.dtype(int).byteorder -'=' - - -Example: reading ``.wav`` files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``.wav`` file header: - -================ ========================================== -chunk_id ``"RIFF"`` -chunk_size 4-byte unsigned little-endian integer -format ``"WAVE"`` -fmt_id ``"fmt "`` -fmt_size 4-byte unsigned little-endian integer -audio_fmt 2-byte unsigned 
little-endian integer -num_channels 2-byte unsigned little-endian integer -sample_rate 4-byte unsigned little-endian integer -byte_rate 4-byte unsigned little-endian integer -block_align 2-byte unsigned little-endian integer -bits_per_sample 2-byte unsigned little-endian integer -data_id ``"data"`` -data_size 4-byte unsigned little-endian integer -================ ========================================== - -- 44-byte block of raw data (in the beginning of the file) -- ... followed by ``data_size`` bytes of actual sound data. - -The ``.wav`` file header as a NumPy *structured* data type:: - - >>> wav_header_dtype = np.dtype([ - ... ("chunk_id", (bytes, 4)), # flexible-sized scalar type, item size 4 - ... ("chunk_size", ">> wav_header_dtype['format'] - dtype('S4') - >>> wav_header_dtype.fields - mappingproxy({'chunk_id': (dtype('S4'), 0), 'chunk_size': (dtype('uint32'), 4), 'format': (dtype('S4'), 8), 'fmt_id': (dtype('S4'), 12), 'fmt_size': (dtype('uint32'), 16), 'audio_fmt': (dtype('uint16'), 20), 'num_channels': (dtype('uint16'), 22), 'sample_rate': (dtype('uint32'), 24), 'byte_rate': (dtype('uint32'), 28), 'block_align': (dtype('uint16'), 32), 'bits_per_sample': (dtype('uint16'), 34), 'data_id': (dtype(('S1', (2, 2))), 36), 'data_size': (dtype('uint32'), 40)}) - >>> wav_header_dtype.fields['format'] - (dtype('S4'), 8) - -- The first element is the sub-dtype in the structured data, corresponding - to the name ``format`` - -- The second one is its offset (in bytes) from the beginning of the item - -.. topic:: Exercise - :class: green - - Mini-exercise, make a "sparse" dtype by using offsets, and only some - of the fields:: - - >>> wav_header_dtype = np.dtype(dict( - ... names=['format', 'sample_rate', 'data_id'], - ... offsets=[offset_1, offset_2, offset_3], # counted from start of structure in bytes - ... formats=list of dtypes for each of the fields, - ... )) # doctest: +SKIP - - and use that to read the sample rate, and ``data_id`` (as sub-array). 
- ->>> f = open('data/test.wav', 'r') ->>> wav_header = np.fromfile(f, dtype=wav_header_dtype, count=1) ->>> f.close() ->>> print(wav_header) # doctest: +SKIP -[ ('RIFF', 17402L, 'WAVE', 'fmt ', 16L, 1, 1, 16000L, 32000L, 2, 16, [['d', 'a'], ['t', 'a']], 17366L)] ->>> wav_header['sample_rate'] -array([16000], dtype=uint32) - -Let's try accessing the sub-array: - ->>> wav_header['data_id'] # doctest: +SKIP -array([[['d', 'a'], - ['t', 'a']]], - dtype='|S1') ->>> wav_header.shape -(1,) ->>> wav_header['data_id'].shape -(1, 2, 2) - -When accessing sub-arrays, the dimensions get added to the end! - -.. note:: - - There are existing modules such as ``wavfile``, ``audiolab``, - etc. for loading sound data... - - -Casting and re-interpretation/views -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -**casting** - - - on assignment - - on array construction - - on arithmetic - - etc. - - and manually: ``.astype(dtype)`` - -**data re-interpretation** - - - manually: ``.view(dtype)`` - - -Casting -........ - -- Casting in arithmetic, in nutshell: - - - only type (not value!) of operands matters - - - largest "safe" type able to represent both is picked - - - scalars can "lose" to arrays in some situations - -- Casting in general copies data:: - - >>> x = np.array([1, 2, 3, 4], dtype=float) - >>> x - array([1., 2., 3., 4.]) - >>> y = x.astype(np.int8) - >>> y - array([1, 2, 3, 4], dtype=int8) - >>> y + 1 - array([2, 3, 4, 5], dtype=int8) - >>> y + 256 - Traceback (most recent call last): - File "", line 1, in - OverflowError: Python integer 256 out of bounds for int8 - >>> y + 256.0 - array([257., 258., 259., 260.]) - >>> y + np.array([256], dtype=np.int32) - array([257, 258, 259, 260], dtype=int32) - -- Casting on setitem: dtype of the array is not changed on item assignment:: - - >>> y[:] = y + 1.5 - >>> y - array([2, 3, 4, 5], dtype=int8) - -.. note:: - - Exact rules: see `NumPy documentation - `_ - - -Re-interpretation / viewing -............................ 
- -- Data block in memory (4 bytes) - - ========== ==== ========== ==== ========== ==== ========== - ``0x01`` || ``0x02`` || ``0x03`` || ``0x04`` - ========== ==== ========== ==== ========== ==== ========== - - - 4 of uint8, OR, - - 4 of int8, OR, - - 2 of int16, OR, - - 1 of int32, OR, - - 1 of float32, OR, - - ... - - How to switch from one to another? - -1. Switch the dtype: - - >>> x = np.array([1, 2, 3, 4], dtype=np.uint8) - >>> x.dtype = "<i2" - >>> x - array([ 513, 1027], dtype=int16) - >>> 0x0201, 0x0403 - (513, 1027) - - ========== ========== ==== ========== ========== - ``0x01`` ``0x02`` || ``0x03`` ``0x04`` - ========== ========== ==== ========== ========== - - - .. note:: little-endian: least significant byte is on the *left* in memory - - -2. Create a new view of type ``int32``, shorthand ``i4``: - - >>> y = x.view("<i4") - >>> y - array([67305985], dtype=int32) - >>> 0x04030201 - 67305985 - - ========== ========== ========== ========== - ``0x01`` ``0x02`` ``0x03`` ``0x04`` - ========== ========== ========== ========== - -.. note:: - - - ``.view()`` makes *views*, does not copy (or alter) the memory block - - only changes the dtype (and adjusts array shape):: - - >>> x[1] = 5 - >>> y - array([328193], dtype=int32) - >>> y.base is x - True - -.. rubric:: Mini-exercise: data re-interpretation - -.. seealso:: view-colors.py - -You have RGBA data in an array:: - - >>> x = np.zeros((10, 10, 4), dtype=np.int8) - >>> x[:, :, 0] = 1 - >>> x[:, :, 1] = 2 - >>> x[:, :, 2] = 3 - >>> x[:, :, 3] = 4 - -where the four entries along the last axis are the R, G, B, and alpha channels. - -How to make a (10, 10) structured array with field names 'r', 'g', 'b', 'a' -without copying data? :: - - >>> y = ... # doctest: +SKIP - - >>> assert (y['r'] == 1).all() # doctest: +SKIP - >>> assert (y['g'] == 2).all() # doctest: +SKIP - >>> assert (y['b'] == 3).all() # doctest: +SKIP - >>> assert (y['a'] == 4).all() # doctest: +SKIP - -*Solution* - - .. raw:: html - - ... - - -..
warning:: - - Another two arrays, each occupying exactly 4 bytes of memory: - - >>> x = np.array([[1, 3], [2, 4]], dtype=np.uint8) - >>> x - array([[1, 3], - [2, 4]], dtype=uint8) - >>> y = x.transpose() - >>> y - array([[1, 2], - [3, 4]], dtype=uint8) - - We view the elements of ``x`` (1 byte each) as ``int16`` (2 bytes each): - - >>> x.view(np.int16) - array([[ 769], - [1026]], dtype=int16) - - What is happening here? Take a look at the bytes stored in memory - by ``x``: - - >>> x.tobytes() - b'\x01\x03\x02\x04' - - The ``\x`` stands for heXadecimal, so what we are seeing is:: - - 0x01 0x03 0x02 0x04 - - We ask NumPy to interpret these bytes as elements of dtype - ``int16``—each of which occupies *two* bytes in memory. Therefore, - ``0x01 0x03`` becomes the first ``int16`` and ``0x02 0x04`` the - second. - - You may then expect to see ``0x0103`` (259, when converting from - hexadecimal to decimal) as the first result. But your computer - likely stores least significant bytes first (little-endian), and as such reads the - number as ``0x0301`` or 769 (go on and type `0x0301` into your Python - terminal to verify). - - We can do the same on a copy of ``y`` (why doesn't it work on ``y`` - directly?): - - >>> y.copy().view(np.int16) - array([[ 513], - [1027]], dtype=int16) - - Can you explain these numbers, 513 and 1027, as well as the output - shape of the resulting array? - - -Indexing scheme: strides ------------------------- - -Main point -^^^^^^^^^^ - -**The question**:: - - >>> x = np.array([[1, 2, 3], - ... [4, 5, 6], - ... [7, 8, 9]], dtype=np.int8) - >>> x.tobytes('A') - b'\x01\x02\x03\x04\x05\x06\x07\x08\t' - - At which byte in ``x.data`` does the item ``x[1, 2]`` begin? - -**The answer** (in NumPy) - - - **strides**: the number of bytes to jump to find the next element - - 1 stride per dimension - -..
code-block:: pycon - - >>> x.strides - (3, 1) - >>> byte_offset = 3 * 1 + 1 * 2 # to find x[1, 2] - >>> x.flat[byte_offset] - np.int8(6) - >>> x[1, 2] - np.int8(6) - -simple, **flexible** - - -C and Fortran order -..................... - -.. note:: - The Python built-in :py:class:`bytes` returns bytes in C-order by default - which can cause confusion when trying to inspect memory layout. We use - :meth:`numpy.ndarray.tobytes` with ``order=A`` instead, which preserves - the C or F ordering of the bytes in memory. - -:: - - >>> x = np.array([[1, 2, 3], - ... [4, 5, 6]], dtype=np.int16, order='C') - >>> x.strides - (6, 2) - >>> x.tobytes('A') - b'\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00' - -* Need to jump 6 bytes to find the next row -* Need to jump 2 bytes to find the next column - -:: - - >>> y = np.array(x, order='F') - >>> y.strides - (2, 4) - >>> y.tobytes('A') - b'\x01\x00\x04\x00\x02\x00\x05\x00\x03\x00\x06\x00' - -* Need to jump 2 bytes to find the next row -* Need to jump 4 bytes to find the next column - - -- Similarly to higher dimensions: - - - C: last dimensions vary fastest (= smaller strides) - - F: first dimensions vary fastest - - .. math:: - - \mathrm{shape} &= (d_1, d_2, ..., d_n) - \\ - \mathrm{strides} &= (s_1, s_2, ..., s_n) - \\ - s_j^C &= d_{j+1} d_{j+2} ... d_{n} \times \mathrm{itemsize} - \\ - s_j^F &= d_{1} d_{2} ... d_{j-1} \times \mathrm{itemsize} - - -.. note:: - - Now we can understand the behavior of ``.view()``: - - >>> y = np.array([[1, 3], [2, 4]], dtype=np.uint8).transpose() - >>> x = y.copy() - - Transposition does not affect the memory layout of the data, only strides - - >>> x.strides - (2, 1) - >>> y.strides - (1, 2) - - >>> x.tobytes('A') - b'\x01\x02\x03\x04' - >>> y.tobytes('A') - b'\x01\x03\x02\x04' - - - the results are different when interpreted as 2 of int16 - - ``.copy()`` creates new arrays in the C order (by default) - -.. 
note:: **In-place operations with views** - - Prior to NumPy version 1.13, in-place operations with views could result in - **incorrect** results for large arrays. - Since :doc:`version 1.13 `, - NumPy includes checks for *memory overlap* to - guarantee that results are consistent with the non in-place version - (e.g. ``a = a + a.T`` produces the same result as ``a += a.T``). - Note however that this may result in the data being copied (as if using - ``a += a.T.copy()``), ultimately resulting in more memory being used than - might otherwise be expected for in-place operations! - - -Slicing with integers -....................... - -- *Everything* can be represented by changing only ``shape``, ``strides``, - and possibly adjusting the ``data`` pointer! -- Never makes copies of the data - -:: - - >>> x = np.array([1, 2, 3, 4, 5, 6], dtype=np.int32) - >>> y = x[::-1] - >>> y - array([6, 5, 4, 3, 2, 1], dtype=int32) - >>> y.strides - (-4,) - - >>> y = x[2:] - >>> y.__array_interface__['data'][0] - x.__array_interface__['data'][0] - 8 - - >>> x = np.zeros((10, 10, 10), dtype=float) - >>> x.strides - (800, 80, 8) - >>> x[::2,::3,::4].strides - (1600, 240, 32) - -- Similarly, transposes never make copies (it just swaps strides):: - - >>> x = np.zeros((10, 10, 10), dtype=float) - >>> x.strides - (800, 80, 8) - >>> x.T.strides - (8, 80, 800) - -But: not all reshaping operations can be represented by playing with -strides:: - - >>> a = np.arange(6, dtype=np.int8).reshape(3, 2) - >>> b = a.T - >>> b.strides - (1, 2) - -So far, so good. However:: - - >>> bytes(a.data) - b'\x00\x01\x02\x03\x04\x05' - >>> b - array([[0, 2, 4], - [1, 3, 5]], dtype=int8) - >>> c = b.reshape(3*2) - >>> c - array([0, 2, 4, 1, 3, 5], dtype=int8) - -Here, there is no way to represent the array ``c`` given one stride -and the block of memory for ``a``. Therefore, the ``reshape`` -operation needs to make a copy here. - -.. 
_stride-manipulation-label: - -Example: fake dimensions with strides -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. rubric:: Stride manipulation - ->>> from numpy.lib.stride_tricks import as_strided ->>> help(as_strided) -Help on function as_strided in module numpy.lib.stride_tricks: -... - -.. warning:: - - ``as_strided`` does **not** check that you stay inside the memory - block bounds... - ->>> x = np.array([1, 2, 3, 4], dtype=np.int16) ->>> as_strided(x, strides=(2*2, ), shape=(2, )) -array([1, 3], dtype=int16) ->>> x[::2] -array([1, 3], dtype=int16) - - -.. seealso:: stride-fakedims.py - -**Exercise** - - :: - - array([1, 2, 3, 4], dtype=np.int8) - - -> array([[1, 2, 3, 4], - [1, 2, 3, 4], - [1, 2, 3, 4]], dtype=np.int8) - - using only ``as_strided``.:: - - Hint: byte_offset = stride[0]*index[0] + stride[1]*index[1] + ... - -*Spoiler* - - .. raw:: html - - ... - - - -.. _broadcasting_advanced: - -Broadcasting -^^^^^^^^^^^^ - -- Doing something useful with it: outer product - of ``[1, 2, 3, 4]`` and ``[5, 6, 7]`` - ->>> x = np.array([1, 2, 3, 4], dtype=np.int16) ->>> x2 = as_strided(x, strides=(0, 1*2), shape=(3, 4)) ->>> x2 -array([[1, 2, 3, 4], - [1, 2, 3, 4], - [1, 2, 3, 4]], dtype=int16) - ->>> y = np.array([5, 6, 7], dtype=np.int16) ->>> y2 = as_strided(y, strides=(1*2, 0), shape=(3, 4)) ->>> y2 -array([[5, 5, 5, 5], - [6, 6, 6, 6], - [7, 7, 7, 7]], dtype=int16) - ->>> x2 * y2 -array([[ 5, 10, 15, 20], - [ 6, 12, 18, 24], - [ 7, 14, 21, 28]], dtype=int16) - -.. rubric:: ... seems somehow familiar ... - ->>> x = np.array([1, 2, 3, 4], dtype=np.int16) ->>> y = np.array([5, 6, 7], dtype=np.int16) ->>> x[np.newaxis,:] * y[:,np.newaxis] -array([[ 5, 10, 15, 20], - [ 6, 12, 18, 24], - [ 7, 14, 21, 28]], dtype=int16) - -- Internally, array **broadcasting** is indeed implemented using 0-strides. - - -More tricks: diagonals -^^^^^^^^^^^^^^^^^^^^^^ - -.. 
seealso:: stride-diagonals.py - -**Challenge** - - * Pick diagonal entries of the matrix: (assume C memory order):: - - >>> x = np.array([[1, 2, 3], - ... [4, 5, 6], - ... [7, 8, 9]], dtype=np.int32) - - >>> x_diag = as_strided(x, shape=(3,), strides=(???,)) # doctest: +SKIP - - * Pick the first super-diagonal entries ``[2, 6]``. - - * And the sub-diagonals? - - (Hint to the last two: slicing first moves the point where striding - starts from.) - -*Solution* - - .. raw:: html - - ... - - -.. seealso:: stride-diagonals.py - -**Challenge** - - Compute the tensor trace:: - - >>> x = np.arange(5*5*5*5).reshape(5, 5, 5, 5) - >>> s = 0 - >>> for i in range(5): - ... for j in range(5): - ... s += x[j, i, j, i] - - by striding, and using ``sum()`` on the result. :: - - >>> y = as_strided(x, shape=(5, 5), strides=(TODO, TODO)) # doctest: +SKIP - >>> s2 = ... # doctest: +SKIP - >>> assert s == s2 # doctest: +SKIP - -*Solution* - - .. raw:: html - - ... - - - -.. _cache_effects: - -CPU cache effects -^^^^^^^^^^^^^^^^^ - -Memory layout can affect performance: - -.. ipython:: - - In [1]: x = np.zeros((20000,)) - - In [2]: y = np.zeros((20000*67,))[::67] - - In [3]: x.shape, y.shape - ((20000,), (20000,)) - - In [4]: %timeit x.sum() - 100000 loops, best of 3: 0.180 ms per loop - - In [5]: %timeit y.sum() - 100000 loops, best of 3: 2.34 ms per loop - - In [6]: x.strides, y.strides - ((8,), (536,)) - - -.. rubric:: Smaller strides are faster? - -.. image:: cpu-cacheline.png - -- CPU pulls data from main memory to its cache in blocks - -- If many array items consecutively operated on fit in a single block (small stride): - - - :math:`\Rightarrow` fewer transfers needed - - - :math:`\Rightarrow` faster - -.. seealso:: - - * `numexpr `_ is designed to mitigate - cache effects when evaluating array expressions. - - * `numba `_ is a compiler for Python code, - that is aware of numpy arrays. - -Findings in dissection ----------------------- - -.. 
image:: threefundamental.png - -- *memory block*: may be shared, ``.base``, ``.data`` - -- *data type descriptor*: structured data, sub-arrays, byte order, - casting, viewing, ``.astype()``, ``.view()`` - -- *strided indexing*: strides, C/F-order, slicing w/ integers, - ``as_strided``, broadcasting, stride tricks, ``diag``, CPU cache - coherence - - -Universal functions -=================== - -What are they? --------------- - -- Ufunc performs an elementwise operation on all elements of an array. - - Examples:: - - np.add, np.subtract, scipy.special.*, ... - -- Automatically support: broadcasting, casting, ... - -- The author of an ufunc only has to supply the elementwise operation, - NumPy takes care of the rest. - -- The elementwise operation needs to be implemented in C (or, e.g., Cython) - - -Parts of an Ufunc -^^^^^^^^^^^^^^^^^^ - -1. Provided by user - - .. sourcecode:: c - - void ufunc_loop(void **args, int *dimensions, int *steps, void *data) - { - /* - * int8 output = elementwise_function(int8 input_1, int8 input_2) - * - * This function must compute the ufunc for many values at once, - * in the way shown below. - */ - char *input_1 = (char*)args[0]; - char *input_2 = (char*)args[1]; - char *output = (char*)args[2]; - int i; - - for (i = 0; i < dimensions[0]; ++i) { - *output = elementwise_function(*input_1, *input_2); - input_1 += steps[0]; - input_2 += steps[1]; - output += steps[2]; - } - } - -2. The NumPy part, built by - - .. sourcecode:: c - - char types[3] - - types[0] = NPY_BYTE /* type of first input arg */ - types[1] = NPY_BYTE /* type of second input arg */ - types[2] = NPY_BYTE /* type of the output arg */ - - PyObject *python_ufunc = PyUFunc_FromFuncAndData( - ufunc_loop, - NULL, - types, - 1, /* ntypes */ - 2, /* num_inputs */ - 1, /* num_outputs */ - identity_element, - name, - docstring, - unused) - - - A ufunc can also support multiple different input-output type - combinations. - -Making it easier -^^^^^^^^^^^^^^^^ - -3.
``ufunc_loop`` is of very generic form, and NumPy provides - pre-made ones - - ================ ======================================================= - ``PyUfunc_f_f`` ``float elementwise_func(float input_1)`` - ``PyUfunc_ff_f`` ``float elementwise_func(float input_1, float input_2)`` - ``PyUfunc_d_d`` ``double elementwise_func(double input_1)`` - ``PyUfunc_dd_d`` ``double elementwise_func(double input_1, double input_2)`` - ``PyUfunc_D_D`` ``elementwise_func(npy_cdouble *input, npy_cdouble* output)`` - ``PyUfunc_DD_D`` ``elementwise_func(npy_cdouble *in1, npy_cdouble *in2, npy_cdouble* out)`` - ================ ======================================================= - - * Only ``elementwise_func`` needs to be supplied - - * ... except when your elementwise function is not in one of the above forms - -Exercise: building an ufunc from scratch ----------------------------------------- - -The Mandelbrot fractal is defined by the iteration - -.. math:: - - z \leftarrow z^2 + c - -where :math:`c = x + i y` is a complex number. This iteration is -repeated -- if :math:`z` stays finite no matter how long the iteration -runs, :math:`c` belongs to the Mandelbrot set. - -- Make ufunc called ``mandel(z0, c)`` that computes:: - - z = z0 - for k in range(iterations): - z = z*z + c - - say, 100 iterations or until ``z.real**2 + z.imag**2 > 1000``. - Use it to determine which `c` are in the Mandelbrot set. - -- Our function is a simple one, so make use of the ``PyUFunc_*`` helpers. - -- Write it in Cython - -.. seealso:: mandel.pyx, mandelplot.py - -.. only:: latex - - .. 
literalinclude:: examples/mandel.pyx - -Reminder: some pre-made Ufunc loops: - -================ ======================================================= -``PyUfunc_f_f`` ``float elementwise_func(float input_1)`` -``PyUfunc_ff_f`` ``float elementwise_func(float input_1, float input_2)`` -``PyUfunc_d_d`` ``double elementwise_func(double input_1)`` -``PyUfunc_dd_d`` ``double elementwise_func(double input_1, double input_2)`` -``PyUfunc_D_D`` ``elementwise_func(complex_double *input, complex_double* output)`` -``PyUfunc_DD_D`` ``elementwise_func(complex_double *in1, complex_double *in2, complex_double* out)`` -================ ======================================================= - -Type codes:: - - NPY_BOOL, NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, - NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, NPY_FLOAT, NPY_DOUBLE, - NPY_LONGDOUBLE, NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE, NPY_DATETIME, - NPY_TIMEDELTA, NPY_OBJECT, NPY_STRING, NPY_UNICODE, NPY_VOID - - -Solution: building an ufunc from scratch ----------------------------------------- - -.. literalinclude:: examples/mandel-answer.pyx - :language: python - -.. literalinclude:: examples/mandelplot.py - :language: python - -.. image:: mandelbrot.png - -.. note:: - - Most of the boilerplate could be automated by these Cython modules: - - https://github.com/cython/cython/wiki/MarkLodato-CreatingUfuncs - -.. rubric:: Several accepted input types - -E.g. supporting both single- and double-precision versions - -.. sourcecode:: cython - - cdef void mandel_single_point(double complex *z_in, - double complex *c_in, - double complex *z_out) nogil: - ... - - cdef void mandel_single_point_singleprec(float complex *z_in, - float complex *c_in, - float complex *z_out) nogil: - ... 
- - cdef PyUFuncGenericFunction loop_funcs[2] - cdef char input_output_types[3*2] - cdef void *elementwise_funcs[1*2] - - loop_funcs[0] = PyUFunc_DD_D - input_output_types[0] = NPY_CDOUBLE - input_output_types[1] = NPY_CDOUBLE - input_output_types[2] = NPY_CDOUBLE - elementwise_funcs[0] = mandel_single_point - - loop_funcs[1] = PyUFunc_FF_F - input_output_types[3] = NPY_CFLOAT - input_output_types[4] = NPY_CFLOAT - input_output_types[5] = NPY_CFLOAT - elementwise_funcs[1] = mandel_single_point_singleprec - - mandel = PyUFunc_FromFuncAndData( - loop_func, - elementwise_funcs, - input_output_types, - 2, # number of supported input types <---------------- - 2, # number of input args - 1, # number of output args - 0, # `identity` element, never mind this - "mandel", # function name - "mandel(z, c) -> computes iterated z*z + c", # docstring - 0 # unused - ) - - - -Generalized ufuncs ------------------- - -**ufunc** - - ``output = elementwise_function(input)`` - - Both ``output`` and ``input`` can be a single array element only. - -**generalized ufunc** - - ``output`` and ``input`` can be arrays with a fixed number of dimensions - - For example, matrix trace (sum of diag elements):: - - input shape = (n, n) - output shape = () i.e. scalar - - (n, n) -> () - - Matrix product:: - - input_1 shape = (m, n) - input_2 shape = (n, p) - output shape = (m, p) - - (m, n), (n, p) -> (m, p) - - * This is called the *"signature"* of the generalized ufunc - * The dimensions on which the g-ufunc acts, are *"core dimensions"* - -.. rubric:: Status in NumPy - -* g-ufuncs are in NumPy already ... 
-* new ones can be created with ``PyUFunc_FromFuncAndDataAndSignature`` -* most linear-algebra functions are implemented as g-ufuncs to enable working - with stacked arrays:: - - >>> import numpy as np - >>> rng = np.random.default_rng(27446968) - >>> np.linalg.det(rng.random((3, 5, 5))) - array([ 0.01829761, -0.0077266 , -0.05336566]) - >>> np.linalg._umath_linalg.det.signature - '(m,m)->()' - - * matrix multiplication this way could be useful for operating on - many small matrices at once - - * Also see ``tensordot`` and ``einsum`` - -.. The below gufunc examples were from `np.core.umath_tests`, - which is now deprecated. We need another source of example - gufuncs. See the discussion at: - - https://mail.python.org/archives/list/numpy-discussion@python.org/thread/ZG7AUSPYYUNSPQU3YUZS2XCFD7AT3BJP/ - -.. >>> import numpy.core.umath_tests as ut -.. >>> ut.matrix_multiply.signature -.. '(m,n),(n,p)->(m,p)' -.. -.. >>> x = np.ones((10, 2, 4)) -.. >>> y = np.ones((10, 4, 5)) -.. >>> ut.matrix_multiply(x, y).shape -.. (10, 2, 5) - -.. * in both examples the last two dimensions became *core dimensions*, -.. and are modified as per the *signature* -.. * otherwise, the g-ufunc operates "elementwise" - - -.. rubric:: Generalized ufunc loop - -Matrix multiplication ``(m,n),(n,p) -> (m,p)`` - -.. 
sourcecode:: c - - void gufunc_loop(void **args, int *dimensions, int *steps, void *data) - { - char *input_1 = (char*)args[0]; /* these are as previously */ - char *input_2 = (char*)args[1]; - char *output = (char*)args[2]; - - int input_1_stride_m = steps[3]; /* strides for the core dimensions */ - int input_1_stride_n = steps[4]; /* are added after the non-core */ - int input_2_strides_n = steps[5]; /* steps */ - int input_2_strides_p = steps[6]; - int output_strides_n = steps[7]; - int output_strides_p = steps[8]; - - int m = dimensions[1]; /* core dimensions are added after */ - int n = dimensions[2]; /* the main dimension; order as in */ - int p = dimensions[3]; /* signature */ - - int i; - - for (i = 0; i < dimensions[0]; ++i) { - matmul_for_strided_matrices(input_1, input_2, output, - strides for each array...); - - input_1 += steps[0]; - input_2 += steps[1]; - output += steps[2]; - } - } - - -Interoperability features -========================= - -Sharing multidimensional, typed data ------------------------------------- - -Suppose you - -1. Write a library that handles (multidimensional) binary data, - -2. Want to make it easy to manipulate the data with NumPy, or whatever - other library, - -3. ... but would **not** like to have NumPy as a dependency. - -Currently, 3 solutions: - -1. the "old" buffer interface - -2. the array interface - -3. the "new" buffer interface (:pep:`3118`) - - -The old buffer protocol ------------------------ - -- Only 1-D buffers -- No data type information -- C-level interface; ``PyBufferProcs tp_as_buffer`` in the type object -- But it's integrated into Python (e.g. strings support it) - -Mini-exercise using `Pillow `_ (Python -Imaging Library): - -..
seealso:: pilbuffer.py - ->>> from PIL import Image ->>> data = np.zeros((200, 200, 4), dtype=np.uint8) ->>> data[:, :] = [255, 0, 0, 255] # Red ->>> # In PIL, RGBA images consist of 32-bit integers whose bytes are [RR,GG,BB,AA] ->>> data = data.view(np.int32).squeeze() ->>> img = Image.frombuffer("RGBA", (200, 200), data, "raw", "RGBA", 0, 1) ->>> img.save('test.png') - -**Q:** - - Check what happens if ``data`` is now modified, and ``img`` saved again. - -The old buffer protocol ------------------------ - -.. literalinclude:: examples/pilbuffer-answer.py - :language: python - -.. image:: test.png - -.. image:: test2.png - - -Array interface protocol ------------------------- - -- Multidimensional buffers -- Data type information present -- NumPy-specific approach; slowly deprecated (but not going away) -- Not integrated in Python otherwise - -.. seealso:: - - Documentation: - https://numpy.org/doc/stable/reference/arrays.interface.html - -:: - - >>> x = np.array([[1, 2], [3, 4]]) - >>> x.__array_interface__ # doctest: +SKIP - {'data': (171694552, False), # memory address of data, is readonly? - 'descr': [('', '>> import matplotlib - >>> matplotlib.use('Agg') - >>> import matplotlib.pyplot as plt - >>> import os - >>> if not os.path.exists('data'): os.mkdir('data') - >>> plt.imsave('data/test.png', data) - - -:: - >>> from PIL import Image - >>> img = Image.open('data/test.png') - >>> img.__array_interface__ - {'version': 3, - 'data': ..., - 'shape': (200, 200, 4), - 'typestr': '|u1'} - >>> x = np.asarray(img) - >>> x.shape - (200, 200, 4) - - -.. note:: - - A more C-friendly variant of the array interface is also defined. - -.. 
_array_siblings: - -Array siblings: :class:`chararray`, :class:`maskedarray` -======================================================== - -:class:`chararray`: vectorized string operations --------------------------------------------------- - ->>> x = np.char.asarray(['a', ' bbb', ' ccc']) ->>> x -chararray(['a', ' bbb', ' ccc'], dtype='>> x.upper() -chararray(['A', ' BBB', ' CCC'], dtype='>> x = np.array([1, 2, 3, -99, 5]) - -One way to describe this is to create a masked array:: - - >>> mx = np.ma.masked_array(x, mask=[0, 0, 0, 1, 0]) - >>> mx - masked_array(data=[1, 2, 3, --, 5], - mask=[False, False, False, True, False], - fill_value=999999) - -Masked mean ignores masked data:: - - >>> mx.mean() - np.float64(2.75) - >>> np.mean(mx) - np.float64(2.75) - -.. warning:: Not all NumPy functions respect masks, for instance - ``np.dot``, so check the return types. - -The ``masked_array`` returns a **view** to the original array:: - - >>> mx[1] = 9 - >>> x - array([ 1, 9, 3, -99, 5]) - -The mask -^^^^^^^^ - -You can modify the mask by assigning:: - - >>> mx[1] = np.ma.masked - >>> mx - masked_array(data=[1, --, 3, --, 5], - mask=[False, True, False, True, False], - fill_value=999999) - - -The mask is cleared on assignment:: - - >>> mx[1] = 9 - >>> mx - masked_array(data=[1, 9, 3, --, 5], - mask=[False, False, False, True, False], - fill_value=999999) - - -The mask is also available directly:: - - >>> mx.mask - array([False, False, False, True, False]) - -The masked entries can be filled with a given value to get an usual -array back:: - - >>> x2 = mx.filled(-1) - >>> x2 - array([ 1, 9, 3, -1, 5]) - -The mask can also be cleared:: - - >>> mx.mask = np.ma.nomask - >>> mx - masked_array(data=[1, 9, 3, -99, 5], - mask=[False, False, False, False, False], - fill_value=999999) - - -Domain-aware functions -^^^^^^^^^^^^^^^^^^^^^^ - -The masked array package also contains domain-aware functions:: - - >>> np.ma.log(np.array([1, 2, -1, -2, 3, -5])) - masked_array(data=[0.0, 
0.693147180559..., --, --, 1.098612288668..., --], - mask=[False, False, True, True, False, True], - fill_value=1e+20) - - -.. note:: - - Streamlined and more seamless support for dealing with missing data - in arrays is making its way into NumPy 1.7. Stay tuned! - -.. topic:: Example: Masked statistics - - Canadian rangers were distracted when counting hares and lynxes in - 1903-1910 and 1917-1918, and got the numbers wrong. (Carrot - farmers stayed alert, though.) Compute the mean populations over - time, ignoring the invalid numbers. :: - - >>> data = np.loadtxt('data/populations.txt') - >>> populations = np.ma.masked_array(data[:,1:]) - >>> year = data[:, 0] - - >>> bad_years = (((year >= 1903) & (year <= 1910)) - ... | ((year >= 1917) & (year <= 1918))) - >>> # '&' means 'and' and '|' means 'or' - >>> populations[bad_years, 0] = np.ma.masked - >>> populations[bad_years, 1] = np.ma.masked - - >>> populations.mean(axis=0) - masked_array(data=[40472.72727272727, 18627.272727272728, 42400.0], - mask=[False, False, False], - fill_value=1e+20) - - >>> populations.std(axis=0) - masked_array(data=[21087.656489006717, 15625.799814240254, 3322.5062255844787], - mask=[False, False, False], - fill_value=1e+20) - - - Note that Matplotlib knows about masked arrays:: - - >>> plt.plot(year, populations, 'o-') - [<matplotlib.lines.Line2D object at ...>, ...] - -.. image:: auto_examples/images/sphx_glr_plot_maskedstats_001.png - :width: 50% - :target: auto_examples/plot_maskedstats.html - :align: center - - -:class:`recarray`: purely convenience ---------------------------------------- - ->>> arr = np.array([('a', 1), ('b', 2)], dtype=[('x', 'S1'), ('y', int)]) ->>> arr2 = arr.view(np.recarray) ->>> arr2.x -array([b'a', b'b'], dtype='|S1') ->>> arr2.y -array([1, 2]) - - -Summary -======= - -* Anatomy of the ndarray: data, dtype, strides.
- -* Universal functions: elementwise operations, how to make new ones - -* Ndarray subclasses - -* Various buffer interfaces for integration with other tools - -* Recent additions: PEP 3118, generalized ufuncs - - -Contributing to NumPy/SciPy -=========================== - - Get this tutorial: https://www.euroscipy.org/talk/882 - -Why ---- - -- "There's a bug?" - -- "I don't understand what this is supposed to do?" - -- "I have this fancy code. Would you like to have it?" - -- "I'd like to help! What can I do?" - -Reporting bugs --------------- - -- Bug tracker (prefer **this**) - - - https://github.com/numpy/numpy/issues - - - https://github.com/scipy/scipy/issues - - - Click the "Sign up" link to get an account - -- Mailing lists (https://numpy.org/community/) - - - If you're unsure - - - No replies in a week or so? Just file a bug ticket. - - -Good bug report -^^^^^^^^^^^^^^^^ - -:: - - Title: numpy.random.permutations fails for non-integer arguments - - I'm trying to generate random permutations, using numpy.random.permutations - - When calling numpy.random.permutation with non-integer arguments - it fails with a cryptic error message:: - - >>> rng.permutation(12) - array([ 2, 6, 4, 1, 8, 11, 10, 5, 9, 3, 7, 0]) - >>> rng.permutation(12.) #doctest: +SKIP - Traceback (most recent call last): - File "", line 1, in - File "_generator.pyx", line 4844, in numpy.random._generator.Generator.permutation - numpy.exceptions.AxisError: axis 0 is out of bounds for array of dimension 0 - - This also happens with long arguments, and so - np.random.permutation(X.shape[0]) where X is an array fails on 64 - bit windows (where shape is a tuple of longs). - - It would be great if it could cast to integer or at least raise a - proper error for non-integer types. - - I'm using NumPy 1.4.1, built from the official tarball, on Windows - 64 with Visual studio 2008, on Python.org 64-bit Python. - -0. What are you trying to do? - -1. 
**Small code snippet reproducing the bug** (if possible) - - - What actually happens - - - What you'd expect - -2. Platform (Windows / Linux / OSX, 32/64 bits, x86/PPC, ...) - -3. Version of NumPy/SciPy - - >>> print(np.__version__) - 2... - - **Check that the following is what you expect** - - >>> print(np.__file__) - /... - - In case you have old/broken NumPy installations lying around. - - If unsure, try to remove existing NumPy installations, and reinstall... - -Contributing to documentation ------------------------------ - -1. Documentation editor - - - https://numpy.org/doc/stable/ - - - Registration - - - Register an account - - - Subscribe to ``scipy-dev`` mailing list (subscribers-only) - - - Problem with mailing lists: you get mail - - - But: **you can turn mail delivery off** - - - "change your subscription options", at the bottom of - - https://mail.python.org/mailman3/lists/scipy-dev.python.org/ - - - Send a mail @ ``scipy-dev`` mailing list; ask for activation:: - - To: scipy-dev@scipy.org - - Hi, - - I'd like to edit NumPy/SciPy docstrings. My account is XXXXX - - Cheers, - N. N. - - - Check the style guide: - - - https://numpy.org/doc/stable/ - - - Don't be intimidated; to fix a small thing, just fix it - - - Edit - -2. Edit sources and send patches (as for bugs) - -3. Complain on the mailing list - - -Contributing features ---------------------- - - The contribution of features is documented on https://numpy.org/doc/stable/dev/ - -How to help, in general ------------------------ - -- Bug fixes always welcome! - - - What irks you most - - Browse the tracker - -- Documentation work - - - API docs: improvements to docstrings - - - Know some SciPy module well? 
- - - *User guide* - - - https://numpy.org/doc/stable/user/ - -- Ask on communication channels: - - - ``numpy-discussion`` list - - ``scipy-dev`` list diff --git a/advanced/advanced_numpy/test.png b/advanced/advanced_numpy/test.png deleted file mode 100644 index d4775a833..000000000 Binary files a/advanced/advanced_numpy/test.png and /dev/null differ diff --git a/advanced/advanced_python/index.md b/advanced/advanced_python/index.md new file mode 100644 index 000000000..0ba3567e5 --- /dev/null +++ b/advanced/advanced_python/index.md @@ -0,0 +1,1257 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Advanced Python Constructs + +**Author** _Zbigniew Jędrzejewski-Szmek_ + +This section covers some features of the Python language which can +be considered advanced — in the sense that not every language has +them, and also in the sense that they are more useful in more +complicated programs or libraries, but not in the sense of being +particularly specialized, or particularly complicated. + +It is important to underline that this chapter is purely about the +language itself — about features supported through special syntax +complemented by functionality of the Python stdlib, which could not be +implemented through clever external modules. + +The process of developing the Python programming language, its syntax, is very +transparent; proposed changes are evaluated from various angles and discussed +via _Python Enhancement Proposals_ — [PEPs]. As a result, features described in +this chapter were added after it was shown that they indeed solve real problems +and that their use is as simple as possible. 
+ +## Iterators, generator expressions and generators + +### Iterators + +:::{sidebar} Simplicity + +> This duplication of effort is wasteful, and replacing the various home-grown +> approaches with a standard feature usually ends up making things more readable, +> and interoperable as well. — _Guido van Rossum_ in [Adding Optional Static Typing to Python](https://www.artima.com/weblogs/viewpost.jsp?thread=86641) + +::: + +An iterator is an object adhering to the [iterator protocol] — basically this +means that it has a `next ` method, which, when called, returns +the next item in the sequence, and when there's nothing to return, raises the +`StopIteration ` exception. + +An iterator object allows to loop just once. It +holds the state (position) of a single iteration, or from the other +side, each loop over a sequence requires a single iterator +object. This means that we can iterate over the same sequence more +than once concurrently. Separating the iteration logic from the +sequence allows us to have more than one way of iteration. + +Calling the `__iter__ ` method on a container to +create an iterator object is the most straightforward way to get hold +of an iterator. The `iter` function does that for us, saving a few +keystrokes. + +```{code-cell} +nums = [1, 2, 3] # note that ... varies: these are different objects +iter(nums) +``` + +```{code-cell} +nums.__iter__() +``` + +```{code-cell} +nums.__reversed__() +``` + +```{code-cell} +it = iter(nums) +next(it) +``` + +```{code-cell} +next(it) +``` + +```{code-cell} +next(it) +``` + +```{code-cell} +:tags: [raises-exception] + +next(it) +``` + +When used in a loop, `StopIteration ` is +swallowed and causes the loop to finish. But with explicit invocation, +we can see that once the iterator is exhausted, accessing it raises an +exception. + +Using the `for..in` loop also uses the `__iter__` +method. This allows us to transparently start the iteration over a +sequence. 
But if we already have the iterator, we want to be able to +use it in a `for` loop in the same way. In order to achieve this, +iterators in addition to `next` are also required to have a method +called `__iter__` which returns the iterator (`self`). + +Support for iteration is pervasive in Python: +all sequences and unordered containers in the standard library allow +this. The concept is also stretched to other things: +e.g. `file` objects support iteration over lines. + +```python +with open("/etc/fstab") as f: + assert f is f.__iter__() +``` + +The `file` is an iterator itself and its `__iter__` method doesn't create +a separate object: only a single thread of sequential access is allowed. + ++++ + +### Generator expressions + +A second way in which iterator objects are created is through +**generator expressions**, the basis for **list comprehensions**. To +increase clarity, a generator expression must always be enclosed in +parentheses or an expression. If round parentheses are used, then a +generator iterator is created. If rectangular parentheses are used, +the process is short-circuited and we get a `list`. + +```{code-cell} +(i for i in nums) +``` + +```{code-cell} +[i for i in nums] +``` + +```{code-cell} +list(i for i in nums) +``` + +The list comprehension syntax also extends to +**dictionary and set comprehensions**. +A `set` is created when the generator expression is enclosed in curly +braces. A `dict` is created when the generator expression contains +"pairs" of the form `key:value`: + +```{code-cell} +{i for i in range(3)} +``` + +```{code-cell} +{i:i**2 for i in range(3)} +``` + +One _gotcha_ should be mentioned: in old Pythons the index variable +(`i`) would leak, and in versions >= 3 this is fixed. + +### Generators + +:::{sidebar} Generators + +> A generator is a function that produces a sequence of results instead of +> a single value.
— _David Beazley_ in the slides for [A Curious Course on +> Coroutines and Concurrency](https://www.dabeaz.com/coroutines) + +::: + +A third way to create iterator objects is to call a generator function. +A **generator** is a function containing the keyword `yield`. It must be +noted that the mere presence of this keyword completely changes the +nature of the function: this `yield` statement doesn't have to be +invoked, or even reachable, but causes the function to be marked as a +generator. When a normal function is called, the instructions +contained in the body start to be executed. When a generator is +called, the execution stops before the first instruction in the body. +An invocation of a generator function creates a generator object, +adhering to the iterator protocol. As with normal function +invocations, concurrent and recursive invocations are allowed. + +When `next` is called, the function is executed until the first `yield`. +Each encountered `yield` statement gives a value that becomes the return +value of `next`. After executing the `yield` statement, the +execution of this function is suspended. + +```{code-cell} +def f(): + yield 1 + yield 2 + +f() +``` + +```{code-cell} +gen = f() +next(gen) +``` + +```{code-cell} +next(gen) +``` + +```{code-cell} +:tags: [raises-exception] + +next(gen) +``` + +Let's go over the life of the single invocation of the generator +function. + +```{code-cell} +def f(): + print("-- start --") + yield 3 + print("-- finish --") + yield 4 + +gen = f() +next(gen) +``` + +```{code-cell} +next(gen) +``` + +```{code-cell} +:tags: [raises-exception] + +next(gen) +``` + +Contrary to a normal function, where executing `f()` would +immediately cause the first `print` to be executed, `gen` is +assigned without executing any statements in the function body. Only +when `gen.__next__()` is invoked by `next`, the statements up to +the first `yield` are executed.
The second `next` prints +`-- finish --` and execution halts on the second `yield`. The third +`next` falls off the end of the function. +Since no `yield` was reached, an exception is raised. + +What happens with the function after a yield, when the control passes +to the caller? The state of each generator is stored in the generator +object. From the point of view of the generator function, it looks +almost as if it was running in a separate thread, but this is just an +illusion: execution is strictly single-threaded, but the interpreter +keeps and restores the state in between the requests for the next value. + +Why are generators useful? As noted in the parts about iterators, a +generator function is just a different way to create an iterator +object. Everything that can be done with `yield` statements, could +also be done with `next` methods. Nevertheless, using a +function and having the interpreter perform its magic to create an +iterator has advantages. A function can be much shorter +than the definition of a class with the required `next` and +`__iter__` methods. What is more important, it is easier for the author +of the generator to understand the state which is kept in local +variables, as opposed to instance attributes, which have to be +used to pass data between consecutive invocations of `next` on +an iterator object. + +A broader question is why are iterators useful? When an iterator is +used to power a loop, the loop becomes very simple. The code to +initialise the state, to decide if the loop is finished, and to find +the next value is extracted into a separate place. This highlights the +body of the loop — the interesting part. In addition, it is possible +to reuse the iterator code in other places. + +### Bidirectional communication + +Each `yield` statement causes a value to be passed to the +caller. This is the reason for the introduction of generators +by {pep}`255`. But communication in the +reverse direction is also useful.
One obvious way would be some +external state, either a global variable or a shared mutable +object. Direct communication is possible thanks to {pep}`342`. +It is achieved by turning the previously boring +`yield` statement into an expression. When the generator resumes +execution after a `yield` statement, the caller can call a method on +the generator object to either pass a value **into** the generator, +which then is returned by the `yield` statement, or a +different method to inject an exception into the generator. + +The first of the new methods is `send(value) `, which +is similar to `next() `, but passes `value` into +the generator to be used for the value of the `yield` expression. In +fact, `next(g)` and `g.send(None)` are equivalent. + +The second of the new methods is +`throw(type, value=None, traceback=None) ` +which is equivalent to: + ++++ + +```python +raise type, value, traceback +``` + ++++ + +at the point of the `yield` statement. + +Unlike `raise` (which immediately raises an exception from the +current execution point), `throw()` first resumes the generator, and +only then raises the exception. The word throw was picked because +it is suggestive of putting the exception in another location, and is +associated with exceptions in other languages. + +What happens when an exception is raised inside the generator? It can +be either raised explicitly or when executing some statements or it +can be injected at the point of a `yield` statement by means of the +`throw()` method. In either case, such an exception propagates in the +standard manner: it can be intercepted by an `except` or `finally` +clause, or otherwise it causes the execution of the generator function +to be aborted and propagates in the caller. + +For completeness' sake, it's worth mentioning that generator iterators +also have a `close() ` method, which can be used to +force a generator that would otherwise be able to provide more values +to finish immediately.
It allows the generator `__del__ ` +method to destroy objects holding the state of generator. +Let's define a generator which just prints what is passed in through +send and throw. + +```{code-cell} +import itertools + +def g(): + print('--start--') + for i in itertools.count(): + print('--yielding %i--' % i) + try: + ans = yield i + except GeneratorExit: + print('--closing--') + raise + except Exception as e: + print('--yield raised %r--' % e) + else: + print('--yield returned %s--' % ans) +``` + +```{code-cell} +it = g() +next(it) +``` + +```{code-cell} +it.send(11) +``` + +```{code-cell} +it.throw(IndexError) +``` + +```{code-cell} +it.close() +``` + +### Chaining generators + +:::{note} +This is a preview of {pep}`380` (not yet implemented, but accepted +for Python 3.3). +::: + +Let's say we are writing a generator and we want to yield a number of +values generated by a second generator, a **subgenerator**. +If yielding of values is the only concern, this can be performed +without much difficulty using a loop such as + ++++ + +```python +subgen = some_other_generator() +for v in subgen: + yield v +``` + ++++ + +However, if the subgenerator is to interact properly with the caller +in the case of calls to `send()`, `throw()` and `close()`, +things become considerably more difficult. The `yield` statement has +to be guarded by a `try..except..finally` structure +similar to the one defined in the previous section to "debug" the +generator function. Such code is provided in {pep}`380#id13`, here it +suffices to say that new syntax to properly yield from a subgenerator +is being introduced in Python 3.3: + ++++ + +```python +yield from some_other_generator() +``` + ++++ + +This behaves like the explicit loop above, repeatedly yielding values +from `some_other_generator` until it is exhausted, but also forwards +`send`, `throw` and `close` to the subgenerator. 
+ +## Decorators + +:::{sidebar} Summary + +> This amazing feature appeared in the language almost apologetically and with +> concern that it might not be that useful. — _Bruce Eckel_ in [An Introduction to Python Decorators](https://www.artima.com/weblogs/viewpost.jsp?thread=240808) + +::: + +Since functions and classes are objects, they can be passed +around. Since they are mutable objects, they can be modified. The act +of altering a function or class object after it has been constructed +but before it is bound to its name is called decorating. + +There are two things hiding behind the name "decorator" — one is the +function which does the work of decorating, i.e. performs the real +work, and the other one is the expression adhering to the decorator +syntax, i.e. an at-symbol and the name of the decorating function. + +Functions can be decorated by using the decorator syntax for +functions: + ++++ + +```python +@decorator # ② +def function(): # ① + pass +``` + ++++ + +- A function is defined in the standard way. ① +- An expression starting with `@` placed before the function + definition is the decorator ②. The part after `@` must be a simple + expression, usually this is just the name of a function or class. This + part is evaluated first, and after the function defined below is + ready, the decorator is called with the newly defined function object + as the single argument. The value returned by the decorator is + attached to the original name of the function. + +Decorators can be applied to functions and to classes. For +classes the semantics are identical — the original class definition +is used as an argument to call the decorator and whatever is returned +is assigned under the original name.
+ +Before the decorator syntax was implemented ({pep}`318`), it was +possible to achieve the same effect by assigning the function or class +object to a temporary variable and then invoking the decorator +explicitly and then assigning the return value to the name of the +function. This sounds like more typing, and it is, and also the name of +the decorated function doubling as a temporary variable must be used +at least three times, which is prone to errors. Nevertheless, the +example above is equivalent to: + ++++ + +```python +def function(): # ① + pass +function = decorator(function) # ② +``` + ++++ + +Decorators can be stacked — the order of application is +bottom-to-top, or inside-out. The semantics are such that the originally +defined function is used as an argument for the first decorator, +whatever is returned by the first decorator is used as an argument for +the second decorator, ..., and whatever is returned by the last +decorator is attached under the name of the original function. + +The decorator syntax was chosen for its readability. Since the +decorator is specified before the header of the function, it is +obvious that it is not a part of the function body and it's clear that +it can only operate on the whole function. Because the expression is +prefixed with `@` it stands out and is hard to miss ("in your face", +according to the PEP :) ). When more than one decorator is applied, +each one is placed on a separate line in an easy to read way. + +### Replacing or tweaking the original object + +Decorators can either return the same function or class object or they +can return a completely different object. In the first case, the +decorator can exploit the fact that function and class objects are +mutable and add attributes, e.g. add a docstring to a class. A +decorator might do something useful even without modifying the object, +for example register the decorated class in a global registry.
In the +second case, virtually anything is possible: when something +different is substituted for the original function or class, the new +object can be completely different. Nevertheless, such behaviour is +not the purpose of decorators: they are intended to tweak the +decorated object, not do something unpredictable. Therefore, when a +function is "decorated" by replacing it with a different function, the +new function usually calls the original function, after doing some +preparatory work. Likewise, when a class is "decorated" by replacing +it with a new class, the new class is usually derived from the +original class. When the purpose of the decorator is to do something +"every time", like to log every call to a decorated function, only the +second type of decorators can be used. On the other hand, if the first +type is sufficient, it is better to use it, because it is simpler. + +### Decorators implemented as classes and as functions + +The only _requirement_ on decorators is that they can be called with a +single argument. This means that decorators can be implemented as +normal functions, or as classes with a `__call__ ` +method, or in theory, even as lambda functions. + +Let's compare the function and class approaches. The decorator +expression (the part after `@`) can be either just a name, or a +call. The bare-name approach is nice (less to type, looks cleaner, +etc.), but is only possible when no arguments are needed to customise +the decorator.
Decorators written as functions can be used in those +two cases: + +```{code-cell} +def simple_decorator(function): + print("doing decoration") + return function +@simple_decorator +def function(): + print("inside function") +``` + +```{code-cell} +function() +``` + +```{code-cell} +def decorator_with_arguments(arg): + print("defining the decorator") + def _decorator(function): + # in this inner function, arg is available too + print("doing decoration, %r" % arg) + return function + return _decorator +@decorator_with_arguments("abc") +def function(): + print("inside function") +``` + +```{code-cell} +function() +``` + +The two trivial decorators above fall into the category of decorators +which return the original function. If they were to return a new +function, an extra level of nestedness would be required. +In the worst case, three levels of nested functions. + +```{code-cell} +def replacing_decorator_with_args(arg): + print("defining the decorator") + def _decorator(function): + # in this inner function, arg is available too + print("doing decoration, %r" % arg) + def _wrapper(*args, **kwargs): + print("inside wrapper, %r %r" % (args, kwargs)) + return function(*args, **kwargs) + return _wrapper + return _decorator +@replacing_decorator_with_args("abc") +def function(*args, **kwargs): + print("inside function, %r %r" % (args, kwargs)) + return 14 +``` + +```{code-cell} +function(11, 12) +``` + +The `_wrapper` function is defined to accept all positional and +keyword arguments. In general we cannot know what arguments the +decorated function is supposed to accept, so the wrapper function +just passes everything to the wrapped function. One unfortunate +consequence is that the apparent argument list is misleading. + +Compared to decorators defined as functions, complex decorators +defined as classes are simpler. When an object is created, the +`__init__ ` method is only allowed to return `None`, +and the type of the created object cannot be changed. 
This means that +when a decorator is defined as a class, it doesn't make much sense to +use the argument-less form: the final decorated object would just be +an instance of the decorating class, returned by the constructor call, +which is not very useful. Therefore it's enough to discuss class-based +decorators where arguments are given in the decorator expression and +the decorator `__init__` method is used for decorator construction. + +```{code-cell} +class decorator_class(object): + def __init__(self, arg): + # this method is called in the decorator expression + print("in decorator init, %s" % arg) + self.arg = arg + def __call__(self, function): + # this method is called to do the job + print("in decorator call, %s" % self.arg) + return function +deco_instance = decorator_class('foo') +``` + +```{code-cell} +@deco_instance +def function(*args, **kwargs): + print("in function, %s %s" % (args, kwargs)) +``` + +```{code-cell} +function() +``` + +Contrary to normal rules ({PEP}`8`) decorators written as classes +behave more like functions and therefore their name often starts with a +lowercase letter. + +In reality, it doesn't make much sense to create a new class just to +have a decorator which returns the original function. Objects are +supposed to hold state, and such decorators are more useful when the +decorator returns a new object. 
+ +```{code-cell} +class replacing_decorator_class(object): + def __init__(self, arg): + # this method is called in the decorator expression + print("in decorator init, %s" % arg) + self.arg = arg + def __call__(self, function): + # this method is called to do the job + print("in decorator call, %s" % self.arg) + self.function = function + return self._wrapper + def _wrapper(self, *args, **kwargs): + print("in the wrapper, %s %s" % (args, kwargs)) + return self.function(*args, **kwargs) +deco_instance = replacing_decorator_class('foo') +``` + +```{code-cell} +@deco_instance +def function(*args, **kwargs): + print("in function, %s %s" % (args, kwargs)) +``` + +```{code-cell} +function(11, 12) +``` + +A decorator like this can do pretty much anything, since it can modify +the original function object and mangle the arguments, call the +original function or not, and afterwards mangle the return value. + +### Copying the docstring and other attributes of the original function + +When a new function is returned by the decorator to replace the +original function, an unfortunate consequence is that the original +function name, the original docstring, the original argument list are +lost. Those attributes of the original function can partially be "transplanted" +to the new function by setting `__doc__` (the docstring), `__module__` +and `__name__` (the full name of the function), and +`__annotations__` (extra information about arguments and the return +value of the function available in Python 3). This can be done +automatically by using `functools.update_wrapper`. + +:::{admonition} `functools.update_wrapper(wrapper, wrapped) ` +"Update a wrapper function to look like the wrapped function." 
+ +```{code-cell} +import functools +def replacing_decorator_with_args(arg): + print("defining the decorator") + def _decorator(function): + print("doing decoration, %r" % arg) + def _wrapper(*args, **kwargs): + print("inside wrapper, %r %r" % (args, kwargs)) + return function(*args, **kwargs) + return functools.update_wrapper(_wrapper, function) + return _decorator +@replacing_decorator_with_args("abc") +def function(): + "extensive documentation" + print("inside function") + return 14 +``` + +```{code-cell} +function +``` + +```{code-cell} +print(function.__doc__) +``` + +::: + +One important thing is missing from the list of attributes which can +be copied to the replacement function: the argument list. The default +values for arguments can be modified through the `__defaults__`, +`__kwdefaults__` attributes, but unfortunately the argument list +itself cannot be set as an attribute. This means that +`help(function)` will display a useless argument list which will be +confusing for the user of the function. An effective but ugly way +around this problem is to create the wrapper dynamically, using +`eval`. This can be automated by using the external `decorator` +module. It provides support for the `decorator` decorator, which takes a +wrapper and turns it into a decorator which preserves the function +signature. + +To sum things up, decorators should always use `functools.update_wrapper` +or some other means of copying function attributes. + +### Examples in the standard library + +First, it should be mentioned that there's a number of useful +decorators available in the standard library. There are three decorators +which really form a part of the language: + +- `classmethod` causes a method to become a "class method", + which means that it can be invoked without creating an instance of + the class. When a normal method is invoked, the interpreter inserts + the instance object as the first positional parameter, + `self`. 
When a class method is invoked, the class itself is given + as the first parameter, often called `cls`. + + Class methods are still accessible through the class' namespace, so + they don't pollute the module's namespace. Class methods can be used + to provide alternative constructors: + +```{code-cell} + class Array(object): + def __init__(self, data): + self.data = data + + @classmethod + def fromfile(cls, file): + data = numpy.load(file) + return cls(data) +``` + +This is cleaner than using a multitude of flags to `__init__`. + +- `staticmethod` is applied to methods to make them "static", + i.e. basically a normal function, but accessible through the class + namespace. This can be useful when the function is only needed + inside this class (its name would then be prefixed with `_`), or when we + want the user to think of the method as connected to the class, + despite an implementation which doesn't require this. + +- `property` is the pythonic answer to the problem of getters + and setters. A method decorated with `property` becomes a getter + which is automatically called on attribute access. + +```{code-cell} +class A(object): + @property + def a(self): + "an important attribute" + return "a value" +A.a +``` + +```{code-cell} +A().a +``` + +In this example, `A.a` is a read-only attribute. It is also +documented: `help(A)` includes the docstring for attribute `a` +taken from the getter method. Defining `a` as a property allows it +to be calculated on the fly, and has the side effect of making it +read-only, because no setter is defined. + +To have a setter and a getter, two methods are required, +obviously: + +```{code-cell} + class Rectangle(object): + def __init__(self, edge): + self.edge = edge + + @property + def area(self): + """Computed area. + + Setting this updates the edge length to the proper value.
+ """ + return self.edge**2 + + @area.setter + def area(self, area): + self.edge = area ** 0.5 +``` + +The way that this works, is that the `property` decorator replaces +the getter method with a property object. This object in turn has +three methods, `getter`, `setter`, and `deleter`, which can be +used as decorators. Their job is to set the getter, setter and +deleter of the property object (stored as attributes `fget`, +`fset`, and `fdel`). The getter can be set like in the example +above, when creating the object. When defining the setter, we +already have the property object under `area`, and we add the +setter to it by using the `setter` method. All this happens when +we are creating the class. + +Afterwards, when an instance of the class has been created, the +property object is special. When the interpreter executes attribute +access, assignment, or deletion, the job is delegated to the methods +of the property object. + +To make everything crystal clear, let's define a "debug" example: + +```{code-cell} +class D(object): + @property + def a(self): + print("getting 1") + return 1 + @a.setter + def a(self, value): + print("setting %r" % value) + @a.deleter + def a(self): + print("deleting") +D.a +``` + +```{code-cell} +D.a.fget +``` + +```{code-cell} +D.a.fset +``` + +```{code-cell} +D.a.fdel +``` + +```{code-cell} +d = D() # ... varies, this is not the same `a` function +d.a +``` + +```{code-cell} +d.a = 2 +``` + +```{code-cell} +del d.a +``` + +```{code-cell} +d.a +``` + +Properties are a bit of a stretch for the decorator syntax. One of the +premises of the decorator syntax — that the name is not duplicated +— is violated, but nothing better has been invented so far. It is +just good style to use the same name for the getter, setter, and +deleter methods. + +% property documentation mentions that this only works for +% old-style classes, but this seems to be an error. 
+ +Some newer examples include: + +- `functools.lru_cache` memoizes an arbitrary function + maintaining a limited cache of arguments:answer pairs (Python 3.2) +- `functools.total_ordering` is a class decorator which fills in + missing ordering methods + (`__lt__ `, `__gt__ `, + `__le__ `, ...) + based on a single available one. + + + +### Deprecation of functions + +Let's say we want to print a deprecation warning on stderr on the +first invocation of a function we don't like anymore. If we don't want +to modify the function, we can use a decorator: + +```{code-cell} +class deprecated(object): + """Print a deprecation warning once on first use of the function. + + >>> @deprecated() + ... def f(): + ... pass + >>> f() + f is deprecated + """ + def __call__(self, func): + self.func = func + self.count = 0 + return self._wrapper + def _wrapper(self, *args, **kwargs): + self.count += 1 + if self.count == 1: + print(self.func.__name__, 'is deprecated') + return self.func(*args, **kwargs) +``` + + + +It can also be implemented as a function: + +```{code-cell} +def deprecated(func): + """Print a deprecation warning once on first use of the function. + + >>> @deprecated + ... def f(): + ... pass + >>> f() + f is deprecated + """ + count = [0] + def wrapper(*args, **kwargs): + count[0] += 1 + if count[0] == 1: + print(func.__name__, 'is deprecated') + return func(*args, **kwargs) + return wrapper +``` + +### A `while`-loop removing decorator + +Let's say we have a function which returns a list of things, and this +list is created by running a loop. If we don't know how many objects will +be needed, the standard way to do this is something like: + +```{code-cell} +def find_answers(): + answers = [] + while True: + ans = look_for_next_answer() + if ans is None: + break + answers.append(ans) + return answers +``` + +This is fine, as long as the body of the loop is fairly compact. Once +it becomes more complicated, as often happens in real code, this +becomes pretty unreadable.
We could simplify this by using `yield` +statements, but then the user would have to explicitly call +`list(find_answers())`. + +We can define a decorator which constructs the list for us: + +```{code-cell} +def vectorized(generator_func): + def wrapper(*args, **kwargs): + return list(generator_func(*args, **kwargs)) + return functools.update_wrapper(wrapper, generator_func) +``` + +Our function then becomes: + +```{code-cell} +@vectorized +def find_answers(): + while True: + ans = look_for_next_answer() + if ans is None: + break + yield ans +``` + +### A plugin registration system + +This is a class decorator which doesn't modify the class, but just +puts it in a global registry. It falls into the category of decorators +returning the original object: + +```{code-cell} +class WordProcessor(object): + PLUGINS = [] + def process(self, text): + for plugin in self.PLUGINS: + text = plugin().cleanup(text) + return text + + @classmethod + def plugin(cls, plugin): + cls.PLUGINS.append(plugin) + +@WordProcessor.plugin +class CleanMdashesExtension(object): + def cleanup(self, text): + return text.replace('&mdash;', u'\N{em dash}') +``` + +Here we use a decorator to decentralise the registration of +plugins. We call our decorator with a noun, instead of a verb, because +we use it to declare that our class is a plugin for +`WordProcessor`. Method `plugin` simply appends the class to the +list of plugins. + +A word about the plugin itself: it replaces the HTML entity for em-dash +with a real Unicode em-dash character. It exploits the [unicode +literal notation][unicode literal notation] to insert a character by using its name in the +unicode database ("EM DASH"). If the Unicode character was inserted +directly, it would be impossible to distinguish it from an en-dash in +the source of a program.
+ +:::{admonition} See also + +**More examples and reading** + +- {pep}`318` (function and method decorator syntax) + +- {pep}`3129` (class decorator syntax) + +- + +- + +- + +- Bruce Eckel + + - [Decorators I]: Introduction to Python Decorators + - [Python Decorators II]: Decorator Arguments + - [Python Decorators III]: A Decorator-Based Build System + ::: + +## Context managers + +A context manager is an object with `__enter__ ` and +`__exit__ ` methods which can be used in the `with` statement: + ++++ + +```python +with manager as var: + do_something(var) +``` + ++++ + +is in the simplest case + +equivalent to + ++++ + +```python +var = manager.__enter__() +try: + do_something(var) +finally: + manager.__exit__() +``` + ++++ + +In other words, the context manager protocol defined in {pep}`343` +permits the extraction of the boring part of a `try..except..finally` structure +into a separate class leaving only the interesting `do_something` block. + +1. The `__enter__ ` method is called first. It can + return a value which will be assigned to `var`. + The `as`-part is optional: if it isn't present, the value + returned by `__enter__` is simply ignored. +2. The block of code underneath `with` is executed. Just like with + `try` clauses, it can either execute successfully to the end, or + it can `break`, `continue` or `return`, or + it can throw an exception. Either way, after the block is finished, + the `__exit__ ` method is called. + If an exception was thrown, the information about the exception is + passed to `__exit__`, which is described below in the next + subsection. In the normal case, exceptions can be ignored, just + like in a `finally` clause, and will be rethrown after + `__exit__` is finished. 
+ +Let's say we want to make sure that a file is closed immediately after +we are done writing to it: + +```{code-cell} +class closing(object): + def __init__(self, obj): + self.obj = obj + def __enter__(self): + return self.obj + def __exit__(self, *args): + self.obj.close() +with closing(open('/tmp/file', 'w')) as f: + f.write('the contents\n') +``` + +Here we have made sure that the `f.close()` is called when the +`with` block is exited. Since closing files is such a common +operation, the support for this is already present in the `file` +class. It has an `__exit__` method which calls `close` and can be +used as a context manager itself: + +```{code-cell} +with open('/tmp/file', 'a') as f: + f.write('more contents\n') +``` + +The common use for `try..finally` is releasing resources. Various +different cases are implemented similarly: in the `__enter__` +phase the resource is acquired, in the `__exit__` phase it is +released, and the exception, if thrown, is propagated. As with files, +there's often a natural operation to perform after the object has been +used and it is most convenient to have the support built in. 
With each +release, Python provides support in more places: + +- all file-like objects: + + - `file` ➔ automatically closed + - `fileinput`, `tempfile` + - `bz2.BZ2File`, `gzip.GzipFile`, + `tarfile.TarFile`, `zipfile.ZipFile` + - `ftplib`, `nntplib` ➔ close connection + +- locks + + - `multiprocessing.RLock` ➔ lock and unlock + - `multiprocessing.Semaphore` + - `memoryview` ➔ automatically release + +- `decimal.localcontext` ➔ modify precision of computations temporarily + +- `_winreg.PyHKEY <_winreg.OpenKey>` ➔ open and close hive key + +- `warnings.catch_warnings` ➔ kill warnings temporarily + +- `contextlib.closing` ➔ the same as the example above, call `close` + +- parallel programming + + - `concurrent.futures.ThreadPoolExecutor` ➔ invoke in parallel then kill thread pool + - `concurrent.futures.ProcessPoolExecutor` ➔ invoke in parallel then kill process pool + - `nogil` ➔ solve the GIL problem temporarily (cython only :( ) + +### Catching exceptions + +When an exception is thrown in the `with`-block, it is passed as +arguments to `__exit__`. Three arguments are used, the same as +returned by {py:func}`sys.exc_info`: type, value, traceback. When no +exception is thrown, `None` is used for all three arguments. The +context manager can "swallow" the exception by returning a true value +from `__exit__`. Exceptions can be easily ignored, because if +`__exit__` doesn't use `return` and just falls off the end, +`None` is returned, a false value, and therefore the exception is +rethrown after `__exit__` is finished. + +The ability to catch exceptions opens interesting possibilities.
A +classic example comes from unit-tests — we want to make sure that +some code throws the right kind of exception: + +```{code-cell} +class assert_raises(object): + # based on pytest and unittest.TestCase + def __init__(self, type): + self.type = type + def __enter__(self): + pass + def __exit__(self, type, value, traceback): + if type is None: + raise AssertionError('exception expected') + if issubclass(type, self.type): + return True # swallow the expected exception + raise AssertionError('wrong exception type') + +with assert_raises(KeyError): + {}['foo'] +``` + +### Using generators to define context managers + +When discussing [generators], it was said that we prefer generators to +iterators implemented as classes because they are shorter, sweeter, +and the state is stored as local, not instance, variables. On the +other hand, as described in [Bidirectional communication], the flow +of data between the generator and its caller can be bidirectional. +This includes exceptions, which can be thrown into the +generator. We would like to implement context managers as special +generator functions. In fact, the generator protocol was designed to +support this use case. + ++++ + +```python +@contextlib.contextmanager +def some_generator(<arguments>): + <setup> + try: + yield <value> + finally: + <cleanup> +``` + ++++ + +The `contextlib.contextmanager` helper takes a generator and turns it +into a context manager. The generator has to obey some rules which are +enforced by the wrapper function — most importantly it must +`yield` exactly once. The part before the `yield` is executed from +`__enter__`, the block of code protected by the context manager is +executed when the generator is suspended in `yield`, and the rest is +executed in `__exit__`. If an exception is thrown, the interpreter +hands it to the wrapper through `__exit__` arguments, and the +wrapper function then throws it at the point of the `yield` +statement. Through the use of generators, the context manager is +shorter and simpler.
+ +Let's rewrite the `closing` example as a generator: + +```{code-cell} +import contextlib + +@contextlib.contextmanager +def closing(obj): + try: + yield obj + finally: + obj.close() +``` + +Let's rewrite the `assert_raises` example as a generator: + +```{code-cell} +@contextlib.contextmanager +def assert_raises(type): + try: + yield + except type: + return + except Exception as value: + raise AssertionError('wrong exception type') + else: + raise AssertionError('exception expected') +``` + +Here we use a decorator to turn generator functions into context managers! + +[a curious course on coroutines and concurrency]: https://www.dabeaz.com/coroutines/ +[adding optional static typing to python]: https://www.artima.com/weblogs/viewpost.jsp?thread=86641 +[decorators i]: https://www.artima.com/weblogs/viewpost.jsp?thread=240808 +[iterator protocol]: https://docs.python.org/dev/library/stdtypes.html#iterator-types +[peps]: https://peps.python.org/ +[python decorators ii]: https://www.artima.com/weblogs/viewpost.jsp?thread=240845 +[python decorators iii]: https://www.artima.com/weblogs/viewpost.jsp?thread=241209 +[unicode literal notation]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals diff --git a/advanced/advanced_python/index.rst b/advanced/advanced_python/index.rst deleted file mode 100644 index 7bca59539..000000000 --- a/advanced/advanced_python/index.rst +++ /dev/null @@ -1,1133 +0,0 @@ -.. |==>| unicode:: U+02794 .. thick rightwards arrow - -.. default-role:: py:obj - -========================== -Advanced Python Constructs -========================== - -**Author** *Zbigniew Jędrzejewski-Szmek* - -This section covers some features of the Python language which can -be considered advanced --- in the sense that not every language has -them, and also in the sense that they are more useful in more -complicated programs or libraries, but not in the sense of being -particularly specialized, or particularly complicated. 
- -It is important to underline that this chapter is purely about the -language itself --- about features supported through special syntax -complemented by functionality of the Python stdlib, which could not be -implemented through clever external modules. - -The process of developing the Python programming language, its syntax, -is very transparent; proposed changes are -evaluated from various angles and discussed via *Python Enhancement -Proposals* --- PEPs_. As a result, features described in this chapter -were added after it was shown that they indeed solve real problems and -that their use is as simple as possible. - -.. _PEPs: https://peps.python.org/ - -.. contents:: Chapter contents - :local: - :depth: 4 - - - -Iterators, generator expressions and generators -=============================================== - -Iterators -^^^^^^^^^ - -.. sidebar:: Simplicity - - Duplication of effort is wasteful, and replacing the various - home-grown approaches with a standard feature usually ends up - making things more readable, and interoperable as well. - - *Guido van Rossum* --- `Adding Optional Static Typing to Python`_ - -.. _`Adding Optional Static Typing to Python`: - https://www.artima.com/weblogs/viewpost.jsp?thread=86641 - - -An iterator is an object adhering to the `iterator protocol`_ ---- basically this means that it has a `next ` method, -which, when called, returns the next item in the sequence, and when -there's nothing to return, raises the -`StopIteration ` exception. - -.. _`iterator protocol`: https://docs.python.org/dev/library/stdtypes.html#iterator-types - -An iterator object allows to loop just once. It -holds the state (position) of a single iteration, or from the other -side, each loop over a sequence requires a single iterator -object. This means that we can iterate over the same sequence more -than once concurrently. Separating the iteration logic from the -sequence allows us to have more than one way of iteration. 
- -Calling the `__iter__ ` method on a container to -create an iterator object is the most straightforward way to get hold -of an iterator. The `iter` function does that for us, saving a few -keystrokes. :: - - >>> nums = [1, 2, 3] # note that ... varies: these are different objects - >>> iter(nums) - <...iterator object at ...> - >>> nums.__iter__() - <...iterator object at ...> - >>> nums.__reversed__() - <...reverseiterator object at ...> - - >>> it = iter(nums) - >>> next(it) - 1 - >>> next(it) - 2 - >>> next(it) - 3 - >>> next(it) - Traceback (most recent call last): - File "", line 1, in - StopIteration - -When used in a loop, `StopIteration ` is -swallowed and causes the loop to finish. But with explicit invocation, -we can see that once the iterator is exhausted, accessing it raises an -exception. - -Using the :compound:`for..in ` loop also uses the ``__iter__`` -method. This allows us to transparently start the iteration over a -sequence. But if we already have the iterator, we want to be able to -use it in an ``for`` loop in the same way. In order to achieve this, -iterators in addition to ``next`` are also required to have a method -called ``__iter__`` which returns the iterator (``self``). - -Support for iteration is pervasive in Python: -all sequences and unordered containers in the standard library allow -this. The concept is also stretched to other things: -e.g. ``file`` objects support iteration over lines. - - >>> with open("/etc/fstab") as f: # doctest: +SKIP - ... f is f.__iter__() - ... - True - -The ``file`` is an iterator itself and it's ``__iter__`` method -doesn't create a separate object: only a single thread of sequential -access is allowed. - -Generator expressions -^^^^^^^^^^^^^^^^^^^^^ - -A second way in which iterator objects are created is through -**generator expressions**, the basis for **list comprehensions**. To -increase clarity, a generator expression must always be enclosed in -parentheses or an expression. 
If round parentheses are used, then a -generator iterator is created. If rectangular parentheses are used, -the process is short-circuited and we get a ``list``. :: - - >>> (i for i in nums) - at 0x...> - >>> [i for i in nums] - [1, 2, 3] - >>> list(i for i in nums) - [1, 2, 3] - -The list comprehension syntax also extends to -**dictionary and set comprehensions**. -A ``set`` is created when the generator expression is enclosed in curly -braces. A ``dict`` is created when the generator expression contains -"pairs" of the form ``key:value``:: - - >>> {i for i in range(3)} - {0, 1, 2} - >>> {i:i**2 for i in range(3)} - {0: 0, 1: 1, 2: 4} - -One *gotcha* should be mentioned: in old Pythons the index variable -(``i``) would leak, and in versions >= 3 this is fixed. - -Generators -^^^^^^^^^^ - -.. sidebar:: Generators - - A generator is a function that produces a - sequence of results instead of a single value. - - *David Beazley* --- `A Curious Course on Coroutines and Concurrency`_ - -.. _`A Curious Course on Coroutines and Concurrency`: - https://www.dabeaz.com/coroutines/ - -A third way to create iterator objects is to call a generator function. -A **generator** is a function containing the keyword :simple:`yield`. It must be -noted that the mere presence of this keyword completely changes the -nature of the function: this ``yield`` statement doesn't have to be -invoked, or even reachable, but causes the function to be marked as a -generator. When a normal function is called, the instructions -contained in the body start to be executed. When a generator is -called, the execution stops before the first instruction in the body. -An invocation of a generator function creates a generator object, -adhering to the iterator protocol. As with normal function -invocations, concurrent and recursive invocations are allowed. - -When ``next`` is called, the function is executed until the first ``yield``. 
-Each encountered ``yield`` statement gives a value becomes the return -value of ``next``. After executing the ``yield`` statement, the -execution of this function is suspended. :: - - >>> def f(): - ... yield 1 - ... yield 2 - >>> f() - - >>> gen = f() - >>> next(gen) - 1 - >>> next(gen) - 2 - >>> next(gen) - Traceback (most recent call last): - File "", line 1, in - StopIteration - -Let's go over the life of the single invocation of the generator -function. :: - - >>> def f(): - ... print("-- start --") - ... yield 3 - ... print("-- finish --") - ... yield 4 - >>> gen = f() - >>> next(gen) - -- start -- - 3 - >>> next(gen) - -- finish -- - 4 - >>> next(gen) - Traceback (most recent call last): - ... - StopIteration - -Contrary to a normal function, where executing ``f()`` would -immediately cause the first ``print`` to be executed, ``gen`` is -assigned without executing any statements in the function body. Only -when ``gen.__next__()`` is invoked by ``next``, the statements up to -the first ``yield`` are executed. The second ``next`` prints -``-- finish --`` and execution halts on the second ``yield``. The third -``next`` falls of the end of the function. -Since no ``yield`` was reached, an exception is raised. - -What happens with the function after a yield, when the control passes -to the caller? The state of each generator is stored in the generator -object. From the point of view of the generator function, is looks -almost as if it was running in a separate thread, but this is just an -illusion: execution is strictly single-threaded, but the interpreter -keeps and restores the state in between the requests for the next value. - -Why are generators useful? As noted in the parts about iterators, a -generator function is just a different way to create an iterator -object. Everything that can be done with ``yield`` statements, could -also be done with ``next`` methods. 
Nevertheless, using a -function and having the interpreter perform its magic to create an -iterator has advantages. A function can be much shorter -than the definition of a class with the required ``next`` and -``__iter__`` methods. What is more important, it is easier for the author -of the generator to understand the state which is kept in local -variables, as opposed to instance attributes, which have to be -used to pass data between consecutive invocations of ``next`` on -an iterator object. - -A broader question is why are iterators useful? When an iterator is -used to power a loop, the loop becomes very simple. The code to -initialise the state, to decide if the loop is finished, and to find -the next value is extracted into a separate place. This highlights the -body of the loop --- the interesting part. In addition, it is possible -to reuse the iterator code in other places. - -Bidirectional communication -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Each ``yield`` statement causes a value to be passed to the -caller. This is the reason for the introduction of generators -by :pep:`255`. But communication in the -reverse direction is also useful. One obvious way would be some -external state, either a global variable or a shared mutable -object. Direct communication is possible thanks to :pep:`342`. -It is achieved by turning the previously boring -``yield`` statement into an expression. When the generator resumes -execution after a ``yield`` statement, the caller can call a method on -the generator object to either pass a value **into** the generator, -which then is returned by the ``yield`` statement, or a -different method to inject an exception into the generator. - -The first of the new methods is `send(value) `, which -is similar to `next() `, but passes ``value`` into -the generator to be used for the value of the ``yield`` expression. In -fact, ``g.next()`` and ``g.send(None)`` are equivalent. 
- -The second of the new methods is -`throw(type, value=None, traceback=None) ` -which is equivalent to:: - - raise type, value, traceback - -at the point of the ``yield`` statement. - -Unlike :simple:`raise` (which immediately raises an exception from the -current execution point), ``throw()`` first resumes the generator, and -only then raises the exception. The word throw was picked because -it is suggestive of putting the exception in another location, and is -associated with exceptions in other languages. - -What happens when an exception is raised inside the generator? It can -be either raised explicitly or when executing some statements or it -can be injected at the point of a ``yield`` statement by means of the -``throw()`` method. In either case, such an exception propagates in the -standard manner: it can be intercepted by an ``except`` or ``finally`` -clause, or otherwise it causes the execution of the generator function -to be aborted and propagates in the caller. - -For completeness' sake, it's worth mentioning that generator iterators -also have a `close() ` method, which can be used to -force a generator that would otherwise be able to provide more values -to finish immediately. It allows the generator `__del__ ` -method to destroy objects holding the state of generator. -Let's define a generator which just prints what is passed in through -send and throw. :: - - >>> import itertools - >>> def g(): - ... print('--start--') - ... for i in itertools.count(): - ... print('--yielding %i--' % i) - ... try: - ... ans = yield i - ... except GeneratorExit: - ... print('--closing--') - ... raise - ... except Exception as e: - ... print('--yield raised %r--' % e) - ... else: - ... 
print('--yield returned %s--' % ans) - - >>> it = g() - >>> next(it) - --start-- - --yielding 0-- - 0 - >>> it.send(11) - --yield returned 11-- - --yielding 1-- - 1 - >>> it.throw(IndexError) - --yield raised IndexError()-- - --yielding 2-- - 2 - >>> it.close() - --closing-- - -Chaining generators -^^^^^^^^^^^^^^^^^^^ - -.. note:: - - This is a preview of :pep:`380` (not yet implemented, but accepted - for Python 3.3). - -Let's say we are writing a generator and we want to yield a number of -values generated by a second generator, a **subgenerator**. -If yielding of values is the only concern, this can be performed -without much difficulty using a loop such as - -.. code-block:: pycon - - subgen = some_other_generator() - for v in subgen: - yield v - -However, if the subgenerator is to interact properly with the caller -in the case of calls to ``send()``, ``throw()`` and ``close()``, -things become considerably more difficult. The ``yield`` statement has -to be guarded by a :compound:`try..except..finally ` structure -similar to the one defined in the previous section to "debug" the -generator function. Such code is provided in :pep:`380#id13`, here it -suffices to say that new syntax to properly yield from a subgenerator -is being introduced in Python 3.3: - -.. code-block:: pycon - - yield from some_other_generator() - -This behaves like the explicit loop above, repeatedly yielding values -from ``some_other_generator`` until it is exhausted, but also forwards -``send``, ``throw`` and ``close`` to the subgenerator. - -Decorators -========== - -.. sidebar:: Summary - - This amazing feature appeared in the language almost apologetically - and with concern that it might not be that useful. - - *Bruce Eckel* --- An Introduction to Python Decorators - -Since functions and classes are objects, they can be passed -around. Since they are mutable objects, they can be modified. 
The act -of altering a function or class object after it has been constructed -but before is is bound to its name is called decorating. - -There are two things hiding behind the name "decorator" --- one is the -function which does the work of decorating, i.e. performs the real -work, and the other one is the expression adhering to the decorator -syntax, i.e. an at-symbol and the name of the decorating function. - -Function can be decorated by using the decorator syntax for -functions:: - - @decorator # ② - def function(): # ① - pass - -- A function is defined in the standard way. ① -- An expression starting with ``@`` placed before the function - definition is the decorator ②. The part after ``@`` must be a simple - expression, usually this is just the name of a function or class. This - part is evaluated first, and after the function defined below is - ready, the decorator is called with the newly defined function object - as the single argument. The value returned by the decorator is - attached to the original name of the function. - -Decorators can be applied to functions and to classes. For -classes the semantics are identical --- the original class definition -is used as an argument to call the decorator and whatever is returned -is assigned under the original name. - -Before the decorator syntax was implemented (:pep:`318`), it was -possible to achieve the same effect by assigning the function or class -object to a temporary variable and then invoking the decorator -explicitly and then assigning the return value to the name of the -function. This sounds like more typing, and it is, and also the name of -the decorated function doubling as a temporary variable must be used -at least three times, which is prone to errors. Nevertheless, the -example above is equivalent to:: - - def function(): # ① - pass - function = decorator(function) # ② - -Decorators can be stacked --- the order of application is -bottom-to-top, or inside-out. 
The semantics are such that the originally -defined function is used as an argument for the first decorator, -whatever is returned by the first decorator is used as an argument for -the second decorator, ..., and whatever is returned by the last -decorator is attached under the name of the original function. - -The decorator syntax was chosen for its readability. Since the -decorator is specified before the header of the function, it is -obvious that its is not a part of the function body and its clear that -it can only operate on the whole function. Because the expression is -prefixed with ``@`` is stands out and is hard to miss ("in your face", -according to the PEP :) ). When more than one decorator is applied, -each one is placed on a separate line in an easy to read way. - - -Replacing or tweaking the original object -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Decorators can either return the same function or class object or they -can return a completely different object. In the first case, the -decorator can exploit the fact that function and class objects are -mutable and add attributes, e.g. add a docstring to a class. A -decorator might do something useful even without modifying the object, -for example register the decorated class in a global registry. In the -second case, virtually anything is possible: when something -different is substituted for the original function or class, the new -object can be completely different. Nevertheless, such behaviour is -not the purpose of decorators: they are intended to tweak the -decorated object, not do something unpredictable. Therefore, when a -function is "decorated" by replacing it with a different function, the -new function usually calls the original function, after doing some -preparatory work. Likewise, when a class is "decorated" by replacing -if with a new class, the new class is usually derived from the -original class. 
When the purpose of the decorator is to do something -"every time", like to log every call to a decorated function, only the -second type of decorators can be used. On the other hand, if the first -type is sufficient, it is better to use it, because it is simpler. - -Decorators implemented as classes and as functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The only *requirement* on decorators is that they can be called with a -single argument. This means that decorators can be implemented as -normal functions, or as classes with a `__call__ ` -method, or in theory, even as lambda functions. - -Let's compare the function and class approaches. The decorator -expression (the part after ``@``) can be either just a name, or a -call. The bare-name approach is nice (less to type, looks cleaner, -etc.), but is only possible when no arguments are needed to customise -the decorator. Decorators written as functions can be used in those -two cases:: - - >>> def simple_decorator(function): - ... print("doing decoration") - ... return function - >>> @simple_decorator - ... def function(): - ... print("inside function") - doing decoration - >>> function() - inside function - - >>> def decorator_with_arguments(arg): - ... print("defining the decorator") - ... def _decorator(function): - ... # in this inner function, arg is available too - ... print("doing decoration, %r" % arg) - ... return function - ... return _decorator - >>> @decorator_with_arguments("abc") - ... def function(): - ... print("inside function") - defining the decorator - doing decoration, 'abc' - >>> function() - inside function - -The two trivial decorators above fall into the category of decorators -which return the original function. If they were to return a new -function, an extra level of nestedness would be required. -In the worst case, three levels of nested functions. :: - - >>> def replacing_decorator_with_args(arg): - ... print("defining the decorator") - ... 
def _decorator(function): - ... # in this inner function, arg is available too - ... print("doing decoration, %r" % arg) - ... def _wrapper(*args, **kwargs): - ... print("inside wrapper, %r %r" % (args, kwargs)) - ... return function(*args, **kwargs) - ... return _wrapper - ... return _decorator - >>> @replacing_decorator_with_args("abc") - ... def function(*args, **kwargs): - ... print("inside function, %r %r" % (args, kwargs)) - ... return 14 - defining the decorator - doing decoration, 'abc' - >>> function(11, 12) - inside wrapper, (11, 12) {} - inside function, (11, 12) {} - 14 - -The ``_wrapper`` function is defined to accept all positional and -keyword arguments. In general we cannot know what arguments the -decorated function is supposed to accept, so the wrapper function -just passes everything to the wrapped function. One unfortunate -consequence is that the apparent argument list is misleading. - -Compared to decorators defined as functions, complex decorators -defined as classes are simpler. When an object is created, the -`__init__ ` method is only allowed to return `None`, -and the type of the created object cannot be changed. This means that -when a decorator is defined as a class, it doesn't make much sense to -use the argument-less form: the final decorated object would just be -an instance of the decorating class, returned by the constructor call, -which is not very useful. Therefore it's enough to discuss class-based -decorators where arguments are given in the decorator expression and -the decorator ``__init__`` method is used for decorator construction. :: - - >>> class decorator_class(object): - ... def __init__(self, arg): - ... # this method is called in the decorator expression - ... print("in decorator init, %s" % arg) - ... self.arg = arg - ... def __call__(self, function): - ... # this method is called to do the job - ... print("in decorator call, %s" % self.arg) - ... 
return function - >>> deco_instance = decorator_class('foo') - in decorator init, foo - >>> @deco_instance - ... def function(*args, **kwargs): - ... print("in function, %s %s" % (args, kwargs)) - in decorator call, foo - >>> function() - in function, () {} - -Contrary to normal rules (:PEP:`8`) decorators written as classes -behave more like functions and therefore their name often starts with a -lowercase letter. - -In reality, it doesn't make much sense to create a new class just to -have a decorator which returns the original function. Objects are -supposed to hold state, and such decorators are more useful when the -decorator returns a new object. :: - - >>> class replacing_decorator_class(object): - ... def __init__(self, arg): - ... # this method is called in the decorator expression - ... print("in decorator init, %s" % arg) - ... self.arg = arg - ... def __call__(self, function): - ... # this method is called to do the job - ... print("in decorator call, %s" % self.arg) - ... self.function = function - ... return self._wrapper - ... def _wrapper(self, *args, **kwargs): - ... print("in the wrapper, %s %s" % (args, kwargs)) - ... return self.function(*args, **kwargs) - >>> deco_instance = replacing_decorator_class('foo') - in decorator init, foo - >>> @deco_instance - ... def function(*args, **kwargs): - ... print("in function, %s %s" % (args, kwargs)) - in decorator call, foo - >>> function(11, 12) - in the wrapper, (11, 12) {} - in function, (11, 12) {} - -A decorator like this can do pretty much anything, since it can modify -the original function object and mangle the arguments, call the -original function or not, and afterwards mangle the return value. 
- -Copying the docstring and other attributes of the original function -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When a new function is returned by the decorator to replace the -original function, an unfortunate consequence is that the original -function name, the original docstring, the original argument list are -lost. Those attributes of the original function can partially be "transplanted" -to the new function by setting ``__doc__`` (the docstring), ``__module__`` -and ``__name__`` (the full name of the function), and -``__annotations__`` (extra information about arguments and the return -value of the function available in Python 3). This can be done -automatically by using `functools.update_wrapper`. - -.. topic:: `functools.update_wrapper(wrapper, wrapped) ` - - "Update a wrapper function to look like the wrapped function." - - :: - - >>> import functools - >>> def replacing_decorator_with_args(arg): - ... print("defining the decorator") - ... def _decorator(function): - ... print("doing decoration, %r" % arg) - ... def _wrapper(*args, **kwargs): - ... print("inside wrapper, %r %r" % (args, kwargs)) - ... return function(*args, **kwargs) - ... return functools.update_wrapper(_wrapper, function) - ... return _decorator - >>> @replacing_decorator_with_args("abc") - ... def function(): - ... "extensive documentation" - ... print("inside function") - ... return 14 - defining the decorator - doing decoration, 'abc' - >>> function - - >>> print(function.__doc__) - extensive documentation - -One important thing is missing from the list of attributes which can -be copied to the replacement function: the argument list. The default -values for arguments can be modified through the ``__defaults__``, -``__kwdefaults__`` attributes, but unfortunately the argument list -itself cannot be set as an attribute. This means that -``help(function)`` will display a useless argument list which will be -confusing for the user of the function. 
An effective but ugly way -around this problem is to create the wrapper dynamically, using -``eval``. This can be automated by using the external ``decorator`` -module. It provides support for the ``decorator`` decorator, which takes a -wrapper and turns it into a decorator which preserves the function -signature. - -To sum things up, decorators should always use ``functools.update_wrapper`` -or some other means of copying function attributes. - -Examples in the standard library -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -First, it should be mentioned that there's a number of useful -decorators available in the standard library. There are three decorators -which really form a part of the language: - -- `classmethod` causes a method to become a "class method", - which means that it can be invoked without creating an instance of - the class. When a normal method is invoked, the interpreter inserts - the instance object as the first positional parameter, - ``self``. When a class method is invoked, the class itself is given - as the first parameter, often called ``cls``. - - Class methods are still accessible through the class' namespace, so - they don't pollute the module's namespace. Class methods can be used - to provide alternative constructors:: - - class Array(object): - def __init__(self, data): - self.data = data - - @classmethod - def fromfile(cls, file): - data = numpy.load(file) - return cls(data) - - This is cleaner than using a multitude of flags to ``__init__``. - -- `staticmethod` is applied to methods to make them "static", - i.e. basically a normal function, but accessible through the class - namespace. This can be useful when the function is only needed - inside this class (its name would then be prefixed with ``_``), or when we - want the user to think of the method as connected to the class, - despite an implementation which doesn't require this. - -- `property` is the pythonic answer to the problem of getters - and setters. 
A method decorated with ``property`` becomes a getter - which is automatically called on attribute access. - - >>> class A(object): - ... @property - ... def a(self): - ... "an important attribute" - ... return "a value" - >>> A.a - - >>> A().a - 'a value' - - In this example, ``A.a`` is an read-only attribute. It is also - documented: ``help(A)`` includes the docstring for attribute ``a`` - taken from the getter method. Defining ``a`` as a property allows it - to be a calculated on the fly, and has the side effect of making it - read-only, because no setter is defined. - - To have a setter and a getter, two methods are required, - obviously:: - - class Rectangle(object): - def __init__(self, edge): - self.edge = edge - - @property - def area(self): - """Computed area. - - Setting this updates the edge length to the proper value. - """ - return self.edge**2 - - @area.setter - def area(self, area): - self.edge = area ** 0.5 - - The way that this works, is that the ``property`` decorator replaces - the getter method with a property object. This object in turn has - three methods, ``getter``, ``setter``, and ``deleter``, which can be - used as decorators. Their job is to set the getter, setter and - deleter of the property object (stored as attributes ``fget``, - ``fset``, and ``fdel``). The getter can be set like in the example - above, when creating the object. When defining the setter, we - already have the property object under ``area``, and we add the - setter to it by using the ``setter`` method. All this happens when - we are creating the class. - - Afterwards, when an instance of the class has been created, the - property object is special. When the interpreter executes attribute - access, assignment, or deletion, the job is delegated to the methods - of the property object. - - To make everything crystal clear, let's define a "debug" example:: - - >>> class D(object): - ... @property - ... def a(self): - ... print("getting 1") - ... return 1 - ... 
@a.setter - ... def a(self, value): - ... print("setting %r" % value) - ... @a.deleter - ... def a(self): - ... print("deleting") - >>> D.a - - >>> D.a.fget - - >>> D.a.fset - - >>> D.a.fdel - - >>> d = D() # ... varies, this is not the same `a` function - >>> d.a - getting 1 - 1 - >>> d.a = 2 - setting 2 - >>> del d.a - deleting - >>> d.a - getting 1 - 1 - - Properties are a bit of a stretch for the decorator syntax. One of the - premises of the decorator syntax --- that the name is not duplicated - --- is violated, but nothing better has been invented so far. It is - just good style to use the same name for the getter, setter, and - deleter methods. - - .. property documentation mentions that this only works for - old-style classes, but this seems to be an error. - -Some newer examples include: - -- `functools.lru_cache` memoizes an arbitrary function - maintaining a limited cache of arguments:answer pairs (Python 3.2) - -- `functools.total_ordering` is a class decorator which fills in - missing ordering methods - (`__lt__ `, `__gt__ `, - `__le__ `, ...) - based on a single available one. - - -.. - - `packaging.pypi.simple.socket_timeout` (in Python 3.3) adds - a socket timeout when retrieving data through a socket. - - -Deprecation of functions -^^^^^^^^^^^^^^^^^^^^^^^^ - -Let's say we want to print a deprecation warning on stderr on the -first invocation of a function we don't like anymore. If we don't want -to modify the function, we can use a decorator:: - - class deprecated(object): - """Print a deprecation warning once on first use of the function. - - >>> @deprecated() # doctest: +SKIP - ... def f(): - ... pass - >>> f() # doctest: +SKIP - f is deprecated - """ - def __call__(self, func): - self.func = func - self.count = 0 - return self._wrapper - def _wrapper(self, *args, **kwargs): - self.count += 1 - if self.count == 1: - print(self.func.__name__, 'is deprecated') - return self.func(*args, **kwargs) - -.. 
TODO: use update_wrapper here - -It can also be implemented as a function:: - - def deprecated(func): - """Print a deprecation warning once on first use of the function. - - >>> @deprecated # doctest: +SKIP - ... def f(): - ... pass - >>> f() # doctest: +SKIP - f is deprecated - """ - count = [0] - def wrapper(*args, **kwargs): - count[0] += 1 - if count[0] == 1: - print(func.__name__, 'is deprecated') - return func(*args, **kwargs) - return wrapper - -A ``while``-loop removing decorator -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Let's say we have function which returns a lists of things, and this -list created by running a loop. If we don't know how many objects will -be needed, the standard way to do this is something like:: - - def find_answers(): - answers = [] - while True: - ans = look_for_next_answer() - if ans is None: - break - answers.append(ans) - return answers - -This is fine, as long as the body of the loop is fairly compact. Once -it becomes more complicated, as often happens in real code, this -becomes pretty unreadable. We could simplify this by using ``yield`` -statements, but then the user would have to explicitly call -``list(find_answers())``. - -We can define a decorator which constructs the list for us:: - - def vectorized(generator_func): - def wrapper(*args, **kwargs): - return list(generator_func(*args, **kwargs)) - return functools.update_wrapper(wrapper, generator_func) - -Our function then becomes:: - - @vectorized - def find_answers(): - while True: - ans = look_for_next_answer() - if ans is None: - break - yield ans - -A plugin registration system -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This is a class decorator which doesn't modify the class, but just -puts it in a global registry. 
It falls into the category of decorators -returning the original object:: - - class WordProcessor(object): - PLUGINS = [] - def process(self, text): - for plugin in self.PLUGINS: - text = plugin().cleanup(text) - return text - - @classmethod - def plugin(cls, plugin): - cls.PLUGINS.append(plugin) - - @WordProcessor.plugin - class CleanMdashesExtension(object): - def cleanup(self, text): - return text.replace('—', u'\N{em dash}') - -Here we use a decorator to decentralise the registration of -plugins. We call our decorator with a noun, instead of a verb, because -we use it to declare that our class is a plugin for -``WordProcessor``. Method ``plugin`` simply appends the class to the -list of plugins. - -A word about the plugin itself: it replaces HTML entity for em-dash -with a real Unicode em-dash character. It exploits the `unicode -literal notation`_ to insert a character by using its name in the -unicode database ("EM DASH"). If the Unicode character was inserted -directly, it would be impossible to distinguish it from an en-dash in -the source of a program. - -.. _`unicode literal notation`: - https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals - -.. seealso:: **More examples and reading** - - * :pep:`318` (function and method decorator syntax) - * :pep:`3129` (class decorator syntax) - * https://wiki.python.org/moin/PythonDecoratorLibrary - * https://docs.python.org/dev/library/functools.html - * https://pypi.org/project/decorator - * Bruce Eckel - - - `Decorators I`_: Introduction to Python Decorators - - `Python Decorators II`_: Decorator Arguments - - `Python Decorators III`_: A Decorator-Based Build System - - .. _`Decorators I`: https://www.artima.com/weblogs/viewpost.jsp?thread=240808 - .. _`Python Decorators II`: https://www.artima.com/weblogs/viewpost.jsp?thread=240845 - .. 
_`Python Decorators III`: https://www.artima.com/weblogs/viewpost.jsp?thread=241209 - - -Context managers -================ - -A context manager is an object with `__enter__ ` and -`__exit__ ` methods which can be used in the :compound:`with` -statement:: - - with manager as var: - do_something(var) - -is in the simplest case -equivalent to :: - - var = manager.__enter__() - try: - do_something(var) - finally: - manager.__exit__() - -In other words, the context manager protocol defined in :pep:`343` -permits the extraction of the boring part of a -:compound:`try..except..finally ` structure into a separate class -leaving only the interesting ``do_something`` block. - -1. The `__enter__ ` method is called first. It can - return a value which will be assigned to ``var``. - The ``as``-part is optional: if it isn't present, the value - returned by ``__enter__`` is simply ignored. -2. The block of code underneath ``with`` is executed. Just like with - ``try`` clauses, it can either execute successfully to the end, or - it can :simple:`break`, :simple:`continue` or :simple:`return`, or - it can throw an exception. Either way, after the block is finished, - the `__exit__ ` method is called. - If an exception was thrown, the information about the exception is - passed to ``__exit__``, which is described below in the next - subsection. In the normal case, exceptions can be ignored, just - like in a ``finally`` clause, and will be rethrown after - ``__exit__`` is finished. - -Let's say we want to make sure that a file is closed immediately after -we are done writing to it:: - - >>> class closing(object): - ... def __init__(self, obj): - ... self.obj = obj - ... def __enter__(self): - ... return self.obj - ... def __exit__(self, *args): - ... self.obj.close() - >>> with closing(open('/tmp/file', 'w')) as f: - ... f.write('the contents\n') # doctest: +SKIP - -Here we have made sure that the ``f.close()`` is called when the -``with`` block is exited. 
Since closing files is such a common -operation, the support for this is already present in the ``file`` -class. It has an ``__exit__`` method which calls ``close`` and can be -used as a context manager itself:: - - >>> with open('/tmp/file', 'a') as f: - ... f.write('more contents\n') # doctest: +SKIP - -The common use for ``try..finally`` is releasing resources. Various -different cases are implemented similarly: in the ``__enter__`` -phase the resource is acquired, in the ``__exit__`` phase it is -released, and the exception, if thrown, is propagated. As with files, -there's often a natural operation to perform after the object has been -used and it is most convenient to have the support built in. With each -release, Python provides support in more places: - -* all file-like objects: - - - `file` |==>| automatically closed - - `fileinput`, `tempfile` - - `bz2.BZ2File`, `gzip.GzipFile`, - `tarfile.TarFile`, `zipfile.ZipFile` - - `ftplib`, `nntplib` |==>| close connection -* locks - - - `multiprocessing.RLock` |==>| lock and unlock - - `multiprocessing.Semaphore` - - `memoryview` |==>| automatically release -* `decimal.localcontext` |==>| modify precision of computations temporarily -* `_winreg.PyHKEY <_winreg.OpenKey>` |==>| open and close hive key -* `warnings.catch_warnings` |==>| kill warnings temporarily -* `contextlib.closing` |==>| the same as the example above, call ``close`` -* parallel programming - - - `concurrent.futures.ThreadPoolExecutor` |==>| invoke in parallel then kill thread pool - - `concurrent.futures.ProcessPoolExecutor` |==>| invoke in parallel then kill process pool - - `nogil` |==>| solve the GIL problem temporarily (cython only :( ) - - -Catching exceptions -^^^^^^^^^^^^^^^^^^^ - -When an exception is thrown in the ``with``-block, it is passed as -arguments to ``__exit__``. Three arguments are used, the same as -returned by :py:func:`sys.exc_info`: type, value, traceback. 
When no -exception is thrown, ``None`` is used for all three arguments. The -context manager can "swallow" the exception by returning a true value -from ``__exit__``. Exceptions can be easily ignored, because if -``__exit__`` doesn't use ``return`` and just falls of the end, -``None`` is returned, a false value, and therefore the exception is -rethrown after ``__exit__`` is finished. - -The ability to catch exceptions opens interesting possibilities. A -classic example comes from unit-tests --- we want to make sure that -some code throws the right kind of exception:: - - class assert_raises(object): - # based on pytest and unittest.TestCase - def __init__(self, type): - self.type = type - def __enter__(self): - pass - def __exit__(self, type, value, traceback): - if type is None: - raise AssertionError('exception expected') - if issubclass(type, self.type): - return True # swallow the expected exception - raise AssertionError('wrong exception type') - - with assert_raises(KeyError): - {}['foo'] - -Using generators to define context managers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When discussing generators_, it was said that we prefer generators to -iterators implemented as classes because they are shorter, sweeter, -and the state is stored as local, not instance, variables. On the -other hand, as described in `Bidirectional communication`_, the flow -of data between the generator and its caller can be bidirectional. -This includes exceptions, which can be thrown into the -generator. We would like to implement context managers as special -generator functions. In fact, the generator protocol was designed to -support this use case. - -.. code-block:: pycon - - @contextlib.contextmanager - def some_generator(): - - try: - yield - finally: - - -The `contextlib.contextmanager` helper takes a generator and turns it -into a context manager. 
The generator has to obey some rules which are -enforced by the wrapper function --- most importantly it must -``yield`` exactly once. The part before the ``yield`` is executed from -``__enter__``, the block of code protected by the context manager is -executed when the generator is suspended in ``yield``, and the rest is -executed in ``__exit__``. If an exception is thrown, the interpreter -hands it to the wrapper through ``__exit__`` arguments, and the -wrapper function then throws it at the point of the ``yield`` -statement. Through the use of generators, the context manager is -shorter and simpler. - -Let's rewrite the ``closing`` example as a generator:: - - @contextlib.contextmanager - def closing(obj): - try: - yield obj - finally: - obj.close() - -Let's rewrite the ``assert_raises`` example as a generator:: - - @contextlib.contextmanager - def assert_raises(type): - try: - yield - except type: - return - except Exception as value: - raise AssertionError('wrong exception type') - else: - raise AssertionError('exception expected') - -Here we use a decorator to turn generator functions into context managers! diff --git a/advanced/debugging/index.md b/advanced/debugging/index.md new file mode 100644 index 000000000..b3558a66e --- /dev/null +++ b/advanced/debugging/index.md @@ -0,0 +1,684 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(debugging-chapter)= + +# Debugging code + +**Author**: _Gaël Varoquaux_ + +This section explores tools to understand better your code base: +debugging, to find and fix bugs. + +It is not specific to the scientific Python community, but the strategies +that we will employ are tailored to its needs. 
+ +:::{admonition} Prerequisites + +- NumPy +- IPython +- [nosetests](https://nose.readthedocs.io/en/latest/) +- [pyflakes](https://pypi.org/project/pyflakes) +- gdb for the C-debugging part. + ::: + +## Avoiding bugs + +### Coding best practices to avoid getting in trouble + +:::{sidebar} Brian Kernighan +_“Everyone knows that debugging is twice as hard as writing a +program in the first place. So if you're as clever as you can be +when you write it, how will you ever debug it?”_ +::: + +- We all write buggy code. Accept it. Deal with it. + +- Write your code with testing and debugging in mind. + +- Keep It Simple, Stupid (KISS). + + - What is the simplest thing that could possibly work? + +- Don't Repeat Yourself (DRY). + + - Every piece of knowledge must have a single, unambiguous, + authoritative representation within a system. + - Constants, algorithms, etc... + +- Try to limit interdependencies of your code. (Loose Coupling) + +- Give your variables, functions and modules meaningful names (not + mathematics names) + +### pyflakes: fast static analysis + +There are several static analysis tools in Python; to name a few: + +- [pylint](https://pylint.pycqa.org/en/latest/) +- [pychecker](https://pychecker.sourceforge.net/) +- [pyflakes](https://pypi.org/project/pyflakes) +- [flake8](https://pypi.org/project/flake8) + +Here we focus on `pyflakes`, which is the simplest tool. + +- **Fast, simple** +- Detects syntax errors, missing imports, typos on names. + +Another good recommendation is the `flake8` tool which is a combination of +pyflakes and pep8. Thus, in addition to the types of errors that pyflakes +catches, flake8 detects violations of the recommendations in the [PEP8](https://peps.python.org/pep-0008/) style guide. + +Integrating pyflakes (or flake8) in your editor or IDE is highly +recommended, it **does yield productivity gains**. + +#### Running pyflakes on the current edited file + +You can bind a key to run pyflakes in the current buffer.
+ +- **In kate** + Menu: 'settings -> configure kate + + - In plugins enable 'external tools' + - In external Tools', add `pyflakes`: + + ``` + kdialog --title "pyflakes %filename" --msgbox "$(pyflakes %filename)" + ``` + +- **In TextMate** + + Menu: TextMate -> Preferences -> Advanced -> Shell variables, add a + shell variable: + + ```shell + TM_PYCHECKER = /Library/Frameworks/Python.framework/Versions/Current/bin/pyflakes + ``` + + Then `Ctrl-Shift-V` is binded to a pyflakes report + +- **In vim** + In your `.vimrc` (binds F5 to `pyflakes`): + + ```vim + autocmd FileType python let &mp = 'echo "*** running % ***" ; pyflakes %' + autocmd FileType tex,mp,rst,python imap [15~ :make!^M + autocmd FileType tex,mp,rst,python map [15~ :make!^M + autocmd FileType tex,mp,rst,python set autowrite + ``` + +- **In emacs** + In your `.emacs` (binds F5 to `pyflakes`): + + ```lisp + (defun pyflakes-thisfile () (interactive) + (compile (format "pyflakes %s" (buffer-file-name))) + + (define-minor-mode pyflakes-mode + "Toggle pyflakes mode. + With no argument, this command toggles the mode. + Non-null prefix argument turns on the mode. + Null prefix argument turns off the mode." + ;; The initial value. + nil + ;; The indicator for the mode line. + " Pyflakes" + ;; The minor mode bindings. + '( ([f5] . pyflakes-thisfile) ) + ) + + (add-hook 'python-mode-hook (lambda () (pyflakes-mode t))) + ``` + +#### A type-as-go spell-checker like integration + +- **In vim** + + - Use the pyflakes.vim plugin: + + 1. download the zip file from + + 2. extract the files in `~/.vim/ftplugin/python` + 3. make sure your vimrc has `filetype plugin indent on` + + ![](vim_pyflakes.png) + + - Alternatively: use the [syntastic](https://github.com/vim-syntastic/syntastic) + plugin. This can be configured to use `flake8` too and also handles + on-the-fly checking for many other languages. 
+ + ![](vim_syntastic.png) + +- **In emacs** + + Use the flymake mode with pyflakes, documented on + <https://www.emacswiki.org/emacs/FlyMake> and included in Emacs 26 and + more recent. To activate it, use `M-x` (meta-key then x) and enter + `flymake-mode` at the prompt. To enable it automatically when + opening a Python file, add the following line to your .emacs file: + + ```lisp + (add-hook 'python-mode-hook '(lambda () (flymake-mode))) + ``` + +## Debugging workflow + +If you do have a non-trivial bug, this is when debugging strategies kick +in. There is no silver bullet. Yet, strategies help. + +**For debugging a given problem, the favorable situation is when the problem is +isolated in a small number of lines of code, outside framework or application +code, with short modify-run-fail cycles.** + +1. Make it fail reliably. Find a test case that makes the code fail + every time. + +2. Divide and Conquer. Once you have a failing test case, isolate the + failing code. + + - Which module. + - Which function. + - Which line of code. + + => isolate a small reproducible failure: a test case + +3. Change one thing at a time and re-run the failing test case. + +4. Use the debugger to understand what is going wrong. + +5. Take notes and be patient. It may take a while. + +:::{note} +Once you have gone through this process: isolated a tight piece of +code reproducing the bug and fixed the bug using this piece of code, add +the corresponding code to your test suite. +::: + ++++ + +## Using the Python debugger + +The Python debugger, `pdb`: <https://docs.python.org/3/library/pdb.html>, +allows you to inspect your code interactively. + +Specifically it allows you to: + +- View the source code. +- Walk up and down the call stack. +- Inspect values of variables. +- Modify values of variables. +- Set breakpoints. + +:::{admonition} print +Yes, `print` statements do work as a debugging tool. However to +inspect runtime, it is often more efficient to use the debugger. +::: + +### Invoking the debugger + +Ways to launch the debugger: + +1.
Postmortem, launch debugger after module errors. +2. Launch the module with the debugger. +3. Call the debugger inside the module + +#### Postmortem + +**Situation**: You're working in IPython and you get a traceback. + +Here we debug the file {download}`index_error.py`. When running it, an +{class}`IndexError` is raised. Type `%debug` and drop into the debugger. + +```ipython +In [1]: %run index_error.py +--------------------------------------------------------------------------- +IndexError Traceback (most recent call last) +File ~/src/scientific-python-lectures/advanced/debugging/index_error.py:10 + 6 print(lst[len(lst)]) + 9 if __name__ == "__main__": +---> 10 index_error() + +File ~/src/scientific-python-lectures/advanced/debugging/index_error.py:6, in index_error() + 4 def index_error(): + 5 lst = list("foobar") +----> 6 print(lst[len(lst)]) + +IndexError: list index out of range + +In [2]: %debug +> /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(6)index_error() + 4 def index_error(): + 5 lst = list("foobar") +----> 6 print(lst[len(lst)]) + 7 + 8 + +ipdb> list + 1 """Small snippet to raise an IndexError.""" + 2 + 3 + 4 def index_error(): + 5 lst = list("foobar") +----> 6 print(lst[len(lst)]) + 7 + 8 + 9 if __name__ == "__main__": + 10 index_error() + +ipdb> len(lst) +6 +ipdb> print(lst[len(lst) - 1]) +r +ipdb> quit +``` + +:::{admonition} Post-mortem debugging without IPython +In some situations you cannot use IPython, for instance to debug a +script that wants to be called from the command line. 
In this case, +you can call the script with `python -m pdb script.py`: + +```shell +$ python -m pdb index_error.py +> /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(1)() +-> """Small snippet to raise an IndexError.""" +(Pdb) continue +Traceback (most recent call last): + File "/usr/lib64/python3.11/pdb.py", line 1793, in main + pdb._run(target) + File "/usr/lib64/python3.11/pdb.py", line 1659, in _run + self.run(target.code) + File "/usr/lib64/python3.11/bdb.py", line 600, in run + exec(cmd, globals, locals) + File "", line 1, in + File "/home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py", line 10, in + index_error() + File "/home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py", line 6, in index_error + print(lst[len(lst)]) + ~~~^^^^^^^^^^ +IndexError: list index out of range +Uncaught exception. Entering post mortem debugging +Running 'cont' or 'step' will restart the program +> /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(6)index_error() +-> print(lst[len(lst)]) +(Pdb) +``` + +::: + +#### Step-by-step execution + +**Situation**: You believe a bug exists in a module but are not sure where. + +For instance we are trying to debug {download}`wiener_filtering.py`. +Indeed the code runs, but the filtering does not work well. + +- Run the script in IPython with the debugger using `%run -d +wiener_filtering.py` : + + ```text + In [1]: %run -d wiener_filtering.py + *** Blank or comment + *** Blank or comment + *** Blank or comment + NOTE: Enter 'c' at the ipdb> prompt to continue execution. 
+ > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(1)() + ----> 1 """Wiener filtering a noisy raccoon face: this module is buggy""" + 2 + 3 import numpy as np + 4 import scipy as sp + 5 import matplotlib.pyplot as plt + ``` + +- Set a break point at line 29 using `b 29`: + + ```text + ipdb> n + > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(3)() + 1 """Wiener filtering a noisy raccoon face: this module is buggy""" + 2 + ----> 3 import numpy as np + 4 import scipy as sp + 5 import matplotlib.pyplot as plt + + ipdb> b 29 + Breakpoint 1 at /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:29 + ``` + +- Continue execution to next breakpoint with `c(ont(inue))`: + + ```text + ipdb> c + > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(29)iterated_wiener() + 27 Do not use this: this is crappy code to demo bugs! + 28 """ + 1--> 29 noisy_img = noisy_img + 30 denoised_img = local_mean(noisy_img, size=size) + 31 l_var = local_var(noisy_img, size=size) + ``` + +- Step into code with `n(ext)` and `s(tep)`: `next` jumps to the next + statement in the current execution context, while `step` will go across + execution contexts, i.e. 
enable exploring inside function calls: + + ```ipython + ipdb> s + > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(30)iterated_wiener() + 28 """ + 1 29 noisy_img = noisy_img + ---> 30 denoised_img = local_mean(noisy_img, size=size) + 31 l_var = local_var(noisy_img, size=size) + 32 for i in range(3): + + ipdb> n + > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(31)iterated_wiener() + 1 29 noisy_img = noisy_img + 30 denoised_img = local_mean(noisy_img, size=size) + ---> 31 l_var = local_var(noisy_img, size=size) + 32 for i in range(3): + 33 res = noisy_img - denoised_img + ``` + +- Step a few lines and explore the local variables: + + ```text + ipdb> n + > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(32)iterated_wiener() + 30 denoised_img = local_mean(noisy_img, size=size) + 31 l_var = local_var(noisy_img, size=size) + ---> 32 for i in range(3): + 33 res = noisy_img - denoised_img + 34 noise = (res**2).sum() / res.size + + ipdb> print(l_var) + [[2571 2782 3474 ... 3008 2922 3141] + [2105 708 475 ... 469 354 2884] + [1697 420 645 ... 273 236 2517] + ... + [2437 345 432 ... 413 387 4188] + [2598 179 247 ... 367 441 3909] + [2808 2525 3117 ... 4413 4454 4385]] + ipdb> print(l_var.min()) + 0 + ``` + +Oh dear, nothing but integers, and 0 variation. Here is our bug, we are +doing integer arithmetic. 
+ +:::{admonition} Raising exceptions on numerical errors +When we run the {download}`wiener_filtering.py` file, the following +warnings are raised: + +```ipython +In [2]: %run wiener_filtering.py +/home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:35: RuntimeWarning: divide by zero encountered in divide + noise_level = 1 - noise / l_var +``` + +We can turn these warnings into exceptions, which enables us to do +post-mortem debugging on them, and find our problem more quickly: + +```ipython +In [3]: np.seterr(all='raise') +Out[3]: {'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'} + +In [4]: %run wiener_filtering.py +--------------------------------------------------------------------------- +FloatingPointError Traceback (most recent call last) +File ~/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:52 + 49 plt.matshow(face[cut], cmap=plt.cm.gray) + 50 plt.matshow(noisy_face[cut], cmap=plt.cm.gray) +---> 52 denoised_face = iterated_wiener(noisy_face) + 53 plt.matshow(denoised_face[cut], cmap=plt.cm.gray) + 55 plt.show() + +File ~/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:35, in iterated_wiener(noisy_img, size) + 33 res = noisy_img - denoised_img + 34 noise = (res**2).sum() / res.size +---> 35 noise_level = 1 - noise / l_var + 36 noise_level[noise_level < 0] = 0 + 37 denoised_img = np.int64(noise_level * res) + +FloatingPointError: divide by zero encountered in divide +``` + +::: + +#### Other ways of starting a debugger + +- **Raising an exception as a poor man's breakpoint** + + If you find it tedious to note the line number to set a break point, + you can simply raise an exception at the point that you want to + inspect and use IPython's `%debug`. Note that in this case you cannot + step or continue the execution.
+ +- **Debugging test failures using nosetests** + + You can run `nosetests --pdb` to drop in post-mortem debugging on + exceptions, and `nosetests --pdb-failure` to inspect test failures + using the debugger. + + In addition, you can use the IPython interface for the debugger in nose + by installing the nose plugin + [ipdbplugin](https://pypi.org/project/ipdbplugin). You can then + pass `--ipdb` and `--ipdb-failure` options to nosetests. + +- **Calling the debugger explicitly** + + Insert the following line where you want to drop in the debugger: + + ```python + import pdb; pdb.set_trace() + ``` + +:::{warning} +When running `nosetests`, the output is captured, and thus it seems +that the debugger does not work. Simply run the nosetests with the `-s` +flag. +::: + +:::{admonition} Graphical debuggers and alternatives + +- [pudb](https://pypi.org/project/pudb) is a good semi-graphical + debugger with a text user interface in the console. +- The [Visual Studio Code](https://code.visualstudio.com/) integrated + development environment includes a debugging mode. +- The [Mu editor](https://codewith.mu/) is a simple Python editor that + includes a debugging mode. + ::: + +### Debugger commands and interaction + +| | | +| ---------- | ------------------------------------------------------------------- | +| `l(ist)` | Lists the code at the current position | +| `u(p)` | Walk up the call stack | +| `d(own)` | Walk down the call stack | +| `n(ext)` | Execute the next line (does not go down in new functions) | +| `s(tep)` | Execute the next statement (goes down in new functions) | +| `bt` | Print the call stack | +| `a` | Print the local variables | +| `!command` | Execute the given **Python** command (as opposed to pdb commands) | + +:::{warning} +**Debugger commands are not Python code** + +You cannot name the variables the way you want.
For instance, if in +you cannot override the variables in the current frame with the same +name: **use different names than your local variable when typing code +in the debugger**. +::: + +#### Getting help when in the debugger + +Type `h` or `help` to access the interactive help: + +```python +ipdb> help + +Documented commands (type help ): +======================================== +EOF commands enable ll pp s until +a condition exceptions longlist psource skip_hidden up +alias cont exit n q skip_predicates w +args context h next quit source whatis +b continue help p r step where +break d ignore pdef restart tbreak +bt debug j pdoc return u +c disable jump pfile retval unalias +cl display l pinfo run undisplay +clear down list pinfo2 rv unt + +Miscellaneous help topics: +========================== +exec pdb + +Undocumented commands: +====================== +interact +``` + +## Debugging segmentation faults using gdb + +If you have a segmentation fault, you cannot debug it with pdb, as it +crashes the Python interpreter before it can drop in the debugger. +Similarly, if you have a bug in C code embedded in Python, pdb is +useless. For this we turn to the gnu debugger, +[gdb](https://www.gnu.org/software/gdb/), available on Linux. + +Before we start with gdb, let us add a few Python-specific tools to it. +For this we add a few macros to our `~/.gdbinit`. The optimal choice of +macro depends on your Python version and your gdb version. I have added a +simplified version in {download}`gdbinit`, but feel free to read +[DebuggingWithGdb](https://wiki.python.org/moin/DebuggingWithGdb). + +To debug with gdb the Python script {download}`segfault.py`, we can run the +script in gdb as follows + +```console +$ gdb python +... +(gdb) run segfault.py +Starting program: /usr/bin/python segfault.py +[Thread debugging using libthread_db enabled] + +Program received signal SIGSEGV, Segmentation fault. +_strided_byte_copy (dst=0x8537478 "\360\343G", outstrides=4, src= + 0x86c0690
, instrides=32, N=3, + elsize=4) + at numpy/core/src/multiarray/ctors.c:365 +365 _FAST_MOVE(Int32); +(gdb) +``` + +We get a segfault, and gdb captures it for post-mortem debugging in the C +level stack (not the Python call stack). We can debug the C call stack +using gdb's commands: + +```console +(gdb) up +#1 0x004af4f5 in _copy_from_same_shape (dest=, + src=, myfunc=0x496780 <_strided_byte_copy>, + swap=0) +at numpy/core/src/multiarray/ctors.c:748 +748 myfunc(dit->dataptr, dest->strides[maxaxis], +``` + +As you can see, right now, we are in the C code of numpy. We would like +to know what is the Python code that triggers this segfault, so we go up +the stack until we hit the Python execution loop: + +```console +(gdb) up +#8 0x080ddd23 in call_function (f= + Frame 0x85371ec, for file /home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py, line 156, in _leading_trailing (a=, _nc=), throwflag=0) + at ../Python/ceval.c:3750 +3750 ../Python/ceval.c: No such file or directory. + in ../Python/ceval.c + +(gdb) up +#9 PyEval_EvalFrameEx (f= + Frame 0x85371ec, for file /home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py, line 156, in _leading_trailing (a=, _nc=), throwflag=0) + at ../Python/ceval.c:2412 +2412 in ../Python/ceval.c +(gdb) +``` + +Once we are in the Python execution loop, we can use our special Python +helper function. For instance we can find the corresponding Python code: + +```console +(gdb) pyframe +/home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py (158): _leading_trailing +(gdb) +``` + +This is numpy code, we need to go up until we find code that we have +written: + +```console +(gdb) up +... +(gdb) up +#34 0x080dc97a in PyEval_EvalFrameEx (f= + Frame 0x82f064c, for file segfault.py, line 11, in print_big_array (small_array=, big_array=), throwflag=0) at ../Python/ceval.c:1630 +1630 ../Python/ceval.c: No such file or directory. 
+ in ../Python/ceval.c +(gdb) pyframe +segfault.py (12): print_big_array +``` + +The corresponding code is: + +```{literalinclude} segfault.py +:language: py +:lines: 8-14 +``` + +Thus the segfault happens when printing `big_array[-10:]`. The reason is +simply that `big_array` has been allocated with its end outside the +program memory. + +:::{note} +For a list of Python-specific commands defined in the `gdbinit`, read +the source of this file. +::: + +--- + +::: {exercise-start} +:label: to-debug-ex +:class: dropdown +::: + +The following script is well documented and hopefully legible. It +seeks to answer a problem of actual interest for numerical computing, +but it does not work... Can you debug it? + +**Python source code:** {download}`to_debug.py <to_debug.py>` + +:::{literalinclude} to_debug.py +::: + +::: {exercise-end} +::: + +::: {solution-start} to-debug-ex +:class: dropdown +::: + +:::{literalinclude} to_debug_solution.py +::: + +::: {solution-end} +::: diff --git a/advanced/debugging/index.rst b/advanced/debugging/index.rst deleted file mode 100644 index dde341d8b..000000000 --- a/advanced/debugging/index.rst +++ /dev/null @@ -1,665 +0,0 @@ -.. _debugging_chapter: - -============== -Debugging code -============== - -**Author**: *Gaël Varoquaux* - -This section explores tools to understand better your code base: -debugging, to find and fix bugs. - -It is not specific to the scientific Python community, but the strategies -that we will employ are tailored to its needs. - -.. topic:: Prerequisites - - * NumPy - * IPython - * `nosetests `__ - * `pyflakes `__ - * gdb for the C-debugging part. - -.. contents:: Chapter contents - :local: - :depth: 2 - - -Avoiding bugs -============= - -Coding best practices to avoid getting in trouble --------------------------------------------------- - -.. sidebar:: Brian Kernighan - - *“Everyone knows that debugging is twice as hard as writing a - program in the first place. 
So if you're as clever as you can be - when you write it, how will you ever debug it?”* - -* We all write buggy code. Accept it. Deal with it. -* Write your code with testing and debugging in mind. -* Keep It Simple, Stupid (KISS). - - * What is the simplest thing that could possibly work? - -* Don't Repeat Yourself (DRY). - - * Every piece of knowledge must have a single, unambiguous, - authoritative representation within a system. - * Constants, algorithms, etc... - -* Try to limit interdependencies of your code. (Loose Coupling) -* Give your variables, functions and modules meaningful names (not - mathematics names) - -pyflakes: fast static analysis -------------------------------- - -They are several static analysis tools in Python; to name a few: - -* `pylint `_ -* `pychecker `_ -* `pyflakes `_ -* `flake8 `_ - -Here we focus on `pyflakes`, which is the simplest tool. - - * **Fast, simple** - - * Detects syntax errors, missing imports, typos on names. - -Another good recommendation is the `flake8` tool which is a combination of -pyflakes and pep8. Thus, in addition to the types of errors that pyflakes -catches, flake8 detects violations of the recommendation in `PEP8 -`_ style guide. - -Integrating pyflakes (or flake8) in your editor or IDE is highly -recommended, it **does yield productivity gains**. - -Running pyflakes on the current edited file -............................................ - -You can bind a key to run pyflakes in the current buffer. 
- -* **In kate** - Menu: 'settings -> configure kate - - * In plugins enable 'external tools' - - * In external Tools', add `pyflakes`:: - - kdialog --title "pyflakes %filename" --msgbox "$(pyflakes %filename)" - -* **In TextMate** - - Menu: TextMate -> Preferences -> Advanced -> Shell variables, add a - shell variable:: - - TM_PYCHECKER = /Library/Frameworks/Python.framework/Versions/Current/bin/pyflakes - - Then `Ctrl-Shift-V` is binded to a pyflakes report - - -* **In vim** - In your `.vimrc` (binds F5 to `pyflakes`):: - - autocmd FileType python let &mp = 'echo "*** running % ***" ; pyflakes %' - autocmd FileType tex,mp,rst,python imap [15~ :make!^M - autocmd FileType tex,mp,rst,python map [15~ :make!^M - autocmd FileType tex,mp,rst,python set autowrite - -* **In emacs** - In your `.emacs` (binds F5 to `pyflakes`):: - - (defun pyflakes-thisfile () (interactive) - (compile (format "pyflakes %s" (buffer-file-name))) - ) - - (define-minor-mode pyflakes-mode - "Toggle pyflakes mode. - With no argument, this command toggles the mode. - Non-null prefix argument turns on the mode. - Null prefix argument turns off the mode." - ;; The initial value. - nil - ;; The indicator for the mode line. - " Pyflakes" - ;; The minor mode bindings. - '( ([f5] . pyflakes-thisfile) ) - ) - - (add-hook 'python-mode-hook (lambda () (pyflakes-mode t))) - -A type-as-go spell-checker like integration -............................................ - -* **In vim** - - * Use the pyflakes.vim plugin: - - #. download the zip file from - https://www.vim.org/scripts/script.php?script_id=2441 - - #. extract the files in ``~/.vim/ftplugin/python`` - - #. make sure your vimrc has ``filetype plugin indent on`` - - .. image:: vim_pyflakes.png - - * Alternatively: use the `syntastic - `_ - plugin. This can be configured to use ``flake8`` too and also handles - on-the-fly checking for many other languages. - - .. 
image:: vim_syntastic.png - -* **In emacs** - - Use the flymake mode with pyflakes, documented on - https://www.emacswiki.org/emacs/FlyMake and included in Emacs 26 and - more recent. To activate it, use ``M-x`` (meta-key then x) and enter - `flymake-mode` at the prompt. To enable it automatically when - opening a Python file, add the following line to your .emacs file:: - - (add-hook 'python-mode-hook '(lambda () (flymake-mode))) - - -Debugging workflow -=================== - -If you do have a non trivial bug, this is when debugging strategies kick -in. There is no silver bullet. Yet, strategies help: - - **For debugging a given problem, the favorable situation is when the - problem is isolated in a small number of lines of code, outside - framework or application code, with short modify-run-fail cycles** - -#. Make it fail reliably. Find a test case that makes the code fail - every time. -#. Divide and Conquer. Once you have a failing test case, isolate the - failing code. - - * Which module. - * Which function. - * Which line of code. - - => isolate a small reproducible failure: a test case - -#. Change one thing at a time and re-run the failing test case. -#. Use the debugger to understand what is going wrong. -#. Take notes and be patient. It may take a while. - -.. note:: - - Once you have gone through this process: isolated a tight piece of - code reproducing the bug and fix the bug using this piece of code, add - the corresponding code to your test suite. - -Using the Python debugger -========================= - -The python debugger, ``pdb``: https://docs.python.org/3/library/pdb.html, -allows you to inspect your code interactively. - -Specifically it allows you to: - - * View the source code. - * Walk up and down the call stack. - * Inspect values of variables. - * Modify values of variables. - * Set breakpoints. - -.. topic:: **print** - - Yes, ``print`` statements do work as a debugging tool. 
However to - inspect runtime, it is often more efficient to use the debugger. - -Invoking the debugger ------------------------ - -Ways to launch the debugger: - -#. Postmortem, launch debugger after module errors. -#. Launch the module with the debugger. -#. Call the debugger inside the module - - -Postmortem -........... - -**Situation**: You're working in IPython and you get a traceback. - -Here we debug the file :download:`index_error.py`. When running it, an -:class:`IndexError` is raised. Type ``%debug`` and drop into the debugger. - -.. code-block:: ipython - - In [1]: %run index_error.py - --------------------------------------------------------------------------- - IndexError Traceback (most recent call last) - File ~/src/scientific-python-lectures/advanced/debugging/index_error.py:10 - 6 print(lst[len(lst)]) - 9 if __name__ == "__main__": - ---> 10 index_error() - - File ~/src/scientific-python-lectures/advanced/debugging/index_error.py:6, in index_error() - 4 def index_error(): - 5 lst = list("foobar") - ----> 6 print(lst[len(lst)]) - - IndexError: list index out of range - - In [2]: %debug - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(6)index_error() - 4 def index_error(): - 5 lst = list("foobar") - ----> 6 print(lst[len(lst)]) - 7 - 8 - - ipdb> list - 1 """Small snippet to raise an IndexError.""" - 2 - 3 - 4 def index_error(): - 5 lst = list("foobar") - ----> 6 print(lst[len(lst)]) - 7 - 8 - 9 if __name__ == "__main__": - 10 index_error() - - ipdb> len(lst) - 6 - ipdb> print(lst[len(lst) - 1]) - r - ipdb> quit - -.. topic:: Post-mortem debugging without IPython - - In some situations you cannot use IPython, for instance to debug a - script that wants to be called from the command line. 
In this case, - you can call the script with ``python -m pdb script.py``:: - - $ python -m pdb index_error.py - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(1)() - -> """Small snippet to raise an IndexError.""" - (Pdb) continue - Traceback (most recent call last): - File "/usr/lib64/python3.11/pdb.py", line 1793, in main - pdb._run(target) - File "/usr/lib64/python3.11/pdb.py", line 1659, in _run - self.run(target.code) - File "/usr/lib64/python3.11/bdb.py", line 600, in run - exec(cmd, globals, locals) - File "", line 1, in - File "/home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py", line 10, in - index_error() - File "/home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py", line 6, in index_error - print(lst[len(lst)]) - ~~~^^^^^^^^^^ - IndexError: list index out of range - Uncaught exception. Entering post mortem debugging - Running 'cont' or 'step' will restart the program - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/index_error.py(6)index_error() - -> print(lst[len(lst)]) - (Pdb) - -Step-by-step execution -....................... - -**Situation**: You believe a bug exists in a module but are not sure where. - -For instance we are trying to debug :download:`wiener_filtering.py`. -Indeed the code runs, but the filtering does not work well. - -* Run the script in IPython with the debugger using ``%run -d - wiener_filtering.py`` : - - .. code-block:: ipython - - In [1]: %run -d wiener_filtering.py - *** Blank or comment - *** Blank or comment - *** Blank or comment - NOTE: Enter 'c' at the ipdb> prompt to continue execution. - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(1)() - ----> 1 """Wiener filtering a noisy raccoon face: this module is buggy""" - 2 - 3 import numpy as np - 4 import scipy as sp - 5 import matplotlib.pyplot as plt - -* Set a break point at line 29 using ``b 29``: - - .. 
code-block:: ipython - - ipdb> n - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(3)() - 1 """Wiener filtering a noisy raccoon face: this module is buggy""" - 2 - ----> 3 import numpy as np - 4 import scipy as sp - 5 import matplotlib.pyplot as plt - - ipdb> b 29 - Breakpoint 1 at /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:29 - -* Continue execution to next breakpoint with ``c(ont(inue))``: - - .. code-block:: ipython - - ipdb> c - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(29)iterated_wiener() - 27 Do not use this: this is crappy code to demo bugs! - 28 """ - 1--> 29 noisy_img = noisy_img - 30 denoised_img = local_mean(noisy_img, size=size) - 31 l_var = local_var(noisy_img, size=size) - -* Step into code with ``n(ext)`` and ``s(tep)``: ``next`` jumps to the next - statement in the current execution context, while ``step`` will go across - execution contexts, i.e. enable exploring inside function calls: - - .. code-block:: ipython - - ipdb> s - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(30)iterated_wiener() - 28 """ - 1 29 noisy_img = noisy_img - ---> 30 denoised_img = local_mean(noisy_img, size=size) - 31 l_var = local_var(noisy_img, size=size) - 32 for i in range(3): - - ipdb> n - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(31)iterated_wiener() - 1 29 noisy_img = noisy_img - 30 denoised_img = local_mean(noisy_img, size=size) - ---> 31 l_var = local_var(noisy_img, size=size) - 32 for i in range(3): - 33 res = noisy_img - denoised_img - -* Step a few lines and explore the local variables: - - .. 
code-block:: ipython - - ipdb> n - > /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py(32)iterated_wiener() - 30 denoised_img = local_mean(noisy_img, size=size) - 31 l_var = local_var(noisy_img, size=size) - ---> 32 for i in range(3): - 33 res = noisy_img - denoised_img - 34 noise = (res**2).sum() / res.size - - ipdb> print(l_var) - [[2571 2782 3474 ... 3008 2922 3141] - [2105 708 475 ... 469 354 2884] - [1697 420 645 ... 273 236 2517] - ... - [2437 345 432 ... 413 387 4188] - [2598 179 247 ... 367 441 3909] - [2808 2525 3117 ... 4413 4454 4385]] - ipdb> print(l_var.min()) - 0 - -Oh dear, nothing but integers, and 0 variation. Here is our bug, we are -doing integer arithmetic. - -.. topic:: Raising exception on numerical errors - - When we run the :download:`wiener_filtering.py` file, the following - warnings are raised: - - .. code-block:: ipython - - In [2]: %run wiener_filtering.py - /home/jarrod/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:35: RuntimeWarning: divide by zero encountered in divide - noise_level = 1 - noise / l_var - - We can turn these warnings in exception, which enables us to do - post-mortem debugging on them, and find our problem more quickly: - - .. 
code-block:: ipython - - In [3]: np.seterr(all='raise') - Out[3]: {'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'} - - In [4]: %run wiener_filtering.py - --------------------------------------------------------------------------- - FloatingPointError Traceback (most recent call last) - File ~/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:52 - 49 plt.matshow(face[cut], cmap=plt.cm.gray) - 50 plt.matshow(noisy_face[cut], cmap=plt.cm.gray) - ---> 52 denoised_face = iterated_wiener(noisy_face) - 53 plt.matshow(denoised_face[cut], cmap=plt.cm.gray) - 55 plt.show() - - File ~/src/scientific-python-lectures/advanced/debugging/wiener_filtering.py:35, in iterated_wiener(noisy_img, size) - 33 res = noisy_img - denoised_img - 34 noise = (res**2).sum() / res.size - ---> 35 noise_level = 1 - noise / l_var - 36 noise_level[noise_level < 0] = 0 - 37 denoised_img = np.int64(noise_level * res) - - FloatingPointError: divide by zero encountered in divide - - -Other ways of starting a debugger -.................................... - -* **Raising an exception as a poor man break point** - - If you find it tedious to note the line number to set a break point, - you can simply raise an exception at the point that you want to - inspect and use IPython's ``%debug``. Note that in this case you cannot - step or continue the execution. - -* **Debugging test failures using nosetests** - - You can run ``nosetests --pdb`` to drop in post-mortem debugging on - exceptions, and ``nosetests --pdb-failure`` to inspect test failures - using the debugger. - - In addition, you can use the IPython interface for the debugger in nose - by installing the nose plugin - `ipdbplugin `_. You can than - pass ``--ipdb`` and ``--ipdb-failure`` options to nosetests. - -* **Calling the debugger explicitly** - - Insert the following line where you want to drop in the debugger:: - - import pdb; pdb.set_trace() - -.. 
warning:: - - When running ``nosetests``, the output is captured, and thus it seems - that the debugger does not work. Simply run the nosetests with the ``-s`` - flag. - - -.. topic:: Graphical debuggers and alternatives - - * `pudb `_ is a good semi-graphical - debugger with a text user interface in the console. - - * The `Visual Studio Code `_ integrated - development environment includes a debugging mode. - - * The `Mu editor `_ is a simple Python editor that - includes a debugging mode. - - -Debugger commands and interaction ----------------------------------- - -============ ====================================================================== -``l(list)`` Lists the code at the current position -``u(p)`` Walk up the call stack -``d(own)`` Walk down the call stack -``n(ext)`` Execute the next line (does not go down in new functions) -``s(tep)`` Execute the next statement (goes down in new functions) -``bt`` Print the call stack -``a`` Print the local variables -``!command`` Execute the given **Python** command (by opposition to pdb commands -============ ====================================================================== - -.. warning:: **Debugger commands are not Python code** - - You cannot name the variables the way you want. For instance, if in - you cannot override the variables in the current frame with the same - name: **use different names than your local variable when typing code - in the debugger**. - -Getting help when in the debugger -................................. - -Type ``h`` or ``help`` to access the interactive help: - -.. 
sourcecode:: pycon - - ipdb> help - - Documented commands (type help ): - ======================================== - EOF commands enable ll pp s until - a condition exceptions longlist psource skip_hidden up - alias cont exit n q skip_predicates w - args context h next quit source whatis - b continue help p r step where - break d ignore pdef restart tbreak - bt debug j pdoc return u - c disable jump pfile retval unalias - cl display l pinfo run undisplay - clear down list pinfo2 rv unt - - Miscellaneous help topics: - ========================== - exec pdb - - Undocumented commands: - ====================== - interact - -Debugging segmentation faults using gdb -========================================== - -If you have a segmentation fault, you cannot debug it with pdb, as it -crashes the Python interpreter before it can drop in the debugger. -Similarly, if you have a bug in C code embedded in Python, pdb is -useless. For this we turn to the gnu debugger, -`gdb `_, available on Linux. - -Before we start with gdb, let us add a few Python-specific tools to it. -For this we add a few macros to our ``~/.gdbinit``. The optimal choice of -macro depends on your Python version and your gdb version. I have added a -simplified version in :download:`gdbinit`, but feel free to read -`DebuggingWithGdb `_. - -To debug with gdb the Python script :download:`segfault.py`, we can run the -script in gdb as follows - -.. sourcecode:: console - - $ gdb python - ... - (gdb) run segfault.py - Starting program: /usr/bin/python segfault.py - [Thread debugging using libthread_db enabled] - - Program received signal SIGSEGV, Segmentation fault. - _strided_byte_copy (dst=0x8537478 "\360\343G", outstrides=4, src= - 0x86c0690
, instrides=32, N=3, - elsize=4) - at numpy/core/src/multiarray/ctors.c:365 - 365 _FAST_MOVE(Int32); - (gdb) - -We get a segfault, and gdb captures it for post-mortem debugging in the C -level stack (not the Python call stack). We can debug the C call stack -using gdb's commands: - -.. sourcecode:: console - - (gdb) up - #1 0x004af4f5 in _copy_from_same_shape (dest=, - src=, myfunc=0x496780 <_strided_byte_copy>, - swap=0) - at numpy/core/src/multiarray/ctors.c:748 - 748 myfunc(dit->dataptr, dest->strides[maxaxis], - -As you can see, right now, we are in the C code of numpy. We would like -to know what is the Python code that triggers this segfault, so we go up -the stack until we hit the Python execution loop: - -.. sourcecode:: console - - (gdb) up - #8 0x080ddd23 in call_function (f= - Frame 0x85371ec, for file /home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py, line 156, in _leading_trailing (a=, _nc=), throwflag=0) - at ../Python/ceval.c:3750 - 3750 ../Python/ceval.c: No such file or directory. - in ../Python/ceval.c - - (gdb) up - #9 PyEval_EvalFrameEx (f= - Frame 0x85371ec, for file /home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py, line 156, in _leading_trailing (a=, _nc=), throwflag=0) - at ../Python/ceval.c:2412 - 2412 in ../Python/ceval.c - (gdb) - -Once we are in the Python execution loop, we can use our special Python -helper function. For instance we can find the corresponding Python code: - -.. sourcecode:: console - - (gdb) pyframe - /home/varoquau/usr/lib/python2.6/site-packages/numpy/core/arrayprint.py (158): _leading_trailing - (gdb) - -This is numpy code, we need to go up until we find code that we have -written: - -.. sourcecode:: console - - (gdb) up - ... - (gdb) up - #34 0x080dc97a in PyEval_EvalFrameEx (f= - Frame 0x82f064c, for file segfault.py, line 11, in print_big_array (small_array=, big_array=), throwflag=0) at ../Python/ceval.c:1630 - 1630 ../Python/ceval.c: No such file or directory. 
- in ../Python/ceval.c - (gdb) pyframe - segfault.py (12): print_big_array - -The corresponding code is: - -.. literalinclude:: segfault.py - :language: py - :lines: 8-14 - -Thus the segfault happens when printing ``big_array[-10:]``. The reason is -simply that ``big_array`` has been allocated with its end outside the -program memory. - -.. note:: - - For a list of Python-specific commands defined in the `gdbinit`, read - the source of this file. - - -____ - -.. topic:: **Wrap up exercise** - :class: green - - The following script is well documented and hopefully legible. It - seeks to answer a problem of actual interest for numerical computing, - but it does not work... Can you debug it? - - **Python source code:** :download:`to_debug.py ` - - .. only:: html - - .. literalinclude:: to_debug.py diff --git a/advanced/image_processing/examples/plot_block_mean.py b/advanced/image_processing/examples/plot_block_mean.py deleted file mode 100644 index 4cc4d6ef3..000000000 --- a/advanced/image_processing/examples/plot_block_mean.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Plot the block mean of an image -================================ - -An example showing how to use broad-casting to plot the mean of -blocks of an image. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -f = sp.datasets.face(gray=True) -sx, sy = f.shape -X, Y = np.ogrid[0:sx, 0:sy] - -regions = sy // 6 * (X // 4) + Y // 6 -block_mean = sp.ndimage.mean(f, labels=regions, index=np.arange(1, regions.max() + 1)) -block_mean.shape = (sx // 4, sy // 6) - -plt.figure(figsize=(5, 5)) -plt.imshow(block_mean, cmap="gray") -plt.axis("off") - -plt.show() diff --git a/advanced/image_processing/examples/plot_blur.py b/advanced/image_processing/examples/plot_blur.py deleted file mode 100644 index cfb6f5759..000000000 --- a/advanced/image_processing/examples/plot_blur.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Blurring of images -=================== - -An example showing various processes that blur an image. -""" - -import scipy as sp -import matplotlib.pyplot as plt - -face = sp.datasets.face(gray=True) -blurred_face = sp.ndimage.gaussian_filter(face, sigma=3) -very_blurred = sp.ndimage.gaussian_filter(face, sigma=5) -local_mean = sp.ndimage.uniform_filter(face, size=11) - -plt.figure(figsize=(9, 3)) -plt.subplot(131) -plt.imshow(blurred_face, cmap="gray") -plt.axis("off") -plt.subplot(132) -plt.imshow(very_blurred, cmap="gray") -plt.axis("off") -plt.subplot(133) -plt.imshow(local_mean, cmap="gray") -plt.axis("off") - -plt.subplots_adjust(wspace=0, hspace=0.0, top=0.99, bottom=0.01, left=0.01, right=0.99) - -plt.show() diff --git a/advanced/image_processing/examples/plot_clean_morpho.py b/advanced/image_processing/examples/plot_clean_morpho.py deleted file mode 100644 index cdcd1dc49..000000000 --- a/advanced/image_processing/examples/plot_clean_morpho.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -Cleaning segmentation with mathematical morphology -=================================================== - -An example showing how to clean segmentation with mathematical -morphology: removing small regions and holes. 
- -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = (im > im.mean()).astype(float) - - -img = mask + 0.3 * rng.normal(size=mask.shape) - -binary_img = img > 0.5 - -# Remove small white regions -open_img = sp.ndimage.binary_opening(binary_img) -# Remove small black hole -close_img = sp.ndimage.binary_closing(open_img) - -plt.figure(figsize=(12, 3)) - -l = 128 - -plt.subplot(141) -plt.imshow(binary_img[:l, :l], cmap="gray") -plt.axis("off") -plt.subplot(142) -plt.imshow(open_img[:l, :l], cmap="gray") -plt.axis("off") -plt.subplot(143) -plt.imshow(close_img[:l, :l], cmap="gray") -plt.axis("off") -plt.subplot(144) -plt.imshow(mask[:l, :l], cmap="gray") -plt.contour(close_img[:l, :l], [0.5], linewidths=2, colors="r") -plt.axis("off") - -plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1) - -plt.show() diff --git a/advanced/image_processing/examples/plot_denoising.py b/advanced/image_processing/examples/plot_denoising.py deleted file mode 100644 index c460290a4..000000000 --- a/advanced/image_processing/examples/plot_denoising.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Denoising an image with the median filter -========================================== - -This example shows the original image, the noisy image, the denoised -one (with the median filter) and the difference between the two. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) - -im = np.zeros((20, 20)) -im[5:-5, 5:-5] = 1 -im = sp.ndimage.distance_transform_bf(im) -im_noise = im + 0.2 * rng.normal(size=im.shape) - -im_med = sp.ndimage.median_filter(im_noise, 3) - -plt.figure(figsize=(16, 5)) - -plt.subplot(141) -plt.imshow(im, interpolation="nearest") -plt.axis("off") -plt.title("Original image", fontsize=20) -plt.subplot(142) -plt.imshow(im_noise, interpolation="nearest", vmin=0, vmax=5) -plt.axis("off") -plt.title("Noisy image", fontsize=20) -plt.subplot(143) -plt.imshow(im_med, interpolation="nearest", vmin=0, vmax=5) -plt.axis("off") -plt.title("Median filter", fontsize=20) -plt.subplot(144) -plt.imshow(np.abs(im - im_med), cmap="hot", interpolation="nearest") -plt.axis("off") -plt.title("Error", fontsize=20) - - -plt.subplots_adjust(wspace=0.02, hspace=0.02, top=0.9, bottom=0, left=0, right=1) - -plt.show() diff --git a/advanced/image_processing/examples/plot_display_face.py b/advanced/image_processing/examples/plot_display_face.py deleted file mode 100644 index 4e4ff948a..000000000 --- a/advanced/image_processing/examples/plot_display_face.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Display a Raccoon Face -====================== - -An example that displays a raccoon face with matplotlib. 
-""" - -import scipy as sp -import matplotlib.pyplot as plt - -f = sp.datasets.face(gray=True) - -plt.figure(figsize=(10, 3.6)) - -plt.subplot(131) -plt.imshow(f, cmap="gray") - -plt.subplot(132) -plt.imshow(f, cmap="gray", vmin=30, vmax=200) -plt.axis("off") - -plt.subplot(133) -plt.imshow(f, cmap="gray") -plt.contour(f, [50, 200]) -plt.axis("off") - -plt.subplots_adjust(wspace=0, hspace=0.0, top=0.99, bottom=0.01, left=0.05, right=0.99) -plt.show() diff --git a/advanced/image_processing/examples/plot_face.py b/advanced/image_processing/examples/plot_face.py deleted file mode 100644 index 560da8eeb..000000000 --- a/advanced/image_processing/examples/plot_face.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Displaying a Raccoon Face -========================= - -Small example to plot a raccoon face. -""" - -import scipy as sp -import imageio.v3 as iio - -f = sp.datasets.face() -iio.imwrite("face.png", f) # uses the Image module (PIL) - -import matplotlib.pyplot as plt - -plt.imshow(f) -plt.show() diff --git a/advanced/image_processing/examples/plot_face_denoise.py b/advanced/image_processing/examples/plot_face_denoise.py deleted file mode 100644 index 29601d2a4..000000000 --- a/advanced/image_processing/examples/plot_face_denoise.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Image denoising -================ - -This example demoes image denoising on a Raccoon face. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) - -f = sp.datasets.face(gray=True) -f = f[230:290, 220:320] - -noisy = f + 0.4 * f.std() * rng.random(f.shape) - -gauss_denoised = sp.ndimage.gaussian_filter(noisy, 2) -med_denoised = sp.ndimage.median_filter(noisy, 3) - - -plt.figure(figsize=(12, 2.8)) - -plt.subplot(131) -plt.imshow(noisy, cmap="gray", vmin=40, vmax=220) -plt.axis("off") -plt.title("noisy", fontsize=20) -plt.subplot(132) -plt.imshow(gauss_denoised, cmap="gray", vmin=40, vmax=220) -plt.axis("off") -plt.title("Gaussian filter", fontsize=20) -plt.subplot(133) -plt.imshow(med_denoised, cmap="gray", vmin=40, vmax=220) -plt.axis("off") -plt.title("Median filter", fontsize=20) - -plt.subplots_adjust(wspace=0.02, hspace=0.02, top=0.9, bottom=0, left=0, right=1) -plt.show() diff --git a/advanced/image_processing/examples/plot_find_edges.py b/advanced/image_processing/examples/plot_find_edges.py deleted file mode 100644 index 02816698d..000000000 --- a/advanced/image_processing/examples/plot_find_edges.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -Finding edges with Sobel filters -================================== - -The Sobel filter is one of the simplest way of finding edges. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) - -im = np.zeros((256, 256)) -im[64:-64, 64:-64] = 1 - -im = sp.ndimage.rotate(im, 15, mode="constant") -im = sp.ndimage.gaussian_filter(im, 8) - -sx = sp.ndimage.sobel(im, axis=0, mode="constant") -sy = sp.ndimage.sobel(im, axis=1, mode="constant") -sob = np.hypot(sx, sy) - -plt.figure(figsize=(16, 5)) -plt.subplot(141) -plt.imshow(im, cmap="gray") -plt.axis("off") -plt.title("square", fontsize=20) -plt.subplot(142) -plt.imshow(sx) -plt.axis("off") -plt.title("Sobel (x direction)", fontsize=20) -plt.subplot(143) -plt.imshow(sob) -plt.axis("off") -plt.title("Sobel filter", fontsize=20) - -im += 0.07 * rng.random(im.shape) - -sx = sp.ndimage.sobel(im, axis=0, mode="constant") -sy = sp.ndimage.sobel(im, axis=1, mode="constant") -sob = np.hypot(sx, sy) - -plt.subplot(144) -plt.imshow(sob) -plt.axis("off") -plt.title("Sobel for noisy image", fontsize=20) - - -plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=0.9) - -plt.show() diff --git a/advanced/image_processing/examples/plot_find_object.py b/advanced/image_processing/examples/plot_find_object.py deleted file mode 100644 index 9531bd253..000000000 --- a/advanced/image_processing/examples/plot_find_object.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Find the bounding box of an object -=================================== - -This example shows how to extract the bounding box of the largest object - -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = im > im.mean() - -label_im, nb_labels = sp.ndimage.label(mask) - -# Find the largest connected component -sizes = sp.ndimage.sum(mask, label_im, range(nb_labels + 1)) 
-mask_size = sizes < 1000 -remove_pixel = mask_size[label_im] -label_im[remove_pixel] = 0 -labels = np.unique(label_im) -label_im = np.searchsorted(labels, label_im) - -# Now that we have only one connected component, extract it's bounding box -slice_x, slice_y = sp.ndimage.find_objects(label_im == 4)[0] -roi = im[slice_x, slice_y] - -plt.figure(figsize=(4, 2)) -plt.axes((0, 0, 1, 1)) -plt.imshow(roi) -plt.axis("off") - -plt.show() diff --git a/advanced/image_processing/examples/plot_geom_face.py b/advanced/image_processing/examples/plot_geom_face.py deleted file mode 100644 index e824c4f99..000000000 --- a/advanced/image_processing/examples/plot_geom_face.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Geometrical transformations -============================== - -This examples demos some simple geometrical transformations on a Raccoon face. -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -face = sp.datasets.face(gray=True) -lx, ly = face.shape -# Cropping -crop_face = face[lx // 4 : -lx // 4, ly // 4 : -ly // 4] -# up <-> down flip -flip_ud_face = np.flipud(face) -# rotation -rotate_face = sp.ndimage.rotate(face, 45) -rotate_face_noreshape = sp.ndimage.rotate(face, 45, reshape=False) - -plt.figure(figsize=(12.5, 2.5)) - - -plt.subplot(151) -plt.imshow(face, cmap="gray") -plt.axis("off") -plt.subplot(152) -plt.imshow(crop_face, cmap="gray") -plt.axis("off") -plt.subplot(153) -plt.imshow(flip_ud_face, cmap="gray") -plt.axis("off") -plt.subplot(154) -plt.imshow(rotate_face, cmap="gray") -plt.axis("off") -plt.subplot(155) -plt.imshow(rotate_face_noreshape, cmap="gray") -plt.axis("off") - -plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1) - -plt.show() diff --git a/advanced/image_processing/examples/plot_granulo.py b/advanced/image_processing/examples/plot_granulo.py deleted file mode 100644 index 215e0344a..000000000 --- a/advanced/image_processing/examples/plot_granulo.py +++ /dev/null @@ -1,58 +0,0 @@ -""" 
-Granulometry -============ - -This example performs a simple granulometry analysis. -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - - -def disk_structure(n): - struct = np.zeros((2 * n + 1, 2 * n + 1)) - x, y = np.indices((2 * n + 1, 2 * n + 1)) - mask = (x - n) ** 2 + (y - n) ** 2 <= n**2 - struct[mask] = 1 - return struct.astype(bool) - - -def granulometry(data, sizes=None): - s = max(data.shape) - if sizes is None: - sizes = range(1, s / 2, 2) - granulo = [ - sp.ndimage.binary_opening(data, structure=disk_structure(n)).sum() - for n in sizes - ] - return granulo - - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = im > im.mean() - -granulo = granulometry(mask, sizes=np.arange(2, 19, 4)) - -plt.figure(figsize=(6, 2.2)) - -plt.subplot(121) -plt.imshow(mask, cmap="gray") -opened = sp.ndimage.binary_opening(mask, structure=disk_structure(10)) -opened_more = sp.ndimage.binary_opening(mask, structure=disk_structure(14)) -plt.contour(opened, [0.5], colors="b", linewidths=2) -plt.contour(opened_more, [0.5], colors="r", linewidths=2) -plt.axis("off") -plt.subplot(122) -plt.plot(np.arange(2, 19, 4), granulo, "ok", ms=8) - - -plt.subplots_adjust(wspace=0.02, hspace=0.15, top=0.95, bottom=0.15, left=0, right=0.95) -plt.show() diff --git a/advanced/image_processing/examples/plot_greyscale_dilation.py b/advanced/image_processing/examples/plot_greyscale_dilation.py deleted file mode 100644 index 2ede10a98..000000000 --- a/advanced/image_processing/examples/plot_greyscale_dilation.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Greyscale dilation -==================== - -This example illustrates greyscale mathematical morphology. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -im = np.zeros((64, 64)) -rng = np.random.default_rng(27446968) -x, y = (63 * rng.random((2, 8))).astype(int) -im[x, y] = np.arange(8) - -bigger_points = sp.ndimage.grey_dilation(im, size=(5, 5), structure=np.ones((5, 5))) - -square = np.zeros((16, 16)) -square[4:-4, 4:-4] = 1 -dist = sp.ndimage.distance_transform_bf(square) -dilate_dist = sp.ndimage.grey_dilation(dist, size=(3, 3), structure=np.ones((3, 3))) - -plt.figure(figsize=(12.5, 3)) -plt.subplot(141) -plt.imshow(im, interpolation="nearest", cmap="nipy_spectral") -plt.axis("off") -plt.subplot(142) -plt.imshow(bigger_points, interpolation="nearest", cmap="nipy_spectral") -plt.axis("off") -plt.subplot(143) -plt.imshow(dist, interpolation="nearest", cmap="nipy_spectral") -plt.axis("off") -plt.subplot(144) -plt.imshow(dilate_dist, interpolation="nearest", cmap="nipy_spectral") -plt.axis("off") - -plt.subplots_adjust(wspace=0, hspace=0.02, top=0.99, bottom=0.01, left=0.01, right=0.99) -plt.show() diff --git a/advanced/image_processing/examples/plot_histo_segmentation.py b/advanced/image_processing/examples/plot_histo_segmentation.py deleted file mode 100644 index 81d225f2d..000000000 --- a/advanced/image_processing/examples/plot_histo_segmentation.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Histogram segmentation -====================== - -This example does simple histogram analysis to perform segmentation. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = (im > im.mean()).astype(float) - -mask += 0.1 * im - -img = mask + 0.2 * rng.normal(size=mask.shape) - -hist, bin_edges = np.histogram(img, bins=60) -bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:]) - -binary_img = img > 0.5 - -plt.figure(figsize=(11, 4)) - -plt.subplot(131) -plt.imshow(img) -plt.axis("off") -plt.subplot(132) -plt.plot(bin_centers, hist, lw=2) -plt.axvline(0.5, color="r", ls="--", lw=2) -plt.text(0.57, 0.8, "histogram", fontsize=20, transform=plt.gca().transAxes) -plt.yticks([]) -plt.subplot(133) -plt.imshow(binary_img, cmap="gray", interpolation="nearest") -plt.axis("off") - -plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1) -plt.show() diff --git a/advanced/image_processing/examples/plot_interpolation_face.py b/advanced/image_processing/examples/plot_interpolation_face.py deleted file mode 100644 index e89f25a99..000000000 --- a/advanced/image_processing/examples/plot_interpolation_face.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Image interpolation -===================== - -The example demonstrates image interpolation on a Raccoon face. 
-""" - -import scipy as sp -import matplotlib.pyplot as plt - -f = sp.datasets.face(gray=True) - -plt.figure(figsize=(8, 4)) - -plt.subplot(1, 2, 1) -plt.imshow(f[320:340, 510:530], cmap="gray") -plt.axis("off") - -plt.subplot(1, 2, 2) -plt.imshow(f[320:340, 510:530], cmap="gray", interpolation="nearest") -plt.axis("off") - -plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=1) -plt.show() diff --git a/advanced/image_processing/examples/plot_measure_data.py b/advanced/image_processing/examples/plot_measure_data.py deleted file mode 100644 index 91ef02b87..000000000 --- a/advanced/image_processing/examples/plot_measure_data.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Measurements from images -========================== - -This examples shows how to measure quantities from various images. - -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = im > im.mean() - -label_im, nb_labels = sp.ndimage.label(mask) - -sizes = sp.ndimage.sum(mask, label_im, range(nb_labels + 1)) -mask_size = sizes < 1000 -remove_pixel = mask_size[label_im] -label_im[remove_pixel] = 0 -labels = np.unique(label_im) -label_clean = np.searchsorted(labels, label_im) - - -plt.figure(figsize=(6, 3)) - -plt.subplot(121) -plt.imshow(label_im, cmap="nipy_spectral") -plt.axis("off") -plt.subplot(122) -plt.imshow(label_clean, vmax=nb_labels, cmap="nipy_spectral") -plt.axis("off") - -plt.subplots_adjust(wspace=0.01, hspace=0.01, top=1, bottom=0, left=0, right=1) -plt.show() diff --git a/advanced/image_processing/examples/plot_numpy_array.py b/advanced/image_processing/examples/plot_numpy_array.py deleted file mode 100644 index 4a8a32417..000000000 --- a/advanced/image_processing/examples/plot_numpy_array.py +++ /dev/null 
@@ -1,29 +0,0 @@ -""" -Image manipulation and NumPy arrays -==================================== - -This example shows how to do image manipulation using common NumPy arrays -tricks. - -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -face = sp.datasets.face(gray=True) -face[10:13, 20:23] -face[100:120] = 255 - -lx, ly = face.shape -X, Y = np.ogrid[0:lx, 0:ly] -mask = (X - lx / 2) ** 2 + (Y - ly / 2) ** 2 > lx * ly / 4 -face[mask] = 0 -face[range(400), range(400)] = 255 - -plt.figure(figsize=(3, 3)) -plt.axes((0, 0, 1, 1)) -plt.imshow(face, cmap="gray") -plt.axis("off") - -plt.show() diff --git a/advanced/image_processing/examples/plot_propagation.py b/advanced/image_processing/examples/plot_propagation.py deleted file mode 100644 index 9a98c2636..000000000 --- a/advanced/image_processing/examples/plot_propagation.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Opening, erosion, and propagation -================================== - -This example shows simple operations of mathematical morphology. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -square = np.zeros((32, 32)) -square[10:-10, 10:-10] = 1 -rng = np.random.default_rng(27446968) -x, y = (32 * rng.random((2, 20))).astype(int) -square[x, y] = 1 - -open_square = sp.ndimage.binary_opening(square) - -eroded_square = sp.ndimage.binary_erosion(square) -reconstruction = sp.ndimage.binary_propagation(eroded_square, mask=square) - -plt.figure(figsize=(9.5, 3)) -plt.subplot(131) -plt.imshow(square, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(132) -plt.imshow(open_square, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(133) -plt.imshow(reconstruction, cmap="gray", interpolation="nearest") -plt.axis("off") - -plt.subplots_adjust(wspace=0, hspace=0.02, top=0.99, bottom=0.01, left=0.01, right=0.99) -plt.show() diff --git a/advanced/image_processing/examples/plot_radial_mean.py b/advanced/image_processing/examples/plot_radial_mean.py deleted file mode 100644 index 6f8373d44..000000000 --- a/advanced/image_processing/examples/plot_radial_mean.py +++ /dev/null @@ -1,27 +0,0 @@ -""" -Radial mean -============ - -This example shows how to do a radial mean with scikit-image. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -f = sp.datasets.face(gray=True) -sx, sy = f.shape -X, Y = np.ogrid[0:sx, 0:sy] - - -r = np.hypot(X - sx / 2, Y - sy / 2) - -rbin = (20 * r / r.max()).astype(int) -radial_mean = sp.ndimage.mean(f, labels=rbin, index=np.arange(1, rbin.max() + 1)) - -plt.figure(figsize=(5, 5)) -plt.axes((0, 0, 1, 1)) -plt.imshow(rbin, cmap="nipy_spectral") -plt.axis("off") - -plt.show() diff --git a/advanced/image_processing/examples/plot_sharpen.py b/advanced/image_processing/examples/plot_sharpen.py deleted file mode 100644 index 8f8e65a5a..000000000 --- a/advanced/image_processing/examples/plot_sharpen.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Image sharpening -================= - -This example shows how to sharpen an image in noiseless situation by -applying the filter inverse to the blur. -""" - -import scipy as sp -import matplotlib.pyplot as plt - -f = sp.datasets.face(gray=True).astype(float) -blurred_f = sp.ndimage.gaussian_filter(f, 3) - -filter_blurred_f = sp.ndimage.gaussian_filter(blurred_f, 1) - -alpha = 30 -sharpened = blurred_f + alpha * (blurred_f - filter_blurred_f) - -plt.figure(figsize=(12, 4)) - -plt.subplot(131) -plt.imshow(f, cmap="gray") -plt.axis("off") -plt.subplot(132) -plt.imshow(blurred_f, cmap="gray") -plt.axis("off") -plt.subplot(133) -plt.imshow(sharpened, cmap="gray") -plt.axis("off") - -plt.tight_layout() -plt.show() diff --git a/advanced/image_processing/examples/plot_synthetic_data.py b/advanced/image_processing/examples/plot_synthetic_data.py deleted file mode 100644 index 1c5e47ce0..000000000 --- a/advanced/image_processing/examples/plot_synthetic_data.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Synthetic data -=============== - -The example generates and displays simple synthetic data. 
-""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) -n = 10 -l = 256 -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = sp.ndimage.gaussian_filter(im, sigma=l / (4.0 * n)) - -mask = im > im.mean() - -label_im, nb_labels = sp.ndimage.label(mask) - -plt.figure(figsize=(9, 3)) - -plt.subplot(131) -plt.imshow(im) -plt.axis("off") -plt.subplot(132) -plt.imshow(mask, cmap="gray") -plt.axis("off") -plt.subplot(133) -plt.imshow(label_im, cmap="nipy_spectral") -plt.axis("off") - -plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=1) -plt.show() diff --git a/advanced/image_processing/index.md b/advanced/image_processing/index.md new file mode 100644 index 000000000..a104707b3 --- /dev/null +++ b/advanced/image_processing/index.md @@ -0,0 +1,1026 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(basic-image)= + +# Image manipulation and processing using NumPy and SciPy + +**Authors**: _Emmanuelle Gouillart, Gaël Varoquaux_ + +```{code-cell} +:tags: [hide-input] + +# Our usual imports. +import numpy as np +import matplotlib.pyplot as plt +``` + +This section addresses basic image manipulation and processing using the +core scientific modules NumPy and SciPy. Some of the operations covered +by this tutorial may be useful for other kinds of multidimensional array +processing than image processing. In particular, the submodule +{mod}`scipy.ndimage` provides functions operating on n-dimensional NumPy +arrays. + +:::{admonition} See also + +For more advanced image processing and image-specific routines, see the +tutorial {ref}`scikit-image`, dedicated to the {mod}`skimage` module. 
+ +::: + +:::{admonition} Image = 2-D numerical array +(or 3-D: CT, MRI, 2D + time; 4-D, ...) + +Here, **image == NumPy array** `np.array` + +::: + +**Tools used in this tutorial**: + +- `numpy`: basic array manipulation +- `scipy`: `scipy.ndimage` submodule dedicated to image processing + (n-dimensional images). See the [documentation](https://docs.scipy.org/doc/scipy/tutorial/ndimage.html): + +```{code-cell} +import scipy as sp +``` + +**Common tasks in image processing**: + +- Input/Output, displaying images +- Basic manipulations: cropping, flipping, rotating, ... +- Image filtering: denoising, sharpening +- Image segmentation: labeling pixels corresponding to different objects +- Classification +- Feature extraction +- Registration + +- ... + +## Opening and writing to image files + +Writing an array to an image file: + +```{code-cell} +import scipy as sp +import imageio.v3 as iio + +f = sp.datasets.face() +iio.imwrite("face.png", f) # uses the Image module (PIL) + +plt.imshow(f) +``` + +```{code-cell} +face = iio.imread('face.png') +type(face) +``` + +```{code-cell} +face.shape, face.dtype +``` + +`dtype` is `uint8` for 8-bit images (0-255) + +Opening raw files (camera, 3-D images) + +```{code-cell} +face.tofile('face.raw') # Create raw file +face_from_raw = np.fromfile('face.raw', dtype=np.uint8) +face_from_raw.shape +face_from_raw.shape = (768, 1024, 3) +``` + +Need to know the shape and dtype of the image (how to separate data +bytes). 
+ +For large data, use `np.memmap` for memory mapping: + +```{code-cell} +face_memmap = np.memmap('face.raw', dtype=np.uint8, shape=(768, 1024, 3)) +``` + +(data are read from the file, and not loaded into memory) + +Working on a list of image files + +```{code-cell} +rng = np.random.default_rng(27446968) +for i in range(10): + im = rng.integers(0, 256, 10000, dtype=np.uint8).reshape((100, 100)) + iio.imwrite(f'random_{i:02d}.png', im) +from glob import glob +filelist = sorted(glob('random*.png')) +filelist +``` + +## Displaying images + +Use `matplotlib` and `imshow` to display an image inside a +`matplotlib figure`: + +```{code-cell} +f = sp.datasets.face(gray=True) # retrieve a grayscale image +plt.imshow(f, cmap=plt.cm.gray) +``` + +Increase contrast by setting min and max values: + +```{code-cell} +plt.imshow(f, cmap=plt.cm.gray, vmin=30, vmax=200) +# Remove axes and ticks. +# Semicolon ends line to suppress repr of Matplotlib objects. +plt.axis('off'); +``` + +Draw contour lines: + +```{code-cell} +plt.imshow(f, cmap=plt.cm.gray, vmin=30, vmax=200) +plt.contour(f, [50, 200]) +plt.axis('off'); +``` + +For smooth intensity variations, use `interpolation='bilinear'`. For fine inspection of intensity variations, use +`interpolation='nearest'`: + +```{code-cell} +fig, axes = plt.subplots(1, 2) +axes[0].imshow(f[320:340, 510:530], cmap=plt.cm.gray, interpolation='bilinear') +axes[0].axis('off') +axes[0].set_title('Bilinear interpolation') +axes[1].imshow(f[320:340, 510:530], cmap=plt.cm.gray, interpolation='nearest') +axes[1].set_title('Nearest interpolation') +axes[1].axis('off'); +``` + +:::{admonition} See also + +More interpolation methods are in [Matplotlib's examples](https://matplotlib.org/stable/gallery/images_contours_and_fields/interpolation_methods.html). +::: + +## Basic manipulations + +Images are arrays: use the whole `numpy` machinery. 
+ +![](axis_convention.png) + +```{code-cell} +face = sp.datasets.face(gray=True) +face[0, 40] +``` + +```{code-cell} +# Slicing +face[10:13, 20:23] +``` + +```{code-cell} +face[100:120] = 255 +``` + +```{code-cell} +lx, ly = face.shape +X, Y = np.ogrid[0:lx, 0:ly] +mask = (X - lx / 2) ** 2 + (Y - ly / 2) ** 2 > lx * ly / 4 +# Masks +face[mask] = 0 +# Fancy indexing +face[range(400), range(400)] = 255 +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(3, 3)) +plt.axes((0, 0, 1, 1)) +plt.imshow(face, cmap="gray") +plt.axis("off"); +``` + +### Statistical information + +```{code-cell} +face = sp.datasets.face(gray=True) +face.mean() +``` + +```{code-cell} +face.max(), face.min() +``` + +Compute the histogram of pixel values with `np.histogram`. + +::: {exercise-start} +:label: img-proc-logo-ex +:class: dropdown +::: + +- Open as an array the `scikit-image` logo + (<https://scikit-image.org/_static/img/logo.png>), or an + image that you have on your computer. +- Crop a meaningful part of the image, for example the python circle + in the logo. +- Display the image array using `matplotlib`. Change the + interpolation method and zoom to see the difference. +- Transform your image to greyscale +- Increase the contrast of the image by changing its minimum and + maximum values. **Optional**: use `scipy.stats.scoreatpercentile` + (read the docstring!) to saturate 5% of the darkest pixels and 5% + of the lightest pixels. +- Save the array to two different file formats (png, jpg, tiff) + +![](scikit_image_logo.png) + +::: {exercise-end} +::: + +### Geometrical transformations + +```{code-cell} +face = sp.datasets.face(gray=True) +lx, ly = face.shape +# Cropping +crop_face = face[lx // 4: - lx // 4, ly // 4: - ly // 4] +# up <-> down flip +flip_ud_face = np.flipud(face) +# rotation +rotate_face = sp.ndimage.rotate(face, 45) +rotate_face_noreshape = sp.ndimage.rotate(face, 45, reshape=False) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot the transformed face. 
+fig, axes = plt.subplots(1, 5, figsize=(12.5, 2.5)) +for i, img_arr in enumerate([face, crop_face, flip_ud_face, + rotate_face, rotate_face_noreshape]): + axes[i].imshow(img_arr, cmap="gray") + axes[i].axis('off') + +plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1); +``` + +## Image filtering + +**Local filters**: replace the value of pixels by a function of the values of +neighboring pixels. + +Neighbourhood: square (choose size), disk, or more complicated _structuring +element_. + +:::{figure} kernels.png +:align: center +:scale: 90 +::: + +### Blurring/smoothing + +**Gaussian filter** from `scipy.ndimage`: + +```{code-cell} +face = sp.datasets.face(gray=True) +blurred_face = sp.ndimage.gaussian_filter(face, sigma=3) +very_blurred = sp.ndimage.gaussian_filter(face, sigma=5) +``` + +**Uniform filter** + +```{code-cell} +local_mean = sp.ndimage.uniform_filter(face, size=11) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot the two Gaussian-blurred images and the uniform-filtered one. +fig, axes = plt.subplots(1, 3, figsize=(9, 3)) +for i, img_arr in enumerate([blurred_face, very_blurred, local_mean]): + axes[i].imshow(img_arr, cmap="gray") + axes[i].axis("off") + +plt.subplots_adjust(wspace=0, hspace=0.0, top=0.99, bottom=0.01, left=0.01, right=0.99); +``` + +### Sharpening + +Sharpen a blurred image: + +```{code-cell} +face = sp.datasets.face(gray=True).astype(float) +blurred_f = sp.ndimage.gaussian_filter(face, 3) +``` + +Increase the weight of edges by adding an approximation of the +Laplacian: + +```{code-cell} +filter_blurred_f = sp.ndimage.gaussian_filter(blurred_f, 1) +alpha = 30 +sharpened = blurred_f + alpha * (blurred_f - filter_blurred_f) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 3, figsize=(12, 4)) +for i, img_arr in enumerate([face, blurred_f, sharpened]): + axes[i].imshow(img_arr, cmap="gray") + axes[i].axis("off") + +plt.tight_layout(); +``` + +### Denoising + +Noisy face: + +```{code-cell} +f = sp.datasets.face(gray=True) 
+f = f[230:290, 220:320] + +rng = np.random.default_rng() +noisy = f + 0.4 * f.std() * rng.random(f.shape) +``` + +A **Gaussian filter** smoothes the noise out... and the edges as well: + +```{code-cell} +gauss_denoised = sp.ndimage.gaussian_filter(noisy, 2) +``` + +Most local linear isotropic filters blur the image (`scipy.ndimage.uniform_filter`) + +A **median filter** preserves better the edges: + +```{code-cell} +med_denoised = sp.ndimage.median_filter(noisy, 3) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 3, figsize=(12, 2.8)) +for i, (name, img_arr) in enumerate([ + ['noisy', noisy], + ['Gaussian filter', gauss_denoised], + ['Median filter', med_denoised]]): + axes[i].imshow(img_arr, cmap="gray", vmin=40, vmax=220) + axes[i].axis("off") + axes[i].set_title(name, fontsize=20) + +plt.subplots_adjust(wspace=0.02, hspace=0.02, top=0.9, bottom=0, left=0, right=1); +``` + +Median filter: better result for straight boundaries (**low curvature**): + +```{code-cell} +im = np.zeros((20, 20)) +im[5:-5, 5:-5] = 1 +im = sp.ndimage.distance_transform_bf(im) +rng = np.random.default_rng() +im_noise = im + 0.2 * rng.standard_normal(im.shape) +im_med = sp.ndimage.median_filter(im_noise, 3) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 4, figsize=(16, 5)) +for i, (name, img_arr) in enumerate([ + ['Original image', im], + ['Noisy image', im_noise], + ['Median filter', im_med]]): + axes[i].imshow(img_arr, vmin=0, vmax=5) + axes[i].axis("off") + axes[i].set_title(name, fontsize=10) +axes[-1].imshow(np.abs(im - im_med), cmap="hot", interpolation="nearest") +axes[-1].axis("off") +axes[-1].set_title('Error', fontsize=10) + +plt.subplots_adjust(wspace=0.02, hspace=0.02, top=0.9, bottom=0, left=0, right=1) +``` + +Other rank filter: `scipy.ndimage.maximum_filter`, +`scipy.ndimage.percentile_filter` + +Other local non-linear filters: Wiener (`scipy.signal.wiener`), etc. 
+ +**Non-local filters** + +::: {exercise-start} +:label: img-proc-denoise-ex +:class: dropdown +::: + +- Create a binary image (of 0s and 1s) with several objects (circles, + ellipses, squares, or random shapes). +- Add some noise (e.g., 20% of noise) +- Try two different denoising methods for denoising the image: + Gaussian filtering and median filtering. +- Compare the histograms of the two different denoised images. + Which one is the closest to the histogram of the original (noise-free) + image? + +::: {exercise-end} +::: + +:::{admonition} See also + +More denoising filters are available in {mod}`skimage.restoration`, +see the {ref}`scikit-image` tutorial. +::: + +### Mathematical morphology + +See [Wikipedia](https://en.wikipedia.org/wiki/Mathematical_morphology) +for a definition of mathematical morphology. + +Probe an image with a simple shape (a **structuring element**), and +modify this image according to how the shape locally fits or misses the +image. + +**Structuring element**: + +```{code-cell} +el = sp.ndimage.generate_binary_structure(2, 1) +el +``` + +```{code-cell} +el.astype(int) +``` + +![](diamond_kernel.png) + +**Erosion** = minimum filter. 
Replace the value of a pixel by the minimal value covered by the structuring element.: + +```{code-cell} +a = np.zeros((7,7), dtype=int) +a[1:6, 2:5] = 1 +a +``` + +```{code-cell} +sp.ndimage.binary_erosion(a).astype(a.dtype) +``` + +```{code-cell} +# Erosion removes objects smaller than the structure +sp.ndimage.binary_erosion(a, structure=np.ones((5,5))).astype(a.dtype) +``` + +![](morpho_mat.png) + +**Dilation**: maximum filter: + +```{code-cell} +a = np.zeros((5, 5)) +a[2, 2] = 1 +a +``` + +```{code-cell} +sp.ndimage.binary_dilation(a).astype(a.dtype) +``` + +Also works for grey-valued images: + +```{code-cell} +rng = np.random.default_rng(27446968) +im = np.zeros((64, 64)) +x, y = (63*rng.random((2, 8))).astype(int) +im[x, y] = np.arange(8) +``` + +```{code-cell} +bigger_points = sp.ndimage.grey_dilation(im, size=(5, 5), structure=np.ones((5, 5))) +``` + +```{code-cell} +square = np.zeros((16, 16)) +square[4:-4, 4:-4] = 1 +dist = sp.ndimage.distance_transform_bf(square) +dilate_dist = sp.ndimage.grey_dilation(dist, size=(3, 3), \ + structure=np.ones((3, 3))) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 4, figsize=(12.5, 3)) +for i, img_arr in enumerate([im, bigger_points, dist, dilate_dist]): + axes[i].imshow(img_arr, interpolation='nearest', cmap='nipy_spectral') + axes[i].axis("off") + +plt.subplots_adjust(wspace=0, hspace=0.02, top=0.99, bottom=0.01, left=0.01, right=0.99) +``` + +#### **Opening**: erosion + dilation: + +```{code-cell} +a = np.zeros((5,5), dtype=int) +a[1:4, 1:4] = 1; a[4, 4] = 1 +a +``` + +```{code-cell} +# Opening removes small objects +sp.ndimage.binary_opening(a, structure=np.ones((3,3))).astype(int) +``` + +```{code-cell} +# Opening can also smooth corners +sp.ndimage.binary_opening(a).astype(int) +``` + +#### **Application**: remove noise: + +```{code-cell} +square = np.zeros((32, 32)) +square[10:-10, 10:-10] = 1 +rng = np.random.default_rng(27446968) +x, y = (32*rng.random((2, 20))).astype(int) 
+square[x, y] = 1 +``` + +```{code-cell} +open_square = sp.ndimage.binary_opening(square) +``` + +```{code-cell} +eroded_square = sp.ndimage.binary_erosion(square) +reconstruction = sp.ndimage.binary_propagation(eroded_square, mask=square) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 3, figsize=(9.5, 3)) +for i, img_arr in enumerate([square, open_square, reconstruction]): + axes[i].imshow(img_arr, interpolation='nearest', cmap='gray') + axes[i].axis("off") + +plt.subplots_adjust(wspace=0, hspace=0.02, top=0.99, bottom=0.01, left=0.01, right=0.99) +``` + +#### **Closing**: dilation + erosion + +Many other mathematical morphology operations: hit and miss transform, tophat, +etc. + +## Feature extraction + +### Edge detection + +Synthetic data: + +```{code-cell} +im = np.zeros((256, 256)) +im[64:-64, 64:-64] = 1 +im = sp.ndimage.rotate(im, 15, mode='constant') +im = sp.ndimage.gaussian_filter(im, 8) +``` + +Use a **gradient operator** (**Sobel**) to find high intensity variations: + +```{code-cell} +# Filter x and y. +sx = sp.ndimage.sobel(im, axis=0, mode="constant") +sy = sp.ndimage.sobel(im, axis=1, mode="constant") +# Combine x and y. +sob = np.hypot(sx, sy) +``` + +```{code-cell} +# Make a noisy image. +# Set random seed. +rng = np.random.default_rng(27446968) + +noisy_im = im + 0.07 * rng.random(im.shape) + +# Filter x and y. +n_sx = sp.ndimage.sobel(noisy_im, axis=0, mode="constant") +n_sy = sp.ndimage.sobel(noisy_im, axis=1, mode="constant") +# Combine x and y. 
+noisy_sob = np.hypot(n_sx, n_sy) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 4, figsize=(16, 5)) +for i, (name, img_arr) in enumerate([ + ['Square', im], + ['Sobel (x direction)', sx], + ['Sobel filter', sob], + ['Sobel for noisy image', noisy_sob]]): + axes[i].imshow(img_arr, cmap='gray') + axes[i].axis("off") + axes[i].set_title(name, fontsize=10) + +plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=0.9); +``` + +### Segmentation + +#### **Histogram-based** segmentation (no spatial information) + +```{code-cell} +n = 10 +l = 256 +im = np.zeros((l, l)) +rng = np.random.default_rng(27446968) +points = l*rng.random((2, n**2)) +im[(points[0]).astype(int), (points[1]).astype(int)] = 1 +im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) +``` + +```{code-cell} +mask = (im > im.mean()).astype(float) +mask += 0.1 * im +img = mask + 0.2*rng.standard_normal(mask.shape) +``` + +```{code-cell} +hist, bin_edges = np.histogram(img, bins=60) +bin_centers = 0.5*(bin_edges[:-1] + bin_edges[1:]) +binary_img = img > 0.5 +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 3, figsize=(11, 4)) +axes[0].imshow(img) +axes[0].axis("off") +axes[1].plot(bin_centers, hist, lw=2) +axes[1].axvline(0.5, color="r", ls="--", lw=2) +axes[1].text(0.57, 0.8, "histogram", fontsize=20, transform=axes[1].transAxes) +axes[1].set_yticks([]) +axes[2].imshow(binary_img, cmap="gray", interpolation="nearest") +axes[2].axis("off") + +plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1) +``` + +Use mathematical morphology to clean up the result: + +```{code-cell} +# Remove small white regions +open_img = sp.ndimage.binary_opening(binary_img) +# Remove small black hole +close_img = sp.ndimage.binary_closing(open_img) +``` + +```{code-cell} +:tags: [hide-input] + +L = 128 + +fig, axes = plt.subplots(1, 4, figsize=(12, 3)) +for i, img_arr in enumerate([binary_img, open_img, close_img, mask]): + 
axes[i].imshow(img_arr[:L, :L], cmap='gray') + axes[i].axis("off") + +axes[-1].contour(close_img[:L, :L], [0.5], linewidths=2, colors="r") + +plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1) +``` + +::: {exercise-start} +:label: img-proc-erode-ex +:class: dropdown +::: + +Check that reconstruction operations (erosion + propagation) produce a +better result than opening/closing. Start with: + +```{code-cell} +eroded_img = sp.ndimage.binary_erosion(binary_img) +reconstruct_img = sp.ndimage.binary_propagation(eroded_img, mask=binary_img) +``` + +::: {exercise-end} +::: + +::: {solution-start} img-proc-erode-ex +:class: dropdown +::: + +```{code-cell} +eroded_img = sp.ndimage.binary_erosion(binary_img) +reconstruct_img = sp.ndimage.binary_propagation(eroded_img, mask=binary_img) +tmp = np.logical_not(reconstruct_img) +eroded_tmp = sp.ndimage.binary_erosion(tmp) +reconstruct_final = np.logical_not(sp.ndimage.binary_propagation(eroded_tmp, mask=tmp)) +np.abs(mask - close_img).mean() +``` + +```{code-cell} +np.abs(mask - reconstruct_final).mean() +``` + +::: {solution-end} +::: + +::: {exercise-start} +:label: img-proc-denoise-hist-ex +:class: dropdown +::: + +Check how a first denoising step (e.g. with a median filter) modifies the +histogram, and check that the resulting histogram-based segmentation is more +accurate. + +::: {exercise-end} +::: + +:::{admonition} See also + +More advanced segmentation algorithms are found in the +`scikit-image`: see {ref}`scikit-image`. +::: + ++++ + +### Useful algorithms from other packages + +Other Scientific Packages provide algorithms that can be useful for +image processing. In this example, we use the spectral clustering +function of the `scikit-learn` in order to segment glued objects. 
+ + + +```{code-cell} +from sklearn.feature_extraction import image +from sklearn.cluster import spectral_clustering +``` + +```{code-cell} +l = 100 +x, y = np.indices((l, l)) +``` + +```{code-cell} +center1 = (28, 24) +center2 = (40, 50) +center3 = (67, 58) +center4 = (24, 70) +radius1, radius2, radius3, radius4 = 16, 14, 15, 14 +``` + +```{code-cell} +circle1 = (x - center1[0])**2 + (y - center1[1])**2 < radius1**2 +circle2 = (x - center2[0])**2 + (y - center2[1])**2 < radius2**2 +circle3 = (x - center3[0])**2 + (y - center3[1])**2 < radius3**2 +circle4 = (x - center4[0])**2 + (y - center4[1])**2 < radius4**2 +``` + +```{code-cell} +# 4 circles +img = circle1 + circle2 + circle3 + circle4 +mask = img.astype(bool) +img = img.astype(float) +``` + +```{code-cell} +rng = np.random.default_rng() +img += 1 + 0.2*rng.standard_normal(img.shape) +# Convert the image into a graph with the value of the gradient on +# the edges. +graph = image.img_to_graph(img, mask=mask) +``` + +```{code-cell} +# Take a decreasing function of the gradient: we make it only weakly +# dependent on the gradient, so the segmentation is close to a Voronoi partition +graph.data = np.exp(-graph.data/graph.data.std()) +``` + +```{code-cell} +labels = spectral_clustering(graph, n_clusters=4, eigen_solver='arpack') +label_im = -np.ones(mask.shape) +label_im[mask] = labels +``` + +![](image_spectral_clustering.png) + +## Measuring object properties: `scipy.ndimage.measurements` + +Synthetic data: + +```{code-cell} +n = 10 +l = 256 +im = np.zeros((l, l)) +rng = np.random.default_rng(27446968) +points = l * rng.random((2, n**2)) +im[(points[0]).astype(int), (points[1]).astype(int)] = 1 +im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) +mask = im > im.mean() +``` + +### Analysis of connected components + +Label connected components: `scipy.ndimage.label`: + +```{code-cell} +label_im, nb_labels = sp.ndimage.label(mask) +nb_labels # how many regions? 
+``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 3, figsize=(9, 3)) +for i, (img_arr, cmap) in enumerate([ + [im, 'viridis'], + [mask, 'gray'], + [label_im, 'nipy_spectral']]): + axes[i].imshow(img_arr, cmap=cmap) + axes[i].axis("off") + +plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=1); +``` + +Compute size, mean_value, etc. of each region: + +```{code-cell} +sizes = sp.ndimage.sum(mask, label_im, range(nb_labels + 1)) +mean_vals = sp.ndimage.sum(im, label_im, range(1, nb_labels + 1)) +``` + +Clean up small connected components: + +```{code-cell} +mask_size = sizes < 1000 +remove_pixel = mask_size[label_im] +remove_pixel.shape +``` + +```{code-cell} +label_im[remove_pixel] = 0 +``` + +Now reassign labels with `np.searchsorted`: + +```{code-cell} +labels = np.unique(label_im) +label_im = np.searchsorted(labels, label_im) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 2, figsize=(6, 3)) +axes[0].imshow(label_im, cmap="nipy_spectral") +axes[0].axis("off") +axes[1].imshow(label_im, vmax=nb_labels, cmap="nipy_spectral") +axes[1].axis("off") + +plt.subplots_adjust(wspace=0.01, hspace=0.01, top=1, bottom=0, left=0, right=1) +``` + +Find region of interest enclosing object: + +```{code-cell} +slice_x, slice_y = sp.ndimage.find_objects(label_im)[3] +roi = im[slice_x, slice_y] +plt.imshow(roi); +``` + +Other spatial measures: `scipy.ndimage.center_of_mass`, +`scipy.ndimage.maximum_position`, etc. + +Can be used outside the limited scope of segmentation applications. 
+ +Example: block mean: + +```{code-cell} +f = sp.datasets.face(gray=True) +sx, sy = f.shape +X, Y = np.ogrid[0:sx, 0:sy] +regions = (sy//6) * (X//4) + (Y//6) # note that we use broadcasting +block_mean = sp.ndimage.mean(f, labels=regions, index=np.arange(1, + regions.max() +1)) +block_mean.shape = (sx // 4, sy // 6) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(5, 5)) +plt.imshow(block_mean, cmap="gray") +plt.axis("off"); +``` + +When regions are regular blocks, it is more efficient to use stride +tricks ({ref}`stride-manipulation-label`). + +Non-regularly-spaced blocks: radial mean: + +```{code-cell} +sx, sy = f.shape +X, Y = np.ogrid[0:sx, 0:sy] +r = np.hypot(X - sx/2, Y - sy/2) +rbin = (20* r/r.max()).astype(int) +radial_mean = sp.ndimage.mean(f, labels=rbin, index=np.arange(1, rbin.max() +1)) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(5, 5)) +plt.axes((0, 0, 1, 1)) +plt.imshow(rbin, cmap="nipy_spectral") +plt.axis("off"); +``` + +### Other measures + +Correlation function, Fourier/wavelet spectrum, etc. 
+ +One example with mathematical morphology: [granulometry](https://en.wikipedia.org/wiki/Granulometry_%28morphology%29) + +```{code-cell} +def disk_structure(n): + struct = np.zeros((2 * n + 1, 2 * n + 1)) + x, y = np.indices((2 * n + 1, 2 * n + 1)) + mask = (x - n)**2 + (y - n)**2 <= n**2 + struct[mask] = 1 + return struct.astype(bool) +``` + +```{code-cell} +def granulometry(data, sizes=None): + s = max(data.shape) + if sizes is None: + sizes = range(1, s/2, 2) + granulo = [sp.ndimage.binary_opening(data, \ + structure=disk_structure(n)).sum() for n in sizes] + return granulo +``` + +```{code-cell} +rng = np.random.default_rng(27446968) +n = 10 +l = 256 +im = np.zeros((l, l)) +points = l*rng.random((2, n**2)) +im[(points[0]).astype(int), (points[1]).astype(int)] = 1 +im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) +``` + +```{code-cell} +mask = im > im.mean() +granulo = granulometry(mask, sizes=np.arange(2, 19, 4)) +``` + +```{code-cell} +:tags: [hide-input] + +# Do the plot. +plt.figure(figsize=(6, 2.2)) +plt.subplot(121) +plt.imshow(mask, cmap="gray") +``` + +```{code-cell} +opened = sp.ndimage.binary_opening(mask, structure=disk_structure(10)) +opened_more = sp.ndimage.binary_opening(mask, structure=disk_structure(14)) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 2, figsize=(6, 2.2)) +axes[0].imshow(mask, cmap="gray") +axes[0].contour(opened, [0.5], colors="b", linewidths=2) +axes[0].contour(opened_more, [0.5], colors="r", linewidths=2) +axes[0].axis("off") +axes[1].plot(np.arange(2, 19, 4), granulo, "ok", ms=8) +``` + +:::{admonition} See also + +More on image-processing: + +- The chapter on {ref}`Scikit-image ` +- Other, more powerful and complete modules: + [OpenCV](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html) (Python + bindings), [CellProfiler](https://www.cellprofiler.org), + [ITK](https://itk.org/) with Python bindings + ::: diff --git a/advanced/image_processing/index.rst 
b/advanced/image_processing/index.rst deleted file mode 100644 index 25afce1bf..000000000 --- a/advanced/image_processing/index.rst +++ /dev/null @@ -1,909 +0,0 @@ -.. for doctests - >>> import numpy as np - >>> import matplotlib.pyplot as plt - - -.. _basic_image: - -======================================================= -Image manipulation and processing using NumPy and SciPy -======================================================= - -**Authors**: *Emmanuelle Gouillart, Gaël Varoquaux* - - -This section addresses basic image manipulation and processing using the -core scientific modules NumPy and SciPy. Some of the operations covered -by this tutorial may be useful for other kinds of multidimensional array -processing than image processing. In particular, the submodule -:mod:`scipy.ndimage` provides functions operating on n-dimensional NumPy -arrays. - -.. seealso:: - - For more advanced image processing and image-specific routines, see the - tutorial :ref:`scikit_image`, dedicated to the :mod:`skimage` module. - -.. topic:: - Image = 2-D numerical array - - (or 3-D: CT, MRI, 2D + time; 4-D, ...) - - Here, **image == NumPy array** ``np.array`` - -**Tools used in this tutorial**: - -* ``numpy``: basic array manipulation - -* ``scipy``: ``scipy.ndimage`` submodule dedicated to image processing - (n-dimensional images). See the `documentation - `_:: - - >>> import scipy as sp - - -**Common tasks in image processing**: - -* Input/Output, displaying images - -* Basic manipulations: cropping, flipping, rotating, ... - -* Image filtering: denoising, sharpening - -* Image segmentation: labeling pixels corresponding to different objects - -* Classification - -* Feature extraction - -* Registration - -* ... - - -.. contents:: Chapters contents - :local: - :depth: 4 - - - -Opening and writing to image files -================================== - -Writing an array to a file: - -.. literalinclude:: examples/plot_face.py - :lines: 8- - -.. 
image:: examples/face.png - :align: center - :scale: 50 - -Creating a NumPy array from an image file:: - - >>> import imageio.v3 as iio - >>> face = sp.datasets.face() - >>> iio.imwrite('face.png', face) # First we need to create the PNG file - - >>> face = iio.imread('face.png') - >>> type(face) - - >>> face.shape, face.dtype - ((768, 1024, 3), dtype('uint8')) - -dtype is uint8 for 8-bit images (0-255) - -Opening raw files (camera, 3-D images) :: - - >>> face.tofile('face.raw') # Create raw file - >>> face_from_raw = np.fromfile('face.raw', dtype=np.uint8) - >>> face_from_raw.shape - (2359296,) - >>> face_from_raw.shape = (768, 1024, 3) - -Need to know the shape and dtype of the image (how to separate data -bytes). - -For large data, use ``np.memmap`` for memory mapping:: - - >>> face_memmap = np.memmap('face.raw', dtype=np.uint8, shape=(768, 1024, 3)) - -(data are read from the file, and not loaded into memory) - -Working on a list of image files :: - - >>> rng = np.random.default_rng(27446968) - >>> for i in range(10): - ... im = rng.integers(0, 256, 10000, dtype=np.uint8).reshape((100, 100)) - ... iio.imwrite(f'random_{i:02d}.png', im) - >>> from glob import glob - >>> filelist = glob('random*.png') - >>> filelist.sort() - -Displaying images -================= - -Use ``matplotlib`` and ``imshow`` to display an image inside a -``matplotlib figure``:: - - >>> f = sp.datasets.face(gray=True) # retrieve a grayscale image - >>> import matplotlib.pyplot as plt - >>> plt.imshow(f, cmap=plt.cm.gray) - - -Increase contrast by setting min and max values:: - - >>> plt.imshow(f, cmap=plt.cm.gray, vmin=30, vmax=200) - - >>> # Remove axes and ticks - >>> plt.axis('off') - (np.float64(-0.5), np.float64(1023.5), np.float64(767.5), np.float64(-0.5)) - -Draw contour lines:: - - >>> plt.contour(f, [50, 200]) - - - -.. figure:: auto_examples/images/sphx_glr_plot_display_face_001.png - :scale: 80 - :target: auto_examples/plot_display_face.html - -.. 
only:: html - - [:ref:`Python source code `] - -For smooth intensity variations, use ``interpolation='bilinear'``. For fine inspection of intensity variations, use -``interpolation='nearest'``:: - - >>> plt.imshow(f[320:340, 510:530], cmap=plt.cm.gray, interpolation='bilinear') - - >>> plt.imshow(f[320:340, 510:530], cmap=plt.cm.gray, interpolation='nearest') - - -.. figure:: auto_examples/images/sphx_glr_plot_interpolation_face_001.png - :scale: 80 - :target: auto_examples/plot_interpolation_face.html - -.. only:: html - - [:ref:`Python source code `] - - -.. seealso:: - - More interpolation methods are in `Matplotlib's examples `_. - - - - -Basic manipulations -=================== - -Images are arrays: use the whole ``numpy`` machinery. - -.. image:: axis_convention.png - :align: center - :scale: 65 - -:: - - >>> face = sp.datasets.face(gray=True) - >>> face[0, 40] - np.uint8(127) - >>> # Slicing - >>> face[10:13, 20:23] - array([[141, 153, 145], - [133, 134, 125], - [ 96, 92, 94]], dtype=uint8) - >>> face[100:120] = 255 - >>> - >>> lx, ly = face.shape - >>> X, Y = np.ogrid[0:lx, 0:ly] - >>> mask = (X - lx / 2) ** 2 + (Y - ly / 2) ** 2 > lx * ly / 4 - >>> # Masks - >>> face[mask] = 0 - >>> # Fancy indexing - >>> face[range(400), range(400)] = 255 - -.. figure:: auto_examples/images/sphx_glr_plot_numpy_array_001.png - :scale: 100 - :target: auto_examples/plot_numpy_array.html - -.. only:: html - - [:ref:`Python source code `] - - -Statistical information ------------------------ - -:: - - >>> face = sp.datasets.face(gray=True) - >>> face.mean() - np.float64(113.48026784261067) - >>> face.max(), face.min() - (np.uint8(250), np.uint8(0)) - - -``np.histogram`` - -.. topic:: **Exercise** - :class: green - - - * Open as an array the ``scikit-image`` logo - (https://scikit-image.org/_static/img/logo.png), or an - image that you have on your computer. - - * Crop a meaningful part of the image, for example the python circle - in the logo. 
- - * Display the image array using ``matplotlib``. Change the - interpolation method and zoom to see the difference. - - * Transform your image to greyscale - - * Increase the contrast of the image by changing its minimum and - maximum values. **Optional**: use ``scipy.stats.scoreatpercentile`` - (read the docstring!) to saturate 5% of the darkest pixels and 5% - of the lightest pixels. - - * Save the array to two different file formats (png, jpg, tiff) - - .. image:: scikit_image_logo.png - :align: center - - -Geometrical transformations ---------------------------- -:: - - >>> face = sp.datasets.face(gray=True) - >>> lx, ly = face.shape - >>> # Cropping - >>> crop_face = face[lx // 4: - lx // 4, ly // 4: - ly // 4] - >>> # up <-> down flip - >>> flip_ud_face = np.flipud(face) - >>> # rotation - >>> rotate_face = sp.ndimage.rotate(face, 45) - >>> rotate_face_noreshape = sp.ndimage.rotate(face, 45, reshape=False) - -.. figure:: auto_examples/images/sphx_glr_plot_geom_face_001.png - :scale: 65 - :target: auto_examples/plot_geom_face.html - -.. only:: html - - [:ref:`Python source code `] - -Image filtering -=============== - -**Local filters**: replace the value of pixels by a function of the values of -neighboring pixels. - -Neighbourhood: square (choose size), disk, or more complicated *structuring -element*. - -.. figure:: kernels.png - :align: center - :scale: 90 - -Blurring/smoothing ------------------- - -**Gaussian filter** from ``scipy.ndimage``:: - - >>> face = sp.datasets.face(gray=True) - >>> blurred_face = sp.ndimage.gaussian_filter(face, sigma=3) - >>> very_blurred = sp.ndimage.gaussian_filter(face, sigma=5) - -**Uniform filter** :: - - >>> local_mean = sp.ndimage.uniform_filter(face, size=11) - -.. figure:: auto_examples/images/sphx_glr_plot_blur_001.png - :scale: 90 - :target: auto_examples/plot_blur.html - -.. 
only:: html - - [:ref:`Python source code `] - -Sharpening ----------- - -Sharpen a blurred image:: - - >>> face = sp.datasets.face(gray=True).astype(float) - >>> blurred_f = sp.ndimage.gaussian_filter(face, 3) - -increase the weight of edges by adding an approximation of the -Laplacian:: - - >>> filter_blurred_f = sp.ndimage.gaussian_filter(blurred_f, 1) - >>> alpha = 30 - >>> sharpened = blurred_f + alpha * (blurred_f - filter_blurred_f) - -.. figure:: auto_examples/images/sphx_glr_plot_sharpen_001.png - :scale: 65 - :target: auto_examples/plot_sharpen.html - -.. only:: html - - [:ref:`Python source code `] - - -Denoising ---------- - -Noisy face:: - - >>> f = sp.datasets.face(gray=True) - >>> f = f[230:290, 220:320] - >>> rng = np.random.default_rng() - >>> noisy = f + 0.4 * f.std() * rng.random(f.shape) - -A **Gaussian filter** smoothes the noise out... and the edges as well:: - - >>> gauss_denoised = sp.ndimage.gaussian_filter(noisy, 2) - -Most local linear isotropic filters blur the image (``scipy.ndimage.uniform_filter``) - -A **median filter** preserves better the edges:: - - >>> med_denoised = sp.ndimage.median_filter(noisy, 3) - -.. figure:: auto_examples/images/sphx_glr_plot_face_denoise_001.png - :scale: 60 - :target: auto_examples/plot_face_denoise.html - -.. only:: html - - [:ref:`Python source code `] - - -Median filter: better result for straight boundaries (**low curvature**):: - - >>> im = np.zeros((20, 20)) - >>> im[5:-5, 5:-5] = 1 - >>> im = sp.ndimage.distance_transform_bf(im) - >>> rng = np.random.default_rng() - >>> im_noise = im + 0.2 * rng.standard_normal(im.shape) - >>> im_med = sp.ndimage.median_filter(im_noise, 3) - -.. figure:: auto_examples/images/sphx_glr_plot_denoising_001.png - :scale: 50 - :target: auto_examples/plot_denoising.html - -.. 
only:: html - - [:ref:`Python source code `] - - -Other rank filter: ``scipy.ndimage.maximum_filter``, -``scipy.ndimage.percentile_filter`` - -Other local non-linear filters: Wiener (``scipy.signal.wiener``), etc. - -**Non-local filters** - -.. topic:: **Exercise: denoising** - :class: green - - * Create a binary image (of 0s and 1s) with several objects (circles, - ellipses, squares, or random shapes). - - * Add some noise (e.g., 20% of noise) - - * Try two different denoising methods for denoising the image: - gaussian filtering and median filtering. - - * Compare the histograms of the two different denoised images. - Which one is the closest to the histogram of the original (noise-free) - image? - -.. seealso:: - - More denoising filters are available in :mod:`skimage.denoising`, - see the :ref:`scikit_image` tutorial. - - - -Mathematical morphology ------------------------ - -See `wikipedia `_ -for a definition of mathematical morphology. - -Probe an image with a simple shape (a **structuring element**), and -modify this image according to how the shape locally fits or misses the -image. - -**Structuring element**:: - - >>> el = sp.ndimage.generate_binary_structure(2, 1) - >>> el - array([[False, True, False], - [ True, True, True], - [False, True, False]]) - >>> el.astype(int) - array([[0, 1, 0], - [1, 1, 1], - [0, 1, 0]]) - -.. figure:: diamond_kernel.png - :align: center - -**Erosion** = minimum filter. 
Replace the value of a pixel by the minimal value covered by the structuring element.:: - - >>> a = np.zeros((7,7), dtype=int) - >>> a[1:6, 2:5] = 1 - >>> a - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - >>> sp.ndimage.binary_erosion(a).astype(a.dtype) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - >>> # Erosion removes objects smaller than the structure - >>> sp.ndimage.binary_erosion(a, structure=np.ones((5,5))).astype(a.dtype) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - - -.. image:: morpho_mat.png - :align: center - - -**Dilation**: maximum filter:: - - >>> a = np.zeros((5, 5)) - >>> a[2, 2] = 1 - >>> a - array([[0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 1., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.]]) - >>> sp.ndimage.binary_dilation(a).astype(a.dtype) - array([[0., 0., 0., 0., 0.], - [0., 0., 1., 0., 0.], - [0., 1., 1., 1., 0.], - [0., 0., 1., 0., 0.], - [0., 0., 0., 0., 0.]]) - - -Also works for grey-valued images:: - - >>> rng = np.random.default_rng(27446968) - >>> im = np.zeros((64, 64)) - >>> x, y = (63*rng.random((2, 8))).astype(int) - >>> im[x, y] = np.arange(8) - - >>> bigger_points = sp.ndimage.grey_dilation(im, size=(5, 5), structure=np.ones((5, 5))) - - >>> square = np.zeros((16, 16)) - >>> square[4:-4, 4:-4] = 1 - >>> dist = sp.ndimage.distance_transform_bf(square) - >>> dilate_dist = sp.ndimage.grey_dilation(dist, size=(3, 3), \ - ... structure=np.ones((3, 3))) - - -.. figure:: auto_examples/images/sphx_glr_plot_greyscale_dilation_001.png - :scale: 40 - :target: auto_examples/plot_greyscale_dilation.html - -.. 
only:: html - - [:ref:`Python source code `] - -**Opening**: erosion + dilation:: - - >>> a = np.zeros((5,5), dtype=int) - >>> a[1:4, 1:4] = 1; a[4, 4] = 1 - >>> a - array([[0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 1]]) - >>> # Opening removes small objects - >>> sp.ndimage.binary_opening(a, structure=np.ones((3,3))).astype(int) - array([[0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 0]]) - >>> # Opening can also smooth corners - >>> sp.ndimage.binary_opening(a).astype(int) - array([[0, 0, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 1, 1, 1, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 0]]) - -**Application**: remove noise:: - - >>> square = np.zeros((32, 32)) - >>> square[10:-10, 10:-10] = 1 - >>> rng = np.random.default_rng(27446968) - >>> x, y = (32*rng.random((2, 20))).astype(int) - >>> square[x, y] = 1 - - >>> open_square = sp.ndimage.binary_opening(square) - - >>> eroded_square = sp.ndimage.binary_erosion(square) - >>> reconstruction = sp.ndimage.binary_propagation(eroded_square, mask=square) - -.. figure:: auto_examples/images/sphx_glr_plot_propagation_001.png - :scale: 40 - :target: auto_examples/plot_propagation.html - -.. only:: html - - [:ref:`Python source code `] - -**Closing**: dilation + erosion - -Many other mathematical morphology operations: hit and miss transform, tophat, -etc. - -Feature extraction -================== - -Edge detection --------------- - -Synthetic data:: - - >>> im = np.zeros((256, 256)) - >>> im[64:-64, 64:-64] = 1 - >>> - >>> im = sp.ndimage.rotate(im, 15, mode='constant') - >>> im = sp.ndimage.gaussian_filter(im, 8) - -Use a **gradient operator** (**Sobel**) to find high intensity variations:: - - >>> sx = sp.ndimage.sobel(im, axis=0, mode='constant') - >>> sy = sp.ndimage.sobel(im, axis=1, mode='constant') - >>> sob = np.hypot(sx, sy) - -.. 
figure:: auto_examples/images/sphx_glr_plot_find_edges_001.png - :scale: 40 - :target: auto_examples/plot_find_edges.html - -.. only:: html - - [:ref:`Python source code `] - - -Segmentation ------------- - -* **Histogram-based** segmentation (no spatial information) - -:: - - >>> n = 10 - >>> l = 256 - >>> im = np.zeros((l, l)) - >>> rng = np.random.default_rng(27446968) - >>> points = l*rng.random((2, n**2)) - >>> im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - >>> im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) - - >>> mask = (im > im.mean()).astype(float) - >>> mask += 0.1 * im - >>> img = mask + 0.2*rng.standard_normal(mask.shape) - - >>> hist, bin_edges = np.histogram(img, bins=60) - >>> bin_centers = 0.5*(bin_edges[:-1] + bin_edges[1:]) - - >>> binary_img = img > 0.5 - -.. figure:: auto_examples/images/sphx_glr_plot_histo_segmentation_001.png - :scale: 65 - :target: auto_examples/plot_histo_segmentation.html - -.. only:: html - - [:ref:`Python source code `] - -Use mathematical morphology to clean up the result:: - - >>> # Remove small white regions - >>> open_img = sp.ndimage.binary_opening(binary_img) - >>> # Remove small black hole - >>> close_img = sp.ndimage.binary_closing(open_img) - -.. figure:: auto_examples/images/sphx_glr_plot_clean_morpho_001.png - :scale: 65 - :target: auto_examples/plot_clean_morpho.html - -.. only:: html - - [:ref:`Python source code `] - -.. topic:: **Exercise** - :class: green - - Check that reconstruction operations (erosion + propagation) produce a - better result than opening/closing:: - - >>> eroded_img = sp.ndimage.binary_erosion(binary_img) - >>> reconstruct_img = sp.ndimage.binary_propagation(eroded_img, mask=binary_img) - >>> tmp = np.logical_not(reconstruct_img) - >>> eroded_tmp = sp.ndimage.binary_erosion(tmp) - >>> reconstruct_final = np.logical_not(sp.ndimage.binary_propagation(eroded_tmp, mask=tmp)) - >>> np.abs(mask - close_img).mean() - np.float64(0.00640699...) 
- >>> np.abs(mask - reconstruct_final).mean() - np.float64(0.00082232...) - -.. topic:: **Exercise** - :class: green - - Check how a first denoising step (e.g. with a median filter) - modifies the histogram, and check that the resulting histogram-based - segmentation is more accurate. - - -.. seealso:: - - More advanced segmentation algorithms are found in the - ``scikit-image``: see :ref:`scikit_image`. - -.. seealso:: - - Other Scientific Packages provide algorithms that can be useful for - image processing. In this example, we use the spectral clustering - function of the ``scikit-learn`` in order to segment glued objects. - - - :: - - >>> from sklearn.feature_extraction import image - >>> from sklearn.cluster import spectral_clustering - - >>> l = 100 - >>> x, y = np.indices((l, l)) - - >>> center1 = (28, 24) - >>> center2 = (40, 50) - >>> center3 = (67, 58) - >>> center4 = (24, 70) - >>> radius1, radius2, radius3, radius4 = 16, 14, 15, 14 - - >>> circle1 = (x - center1[0])**2 + (y - center1[1])**2 < radius1**2 - >>> circle2 = (x - center2[0])**2 + (y - center2[1])**2 < radius2**2 - >>> circle3 = (x - center3[0])**2 + (y - center3[1])**2 < radius3**2 - >>> circle4 = (x - center4[0])**2 + (y - center4[1])**2 < radius4**2 - - >>> # 4 circles - >>> img = circle1 + circle2 + circle3 + circle4 - >>> mask = img.astype(bool) - >>> img = img.astype(float) - - >>> rng = np.random.default_rng() - >>> img += 1 + 0.2*rng.standard_normal(img.shape) - >>> # Convert the image into a graph with the value of the gradient on - >>> # the edges. - >>> graph = image.img_to_graph(img, mask=mask) - - >>> # Take a decreasing function of the gradient: we take it weakly - >>> # dependent from the gradient the segmentation is close to a voronoi - >>> graph.data = np.exp(-graph.data/graph.data.std()) - - >>> labels = spectral_clustering(graph, n_clusters=4, eigen_solver='arpack') - >>> label_im = -np.ones(mask.shape) - >>> label_im[mask] = labels - - - .. 
image:: image_spectral_clustering.png - :align: center - - - -Measuring objects properties: ``scipy.ndimage.measurements`` -============================================================ - -Synthetic data:: - - >>> n = 10 - >>> l = 256 - >>> im = np.zeros((l, l)) - >>> rng = np.random.default_rng(27446968) - >>> points = l * rng.random((2, n**2)) - >>> im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - >>> im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) - >>> mask = im > im.mean() - -* **Analysis of connected components** - -Label connected components: ``scipy.dimage.label``:: - - >>> label_im, nb_labels = sp.ndimage.label(mask) - >>> nb_labels # how many regions? - 28 - >>> plt.imshow(label_im) - - -.. figure:: auto_examples/images/sphx_glr_plot_synthetic_data_001.png - :scale: 90 - :target: auto_examples/plot_synthetic_data.html - -.. only:: html - - [:ref:`Python source code `] - -Compute size, mean_value, etc. of each region:: - - >>> sizes = sp.ndimage.sum(mask, label_im, range(nb_labels + 1)) - >>> mean_vals = sp.ndimage.sum(im, label_im, range(1, nb_labels + 1)) - -Clean up small connect components:: - - >>> mask_size = sizes < 1000 - >>> remove_pixel = mask_size[label_im] - >>> remove_pixel.shape - (256, 256) - >>> label_im[remove_pixel] = 0 - >>> plt.imshow(label_im) - - -Now reassign labels with ``np.searchsorted``:: - - >>> labels = np.unique(label_im) - >>> label_im = np.searchsorted(labels, label_im) - -.. figure:: auto_examples/images/sphx_glr_plot_measure_data_001.png - :scale: 90 - :target: auto_examples/plot_measure_data.html - -.. only:: html - - [:ref:`Python source code `] - -Find region of interest enclosing object:: - - >>> slice_x, slice_y = sp.ndimage.find_objects(label_im)[3] - >>> roi = im[slice_x, slice_y] - >>> plt.imshow(roi) - - -.. figure:: auto_examples/images/sphx_glr_plot_find_object_001.png - :scale: 130 - :target: auto_examples/plot_find_object.html - -.. 
only:: html - - [:ref:`Python source code `] - -Other spatial measures: ``scipy.ndimage.center_of_mass``, -``scipy.ndimage.maximum_position``, etc. - -Can be used outside the limited scope of segmentation applications. - -Example: block mean:: - - >>> f = sp.datasets.face(gray=True) - >>> sx, sy = f.shape - >>> X, Y = np.ogrid[0:sx, 0:sy] - >>> regions = (sy//6) * (X//4) + (Y//6) # note that we use broadcasting - >>> block_mean = sp.ndimage.mean(f, labels=regions, index=np.arange(1, - ... regions.max() +1)) - >>> block_mean.shape = (sx // 4, sy // 6) - -.. figure:: auto_examples/images/sphx_glr_plot_block_mean_001.png - :scale: 70 - :target: auto_examples/plot_block_mean.html - -.. only:: html - - [:ref:`Python source code `] - -When regions are regular blocks, it is more efficient to use stride -tricks (:ref:`stride-manipulation-label`). - -Non-regularly-spaced blocks: radial mean:: - - >>> sx, sy = f.shape - >>> X, Y = np.ogrid[0:sx, 0:sy] - >>> r = np.hypot(X - sx/2, Y - sy/2) - >>> rbin = (20* r/r.max()).astype(int) - >>> radial_mean = sp.ndimage.mean(f, labels=rbin, index=np.arange(1, rbin.max() +1)) - -.. figure:: auto_examples/images/sphx_glr_plot_radial_mean_001.png - :scale: 70 - :target: auto_examples/plot_radial_mean.html - -.. only:: html - - [:ref:`Python source code `] - - -* **Other measures** - -Correlation function, Fourier/wavelet spectrum, etc. - -One example with mathematical morphology: `granulometry -`_ - -:: - - >>> def disk_structure(n): - ... struct = np.zeros((2 * n + 1, 2 * n + 1)) - ... x, y = np.indices((2 * n + 1, 2 * n + 1)) - ... mask = (x - n)**2 + (y - n)**2 <= n**2 - ... struct[mask] = 1 - ... return struct.astype(bool) - ... - >>> - >>> def granulometry(data, sizes=None): - ... s = max(data.shape) - ... if sizes is None: - ... sizes = range(1, s/2, 2) - ... granulo = [sp.ndimage.binary_opening(data, \ - ... structure=disk_structure(n)).sum() for n in sizes] - ... return granulo - ... 
- >>> - >>> rng = np.random.default_rng(27446968) - >>> n = 10 - >>> l = 256 - >>> im = np.zeros((l, l)) - >>> points = l*rng.random((2, n**2)) - >>> im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - >>> im = sp.ndimage.gaussian_filter(im, sigma=l/(4.*n)) - >>> - >>> mask = im > im.mean() - >>> - >>> granulo = granulometry(mask, sizes=np.arange(2, 19, 4)) - - -.. figure:: auto_examples/images/sphx_glr_plot_granulo_001.png - :scale: 100 - :target: auto_examples/plot_granulo.html - -.. only:: html - - [:ref:`Python source code `] - - -Full code examples -================== - -.. include the gallery. Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 - -| - - -.. seealso:: More on image-processing: - - * The chapter on :ref:`Scikit-image ` - - * Other, more powerful and complete modules: `OpenCV - `_ - (Python bindings), `CellProfiler `_, - `ITK `_ with Python bindings diff --git a/advanced/index.md b/advanced/index.md new file mode 100644 index 000000000..5bb45fd55 --- /dev/null +++ b/advanced/index.md @@ -0,0 +1,13 @@ +--- +orphan: true +--- + +(advanced-topics-part)= + +# Introduction to advanced topics + +This part of the _Scientific Python Lectures_ is dedicated to advanced usage. +It strives to educate the proficient Python coder to be an expert and +tackles various specific topics. + +See the "Advanced topics" section in the table of contents. diff --git a/advanced/index.rst b/advanced/index.rst deleted file mode 100644 index fd8b6026a..000000000 --- a/advanced/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _advanced_topics_part: - -Advanced topics -================ - -This part of the *Scientific Python Lectures* is dedicated to advanced usage. -It strives to educate the proficient Python coder to be an expert and -tackles various specific topics. - -| - -.. include:: ../includes/big_toc_css.rst - :start-line: 1 - -.. rst-class:: tune - - .. 
toctree:: - - advanced_python/index.rst - advanced_numpy/index.rst - debugging/index.rst - optimizing/index.rst - scipy_sparse/index.rst - image_processing/index.rst - mathematical_optimization/index.rst - interfacing_with_c/interfacing_with_c.rst diff --git a/advanced/interfacing_with_c/interfacing_with_c.md b/advanced/interfacing_with_c/interfacing_with_c.md new file mode 100644 index 000000000..c65efb4bd --- /dev/null +++ b/advanced/interfacing_with_c/interfacing_with_c.md @@ -0,0 +1,907 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Interfacing with C + +**Author**: _Valentin Haenel_ + + + +This chapter contains an _introduction_ to the many different routes for +making your native code (primarily `C/C++`) available from Python, a +process commonly referred to as _wrapping_. The goal of this chapter is to +give you a flavour of what technologies exist and what their respective +merits and shortcomings are, so that you can select the appropriate one +for your specific needs. In any case, once you do start wrapping, you +almost certainly will want to consult the respective documentation for +your selected technique. + +## Introduction + +This chapter covers the following techniques: + +- [Python-C-Api](https://docs.python.org/3/c-api/) +- [Ctypes](https://docs.python.org/3/library/ctypes.html) +- [SWIG (Simplified Wrapper and Interface Generator)](https://www.swig.org/) +- [Cython](https://cython.org/) + +These four techniques are perhaps the most well known ones, of which Cython is +probably the most advanced one and the one you should consider using first. The +others are also important, if you want to understand the wrapping problem from +different angles. 
Having said that, there are other alternatives out there, +but having understood the basics of the ones above, you will be in a position +to evaluate the technique of your choice to see if it fits your needs. + +The following criteria may be useful when evaluating a technology: + +- Are additional libraries required? +- Is the code autogenerated? +- Does it need to be compiled? +- Is there good support for interacting with NumPy arrays? +- Does it support C++? + +Before you set out, you should consider your use case. When interfacing with +native code, there are usually two use-cases that come up: + +- Existing code in C/C++ that needs to be leveraged, either because it already + exists, or because it is faster. +- Python code too slow, push inner loops to native code + +Each technology is demonstrated by wrapping the `cos` function from +`math.h`. While this is mostly a trivial example, it should serve us well +to demonstrate the basics of the wrapping solution. Since each technique also +includes some form of NumPy support, this is also demonstrated using an +example where the cosine is computed on some kind of array. + +Last but not least, a few small warnings: + +- All of these techniques may crash (segmentation fault) the Python + interpreter, which is (usually) due to bugs in the C code. +- All the examples have been done on Linux, they _should_ be possible on other + operating systems. +- You will need a C compiler for most of the examples. + +## Python-C-Api + +The [Python-C-API](https://docs.python.org/3/c-api/) is the backbone of the +standard Python interpreter (a.k.a _CPython_). Using this API it is possible to +write Python extension modules in C and C++. Obviously, these extension modules +can, by virtue of language compatibility, call any function written in C or +C++. + +When using the Python-C-API, one usually writes much boilerplate code, first to +parse the arguments that were given to a function, and later to construct the +return type. 
+ +**Advantages** + +- Requires no additional libraries +- Lots of low-level control +- Entirely usable from C++ + +**Disadvantages** + +- May require a substantial amount of effort +- Much overhead in the code +- Must be compiled +- High maintenance cost +- No forward compatibility across Python versions as C-API changes +- Reference count bugs are easy to create and very hard to track down. + +:::{note} +The Python-C-Api example here serves mainly for didactic reasons. Many of +the other techniques actually depend on this, so it is good to have a +high-level understanding of how it works. In 99% of the use-cases you will +be better off using an alternative technique. +::: + +:::{note} +Since reference counting bugs are easy to create and hard to track down, +anyone really needing to use the Python C-API should read the [section +about objects, types and reference counts](https://docs.python.org/3/c-api/intro.html#objects-types-and-reference-counts) +from the official Python documentation. Additionally, there is a tool by the +name of [cpychecker](https://gcc-python-plugin.readthedocs.io/en/latest/cpychecker.html) +which can help discover common errors with reference counting. +::: + +### Example + +The following C-extension module makes the `cos` function from the standard +math library available to Python: + +```{literalinclude} python_c_api/cos_module.c +:language: c +``` + +As you can see, there is much boilerplate, both to «massage» the arguments and +return types into place and for the module initialisation. Although some of +this is amortised, as the extension grows, the boilerplate required for each +function remains.
+ +The standard python build system, `setuptools`, supports compiling +C-extensions via a `setup.py` file: + +```{literalinclude} python_c_api/setup.py +:language: python +``` + +The setup file is called as follows: + +```console +$ cd advanced/interfacing_with_c/python_c_api + +$ ls +cos_module.c setup.py + +$ python setup.py build_ext --inplace +running build_ext +building 'cos_module' extension +creating build +creating build/temp.linux-x86_64-2.7 +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o +gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/python_c_api/cos_module.so + +$ ls +build/ cos_module.c cos_module.so setup.py +``` + +- `build_ext` is to build extension modules +- `--inplace` will output the compiled extension module into the current directory + +The file `cos_module.so` contains the compiled extension, which we can now load in the IPython interpreter: + +:::{note} +In Python 3, the filename for compiled modules includes metadata on the Python +interpreter (see [PEP 3149](https://peps.python.org/pep-3149/)) and is thus +longer. The import statement is not affected by this. +::: + +```ipython +In [1]: import cos_module + +In [2]: cos_module? 
+Type: module +String Form: +File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/python_c_api/cos_module.so +Docstring: + +In [3]: dir(cos_module) +Out[3]: ['__doc__', '__file__', '__name__', '__package__', 'cos_func'] + +In [4]: cos_module.cos_func(1.0) +Out[4]: 0.5403023058681398 + +In [5]: cos_module.cos_func(0.0) +Out[5]: 1.0 + +In [6]: cos_module.cos_func(3.14159265359) +Out[6]: -1.0 +``` + +Now let's see how robust this is: + +```ipython +In [10]: cos_module.cos_func('foo') +--------------------------------------------------------------------------- +TypeError Traceback (most recent call last) + in () +----> 1 cos_module.cos_func('foo') +TypeError: a float is required +``` + +### NumPy Support + +Analogous to the Python-C-API, NumPy, which is itself implemented as a +C-extension, comes with the [NumPy-C-API](https://numpy.org/doc/stable/reference/c-api). This API can be used +to create and manipulate NumPy arrays from C, when writing a custom +C-extension. See also: {ref}`advanced-numpy`. + +:::{note} +If you do ever need to use the NumPy C-API, refer to the documentation about +[Arrays](https://numpy.org/doc/stable/reference/c-api/array.html) and +[Iterators](https://numpy.org/doc/stable/reference/c-api/iterator.html). +::: + +The following example shows how to pass NumPy arrays as arguments to functions +and how to iterate over NumPy arrays using the (old) NumPy-C-API. It simply +takes an array as argument, applies the cosine function from `math.h` and +returns a resulting new array. + +```{literalinclude} numpy_c_api/cos_module_np.c +:language: c +``` + +To compile this we can use `setuptools` again. However we need to be sure to +include the NumPy headers by using {func}`numpy.get_include`.
+ +```{literalinclude} numpy_c_api/setup.py +:language: python +``` + +To convince ourselves that this actually works, we run the following test +script: + +```{literalinclude} numpy_c_api/test_cos_module_np.py +:language: numpy +``` + +And this should result in the following figure: + +![](numpy_c_api/test_cos_module_np.png) + +## Ctypes + +[Ctypes](https://docs.python.org/3/library/ctypes.html) is a _foreign +function library_ for Python. It provides C compatible data types, and allows +calling functions in DLLs or shared libraries. It can be used to wrap these +libraries in pure Python. + +**Advantages** + +- Part of the Python standard library +- Does not need to be compiled +- Wrapping code entirely in Python + +**Disadvantages** + +- Requires code to be wrapped to be available as a shared library + (roughly speaking `*.dll` in Windows, `*.so` in Linux and `*.dylib` in Mac OSX.) +- No good support for C++ + +### Example + +As advertised, the wrapper code is in pure Python. + +```{literalinclude} ctypes/cos_module.py +:language: python +``` + +- Finding and loading the library may vary depending on your operating system, + check [the documentation](https://docs.python.org/3/library/ctypes.html#loading-dynamic-link-libraries) + for details +- This may be somewhat deceptive, since the math library exists in compiled + form on the system already. If you were to wrap an in-house library, you would + have to compile it first, which may or may not require some additional effort. + +We may now use this, as before: + +```ipython +In [1]: import cos_module + +In [2]: cos_module?
+Type: module +String Form: +File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/ctypes/cos_module.py +Docstring: + +In [3]: dir(cos_module) +Out[3]: +['__builtins__', + '__doc__', + '__file__', + '__name__', + '__package__', + 'cos_func', + 'ctypes', + 'find_library', + 'libm'] + +In [4]: cos_module.cos_func(1.0) +Out[4]: 0.5403023058681398 + +In [5]: cos_module.cos_func(0.0) +Out[5]: 1.0 + +In [6]: cos_module.cos_func(3.14159265359) +Out[6]: -1.0 +``` + +As with the previous example, this code is somewhat robust, although the error +message is not quite as helpful, since it does not tell us what the type should be. + +```python + + +cos_module.cos_func('foo') +--------------------------------------------------------------------------- +ArgumentError Traceback (most recent call last) + in () +----> 1 cos_module.cos_func('foo') +/home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/ctypes/cos_module.py in cos_func(arg) + 12 def cos_func(arg): + 13 ''' Wrapper for cos from math.h ''' +---> 14 return libm.cos(arg) +ArgumentError: argument 1: : wrong type +``` + +### NumPy Support + +NumPy contains some support for interfacing with ctypes. In particular there is +support for exporting certain attributes of a NumPy array as ctypes data-types +and there are functions to convert from C arrays to NumPy arrays and back. + + + +For more information, consult the corresponding section in the [NumPy Cookbook](https://www.scipy.org/Cookbook/Ctypes) and the API documentation for +[numpy.ndarray.ctypes](https://numpy.org/doc/stable/reference/generated/numpy.ndarray.ctypes.html) +and [numpy.ctypeslib](https://numpy.org/doc/stable/reference/routines.ctypeslib.html). + +For the following example, let's consider a C function in a library that takes +an input and an output array, computes the cosine of the input array and +stores the result in the output array. 
+ +The library consists of the following header file (although this is not +strictly needed for this example, we list it for completeness): + +```{literalinclude} ctypes_numpy/cos_doubles.h +:language: c +``` + +The function implementation resides in the following C source file: + +```{literalinclude} ctypes_numpy/cos_doubles.c +:language: c +``` + +And since the library is pure C, we can't use `setuptools` to compile it, but +must use a combination of `make` and `gcc`: + +```{literalinclude} ctypes_numpy/makefile +:language: make +``` + +We can then compile this (on Linux) into the shared library +`libcos_doubles.so`: + +```console +$ ls +cos_doubles.c cos_doubles.h cos_doubles.py makefile test_cos_doubles.py +$ make +gcc -c -fPIC cos_doubles.c -o cos_doubles.o +gcc -shared -Wl,-soname,libcos_doubles.so -o libcos_doubles.so cos_doubles.o +$ ls +cos_doubles.c cos_doubles.o libcos_doubles.so* test_cos_doubles.py +cos_doubles.h cos_doubles.py makefile +``` + +Now we can proceed to wrap this library via ctypes with direct support for +(certain kinds of) NumPy arrays: + +```{literalinclude} ctypes_numpy/cos_doubles.py +:language: numpy +``` + +- Note the inherent limitation of contiguous single dimensional NumPy arrays, + since the C function requires this kind of buffer. +- Also note that the output array must be preallocated, for example with + {func}`numpy.zeros` and the function will write into its buffer. +- Although the original signature of the `cos_doubles` function is `ARRAY, +ARRAY, int` the final `cos_doubles_func` takes only two NumPy arrays as + arguments.
+ +And, as before, we convince ourselves that it worked: + +```{literalinclude} ctypes_numpy/test_cos_doubles.py +:language: numpy +``` + +![](ctypes_numpy/test_cos_doubles.png) + +## SWIG + +[SWIG](https://www.swig.org/), the Simplified Wrapper and Interface Generator, +is a software development tool that connects programs written in C and C++ +with a variety of high-level programming languages, including Python. The +important thing with SWIG is that it can autogenerate the wrapper code for you. +While this is an advantage in terms of development time, it can also be a +burden. The generated files tend to be quite large and may not be too human +readable, and the multiple levels of indirection, which are a result of +the wrapping process, may be a bit tricky to understand. + +:::{note} +The autogenerated C code uses the Python-C-Api. +::: + +**Advantages** + +- Can automatically wrap entire libraries given the headers +- Works nicely with C++ + +**Disadvantages** + +- Autogenerates enormous files +- Hard to debug if something goes wrong +- Steep learning curve + +### Example + +Let's imagine that our `cos` function lives in a `cos_module` which has +been written in `c` and consists of the source file `cos_module.c`: + +```{literalinclude} swig/cos_module.c +:language: c +``` + +and the header file `cos_module.h`: + +```{literalinclude} swig/cos_module.h +:language: c +``` + +And our goal is to expose the `cos_func` to Python. To achieve this with +SWIG, we must write an _interface file_ which contains the instructions for SWIG. + +```{literalinclude} swig/cos_module.i +:language: c +``` + +As you can see, not too much code is needed here. For this simple example it is +enough to simply include the header file in the interface file, to expose the +function to Python. However, SWIG does allow for more fine grained +inclusion/exclusion of functions found in header files; check the documentation +for details.
+ +Generating the compiled wrappers is a two stage process: + +1. Run the `swig` executable on the interface file to generate the files + `cos_module_wrap.c`, which is the source file for the autogenerated Python + C-extension and `cos_module.py`, which is the autogenerated pure python + module. +2. Compile the `cos_module_wrap.c` into the `_cos_module.so`. Luckily, + `setuptools` knows how to handle SWIG interface files, so that our + `setup.py` is simply: + +```{literalinclude} swig/setup.py +:language: python +``` + +```console +$ cd advanced/interfacing_with_c/swig + +$ ls +cos_module.c cos_module.h cos_module.i setup.py + +$ python setup.py build_ext --inplace +running build_ext +building '_cos_module' extension +swigging cos_module.i to cos_module_wrap.c +swig -python -o cos_module_wrap.c cos_module.i +creating build +creating build/temp.linux-x86_64-2.7 +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module_wrap.c -o build/temp.linux-x86_64-2.7/cos_module_wrap.o +gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o build/temp.linux-x86_64-2.7/cos_module_wrap.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig/_cos_module.so + +$ ls +build/ cos_module.c cos_module.h cos_module.i cos_module.py _cos_module.so* cos_module_wrap.c setup.py +``` + +We can now load and execute the `cos_module` as we have done in the previous examples: + +```ipython +In [1]: import cos_module + +In [2]: cos_module? 
+Type: module +String Form: +File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig/cos_module.py +Docstring: + +In [3]: dir(cos_module) +Out[3]: +['__builtins__', + '__doc__', + '__file__', + '__name__', + '__package__', + '_cos_module', + '_newclass', + '_object', + '_swig_getattr', + '_swig_property', + '_swig_repr', + '_swig_setattr', + '_swig_setattr_nondynamic', + 'cos_func'] + +In [4]: cos_module.cos_func(1.0) +Out[4]: 0.5403023058681398 + +In [5]: cos_module.cos_func(0.0) +Out[5]: 1.0 + +In [6]: cos_module.cos_func(3.14159265359) +Out[6]: -1.0 +``` + +Again we test for robustness, and we see that we get a better error message +(although, strictly speaking in Python there is no `double` type): + +```ipython +In [7]: cos_module.cos_func('foo') +--------------------------------------------------------------------------- +TypeError Traceback (most recent call last) + in () +----> 1 cos_module.cos_func('foo') +TypeError: in method 'cos_func', argument 1 of type 'double' +``` + +### NumPy Support + +NumPy provides [support for SWIG](https://numpy.org/doc/stable/reference/swig.html) with the `numpy.i` +file. This interface file defines various so-called _typemaps_ which support +conversion between NumPy arrays and C-Arrays. In the following example we will +take a quick look at how such typemaps work in practice. + +We have the same `cos_doubles` function as in the ctypes example: + +```{literalinclude} swig_numpy/cos_doubles.h +:language: c +``` + +```{literalinclude} swig_numpy/cos_doubles.c +:language: c +``` + +This is wrapped as `cos_doubles_func` using the following SWIG interface +file: + +```{literalinclude} swig_numpy/cos_doubles.i +:language: c +``` + +- To use the NumPy typemaps, we need include the `numpy.i` file. +- Observe the call to `import_array()` which we encountered already in the + NumPy-C-API example. 
+- Since the type maps only support the signature `ARRAY, SIZE` we need to + wrap the `cos_doubles` as `cos_doubles_func` which takes two arrays + including sizes as input. +- As opposed to the simple SWIG example, we don't include the `cos_doubles.h` + header. There is nothing there that we wish to expose to Python since we + expose the functionality through `cos_doubles_func`. + +And, as before, we can use `setuptools` to wrap this: + +```{literalinclude} swig_numpy/setup.py +:language: python +``` + +As previously, we need to use `include_dirs` to specify the location of the NumPy headers. + +```console +$ ls +cos_doubles.c cos_doubles.h cos_doubles.i numpy.i setup.py test_cos_doubles.py +$ python setup.py build_ext -i +running build_ext +building '_cos_doubles' extension +swigging cos_doubles.i to cos_doubles_wrap.c +swig -python -o cos_doubles_wrap.c cos_doubles.i +cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found. +cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found. +cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found.
+creating build +creating build/temp.linux-x86_64-2.7 +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c cos_doubles.c -o build/temp.linux-x86_64-2.7/cos_doubles.o +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c cos_doubles_wrap.c -o build/temp.linux-x86_64-2.7/cos_doubles_wrap.o +In file included from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1722, + from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:17, + from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:15, + from cos_doubles_wrap.c:2706: +/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/npy_deprecated_api.h:11:2: warning: #warning "Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" +gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_doubles.o build/temp.linux-x86_64-2.7/cos_doubles_wrap.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig_numpy/_cos_doubles.so +$ ls +build/ cos_doubles.h cos_doubles.py cos_doubles_wrap.c setup.py +cos_doubles.c cos_doubles.i _cos_doubles.so* numpy.i test_cos_doubles.py +``` + +And, as before, we convince ourselves that it worked: + +```{literalinclude} swig_numpy/test_cos_doubles.py +:language: numpy +``` + +![](swig_numpy/test_cos_doubles.png) + +## Cython + +[Cython](https://cython.org/) is both a Python-like language for writing +C-extensions and an advanced compiler for this language. 
The Cython _language_ +is a superset of Python, which comes with additional constructs that allow you to +call C functions and annotate variables and class attributes with C types. In +this sense one could also call it a _Python with types_. + +In addition to the basic use case of wrapping native code, Cython supports an +additional use-case, namely incremental optimization. Basically, one starts out +with a pure-Python script and incrementally adds Cython types to the bottleneck +code to optimize only those code paths that really matter. + +In this sense it is quite similar to SWIG, since the code can be autogenerated +but in a sense it is also quite similar to ctypes since the wrapping code can +(almost) be written in Python. + +While other solutions that autogenerate code can be quite difficult to debug +(for example SWIG), Cython comes with an extension to the GNU debugger that +helps debug Python, Cython and C code. + +:::{note} +The autogenerated C code uses the Python-C-Api. +::: + +**Advantages** + +- Python like language for writing C-extensions +- Autogenerated code +- Supports incremental optimization +- Includes a GNU debugger extension +- Support for C++ (Since version 0.13) + +**Disadvantages** + +- Must be compiled +- Requires an additional library (but only at build time, and this problem can be + overcome by shipping the generated C files) + +### Example + +The main Cython code for our `cos_module` is contained in the file +`cos_module.pyx`: + +```{literalinclude} cython/cos_module.pyx +:language: cython +``` + +Note the additional keywords such as `cdef` and `extern`. Also the +`cos_func` is then pure Python.
+ +Again we can use the standard `setuptools` module, but this time we need some +additional pieces from `Cython.Build`: + +```{literalinclude} cython/setup.py + +``` + +Compiling this: + +```console +$ cd advanced/interfacing_with_c/cython +$ ls +cos_module.pyx setup.py +$ python setup.py build_ext --inplace +running build_ext +cythoning cos_module.pyx to cos_module.c +building 'cos_module' extension +creating build +creating build/temp.linux-x86_64-2.7 +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o +gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so +$ ls +build/ cos_module.c cos_module.pyx cos_module.so* setup.py +``` + +And running it: + +```ipython +In [1]: import cos_module + +In [2]: cos_module? 
+Type: module +String Form: +File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so +Docstring: + +In [3]: dir(cos_module) +Out[3]: +['__builtins__', + '__doc__', + '__file__', + '__name__', + '__package__', + '__test__', + 'cos_func'] + +In [4]: cos_module.cos_func(1.0) +Out[4]: 0.5403023058681398 + +In [5]: cos_module.cos_func(0.0) +Out[5]: 1.0 + +In [6]: cos_module.cos_func(3.14159265359) +Out[6]: -1.0 +``` + +And, testing a little for robustness, we can see that we get good error messages: + +```ipython +In [7]: cos_module.cos_func('foo') +--------------------------------------------------------------------------- +TypeError Traceback (most recent call last) + in () +----> 1 cos_module.cos_func('foo') +/home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so in cos_module.cos_func (cos_module.c:506)() +TypeError: a float is required +``` + +Additionally, it is worth noting that `Cython` ships with complete +declarations for the C math library, which simplifies the code above to become: + +```{literalinclude} cython_simple/cos_module.pyx +:language: cython +``` + +In this case the `cimport` statement is used to import the `cos` function. + +### NumPy Support + +Cython has support for NumPy via the `numpy.pyx` file which allows you to add +the NumPy array type to your Cython code. I.e. like specifying that variable +`i` is of type `int`, you can specify that variable `a` is of type +`numpy.ndarray` with a given `dtype`. Also, certain optimizations such as +bounds checking are supported. Look at the corresponding section in the [Cython +documentation](https://docs.cython.org/en/latest/src/tutorial/numpy.html). In case you +want to pass NumPy arrays as C arrays to your Cython wrapped C functions, there +is a [section about this in the Cython documentation](https://docs.cython.org/en/latest/src/userguide/memoryviews.html#pass-data-from-a-c-function-via-pointer). 
+ +In the following example, we will show how to wrap the familiar `cos_doubles` +function using Cython. + +```{literalinclude} cython_numpy/cos_doubles.h +:language: c +``` + +```{literalinclude} cython_numpy/cos_doubles.c +:language: c +``` + +This is wrapped as `cos_doubles_func` using the following Cython code: + +```{literalinclude} cython_numpy/_cos_doubles.pyx +:language: cython +``` + +And can be compiled using `setuptools`: + +```{literalinclude} cython_numpy/setup.py +:language: python +``` + +- As with the previous compiled NumPy examples, we need the `include_dirs` option. + +```console +$ ls +cos_doubles.c cos_doubles.h _cos_doubles.pyx setup.py test_cos_doubles.py +$ python setup.py build_ext -i +running build_ext +cythoning _cos_doubles.pyx to _cos_doubles.c +building 'cos_doubles' extension +creating build +creating build/temp.linux-x86_64-2.7 +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c _cos_doubles.c -o build/temp.linux-x86_64-2.7/_cos_doubles.o +In file included from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1722, + from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:17, + from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:15, + from _cos_doubles.c:253: +/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/npy_deprecated_api.h:11:2: warning: #warning "Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" +/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/__ufunc_api.h:236: warning: ‘_import_umath’ defined but not used +gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include 
-I/home/esc/anaconda/include/python2.7 -c cos_doubles.c -o build/temp.linux-x86_64-2.7/cos_doubles.o +gcc -pthread -shared build/temp.linux-x86_64-2.7/_cos_doubles.o build/temp.linux-x86_64-2.7/cos_doubles.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython_numpy/cos_doubles.so +$ ls +build/ _cos_doubles.c cos_doubles.c cos_doubles.h _cos_doubles.pyx cos_doubles.so* setup.py test_cos_doubles.py +``` + +And, as before, we convince ourselves that it worked: + +```{literalinclude} cython_numpy/test_cos_doubles.py +:language: numpy +``` + +![](cython_numpy/test_cos_doubles.png) + +## Summary + +In this section four different techniques for interfacing with native code +have been presented. The table below roughly summarizes some of the aspects of +the techniques. + +| x | Part of CPython | Compiled | Autogenerated | NumPy Support | +| ------------ | --------------- | -------- | ------------- | ------------- | +| Python-C-API | `True` | `True` | `False` | `True` | +| Ctypes | `True` | `False` | `False` | `True` | +| Swig | `False` | `True` | `True` | `True` | +| Cython | `False` | `True` | `True` | `True` | + +Of all four presented techniques, Cython is the most modern and advanced. In +particular, the ability to optimize code incrementally by adding types to your +Python code is unique. + +## Further Reading and References + +- [Gaël Varoquaux's blog post about avoiding data copies](https://gael-varoquaux.info/programming/cython-example-of-exposing-c-computed-arrays-in-python-without-data-copies.html) provides some insight on how to + handle memory management cleverly. If you ever run into issues with large + datasets, this is a reference to come back to for some inspiration. + +## Exercises + +Since this is a brand new section, the exercises are considered more as +pointers as to what to look at next, so pick the ones that you find more +interesting.
If you have good ideas for exercises, please let us know! + +1. Download the source code for each example and compile and run them on your + machine. + +2. Make trivial changes to each example and convince yourself that this works. ( + E.g. change `cos` for `sin`.) + +3. Most of the examples, especially the ones involving NumPy, may still be + fragile and respond badly to input errors. Look for ways to crash the + examples, figure out what the problem is and devise a potential solution. + Here are some ideas: + + 1. Numerical overflow. + 2. Input and output arrays that have different lengths. + 3. Multidimensional array. + 4. Empty array + 5. Arrays with non-`double` types + +4. Use the `%timeit` IPython magic to measure the execution time of the + various solutions + +### Python-C-API + +1. Modify the NumPy example such that the function takes two input arguments, where + the second is the preallocated output array, making it similar to the other NumPy examples. +2. Modify the example such that the function only takes a single input array + and modifies this in place. +3. Try to fix the example to use the new [NumPy iterator protocol](https://numpy.org/doc/stable/reference/c-api/iterator.html). If you + manage to obtain a working solution, please submit a pull-request on GitHub. +4. You may have noticed that the NumPy-C-API example is the only NumPy example + that does not wrap `cos_doubles` but instead applies the `cos` function + directly to the elements of the NumPy array. Does this have any advantages + over the other techniques? +5. Can you wrap `cos_doubles` using only the NumPy-C-API? You may need to + ensure that the arrays have the correct type, are one dimensional and + contiguous in memory. + +### Ctypes + +1. Modify the NumPy example such that `cos_doubles_func` handles the preallocation for + you, thus making it more like the NumPy-C-API example. + +### SWIG + +1. Look at the code that SWIG autogenerates, how much of it do you + understand? +2.
Modify the NumPy example such that `cos_doubles_func` handles the preallocation for + you, thus making it more like the NumPy-C-API example. +3. Modify the `cos_doubles` C function so that it returns an allocated array. + Can you wrap this using SWIG typemaps? If not, why not? Is there a + workaround for this specific situation? (Hint: you know the size of the + output array, so it may be possible to construct a NumPy array from the + returned `double *`.) + +### Cython + +1. Look at the code that Cython autogenerates. Take a closer look at some of the + comments that Cython inserts. What do you see? +2. Look at the section [Working with NumPy](https://docs.cython.org/en/latest/src/tutorial/numpy.html) from the Cython + documentation to learn how to incrementally optimize a pure python script that uses NumPy. +3. Modify the NumPy example such that `cos_doubles_func` handles the preallocation for + you, thus making it more like the NumPy-C-API example. diff --git a/advanced/interfacing_with_c/interfacing_with_c.rst b/advanced/interfacing_with_c/interfacing_with_c.rst deleted file mode 100644 index 8cb261948..000000000 --- a/advanced/interfacing_with_c/interfacing_with_c.rst +++ /dev/null @@ -1,916 +0,0 @@ -================== -Interfacing with C -================== - -**Author**: *Valentin Haenel* - -.. TODO: - - * Download links - * Timing? - * Additional documentation - * What about overflow? - -This chapter contains an *introduction* to the many different routes for -making your native code (primarily ``C/C++``) available from Python, a -process commonly referred to *wrapping*. The goal of this chapter is to -give you a flavour of what technologies exist and what their respective -merits and shortcomings are, so that you can select the appropriate one -for your specific needs. In any case, once you do start wrapping, you -almost certainly will want to consult the respective documentation for -your selected technique. - -.. 
contents:: Chapters contents - :local: - :depth: 1 - -Introduction -============ - -This chapter covers the following techniques: - -* `Python-C-Api `_ -* `Ctypes `_ -* `SWIG (Simplified Wrapper and Interface Generator) `_ -* `Cython `__ - -These four techniques are perhaps the most well known ones, of which Cython is -probably the most advanced one and the one you should consider using first. The -others are also important, if you want to understand the wrapping problem from -different angles. Having said that, there are other alternatives out there, -but having understood the basics of the ones above, you will be in a position -to evaluate the technique of your choice to see if it fits your needs. - -The following criteria may be useful when evaluating a technology: - -* Are additional libraries required? -* Is the code autogenerated? -* Does it need to be compiled? -* Is there good support for interacting with NumPy arrays? -* Does it support C++? - -Before you set out, you should consider your use case. When interfacing with -native code, there are usually two use-cases that come up: - -* Existing code in C/C++ that needs to be leveraged, either because it already - exists, or because it is faster. -* Python code too slow, push inner loops to native code - -Each technology is demonstrated by wrapping the ``cos`` function from -``math.h``. While this is a mostly a trivial example, it should serve us well -to demonstrate the basics of the wrapping solution. Since each technique also -includes some form of NumPy support, this is also demonstrated using an -example where the cosine is computed on some kind of array. - -Last but not least, two small warnings: - -* All of these techniques may crash (segmentation fault) the Python - interpreter, which is (usually) due to bugs in the C code. -* All the examples have been done on Linux, they *should* be possible on other - operating systems. -* You will need a C compiler for most of the examples. 
- - -Python-C-Api -============ - -The `Python-C-API `_ is the backbone of the -standard Python interpreter (a.k.a *CPython*). Using this API it is possible to -write Python extension module in C and C++. Obviously, these extension modules -can, by virtue of language compatibility, call any function written in C or -C++. - -When using the Python-C-API, one usually writes much boilerplate code, first to -parse the arguments that were given to a function, and later to construct the -return type. - -**Advantages** - -* Requires no additional libraries -* Lots of low-level control -* Entirely usable from C++ - -**Disadvantages** - -* May require a substantial amount of effort -* Much overhead in the code -* Must be compiled -* High maintenance cost -* No forward compatibility across Python versions as C-API changes -* Reference count bugs are easy to create and very hard to track down. - -.. note:: - - The Python-C-Api example here serves mainly for didactic reasons. Many of - the other techniques actually depend on this, so it is good to have a - high-level understanding of how it works. In 99% of the use-cases you will - be better off, using an alternative technique. - -.. note:: - - Since reference counting bugs are easy to create and hard to track down, - anyone really needing to use the Python C-API should read the `section - about objects, types and reference counts - `_ - from the official python documentation. Additionally, there is a tool by the - name of `cpychecker - `_ - which can help discover common errors with reference counting. - -Example -------- - -The following C-extension module, make the ``cos`` function from the standard -math library available to Python: - -.. literalinclude:: python_c_api/cos_module.c - :language: c - -As you can see, there is much boilerplate, both to «massage» the arguments and -return types into place and for the module initialisation. 
Although some of -this is amortised, as the extension grows, the boilerplate required for each -function(s) remains. - -The standard python build system, ``setuptools``, supports compiling -C-extensions via a ``setup.py`` file: - -.. literalinclude:: python_c_api/setup.py - :language: python - -The setup file is called as follows: - -.. sourcecode:: console - - $ cd advanced/interfacing_with_c/python_c_api - - $ ls - cos_module.c setup.py - - $ python setup.py build_ext --inplace - running build_ext - building 'cos_module' extension - creating build - creating build/temp.linux-x86_64-2.7 - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o - gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/python_c_api/cos_module.so - - $ ls - build/ cos_module.c cos_module.so setup.py - -* ``build_ext`` is to build extension modules -* ``--inplace`` will output the compiled extension module into the current directory - -The file ``cos_module.so`` contains the compiled extension, which we can now load in the IPython interpreter: - -.. note:: - - In Python 3, the filename for compiled modules includes metadata on the Python - interpreter (see `PEP 3149 `_) and is thus - longer. The import statement is not affected by this. - -.. ipython:: - :verbatim: - - In [1]: import cos_module - - In [2]: cos_module? 
- Type: module - String Form: - File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/python_c_api/cos_module.so - Docstring: - - In [3]: dir(cos_module) - Out[3]: ['__doc__', '__file__', '__name__', '__package__', 'cos_func'] - - In [4]: cos_module.cos_func(1.0) - Out[4]: 0.5403023058681398 - - In [5]: cos_module.cos_func(0.0) - Out[5]: 1.0 - - In [6]: cos_module.cos_func(3.14159265359) - Out[7]: -1.0 - -Now let's see how robust this is: - -.. ipython:: - :verbatim: - - In [10]: cos_module.cos_func('foo') - --------------------------------------------------------------------------- - TypeError Traceback (most recent call last) - in () - ----> 1 cos_module.cos_func('foo') - TypeError: a float is required - -NumPy Support -------------- - -Analog to the Python-C-API, NumPy, which is itself implemented as a -C-extension, comes with the `NumPy-C-API -`_. This API can be used -to create and manipulate NumPy arrays from C, when writing a custom -C-extension. See also: :ref:`advanced_numpy`. - -.. note:: - - If you do ever need to use the NumPy C-API refer to the documentation about - `Arrays `_ and - `Iterators - `_. - -The following example shows how to pass NumPy arrays as arguments to functions -and how to iterate over NumPy arrays using the (old) NumPy-C-API. It simply -takes an array as argument applies the cosine function from the ``math.h`` and -returns a resulting new array. - -.. literalinclude:: numpy_c_api/cos_module_np.c - :language: c - -To compile this we can use ``setuptools`` again. However we need to be sure to -include the NumPy headers by using :func:`numpy.get_include`. - -.. literalinclude:: numpy_c_api/setup.py - :language: python - -To convince ourselves if this does actually works, we run the following test -script: - -.. literalinclude:: numpy_c_api/test_cos_module_np.py - :language: numpy - -And this should result in the following figure: - -.. 
image:: numpy_c_api/test_cos_module_np.png - :scale: 50 - - -Ctypes -====== - -`Ctypes `_ is a *foreign -function library* for Python. It provides C compatible data types, and allows -calling functions in DLLs or shared libraries. It can be used to wrap these -libraries in pure Python. - -**Advantages** - -* Part of the Python standard library -* Does not need to be compiled -* Wrapping code entirely in Python - -**Disadvantages** - -* Requires code to be wrapped to be available as a shared library - (roughly speaking ``*.dll`` in Windows ``*.so`` in Linux and ``*.dylib`` in Mac OSX.) -* No good support for C++ - -Example -------- - -As advertised, the wrapper code is in pure Python. - -.. literalinclude:: ctypes/cos_module.py - :language: python - -* Finding and loading the library may vary depending on your operating system, - check `the documentation - `_ - for details -* This may be somewhat deceptive, since the math library exists in compiled - form on the system already. If you were to wrap a in-house library, you would - have to compile it first, which may or may not require some additional effort. - -We may now use this, as before: - -.. ipython:: - :verbatim: - - In [1]: import cos_module - - In [2]: cos_module? - Type: module - String Form: - File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/ctypes/cos_module.py - Docstring: - - In [3]: dir(cos_module) - Out[3]: - ['__builtins__', - '__doc__', - '__file__', - '__name__', - '__package__', - 'cos_func', - 'ctypes', - 'find_library', - 'libm'] - - In [4]: cos_module.cos_func(1.0) - Out[4]: 0.5403023058681398 - - In [5]: cos_module.cos_func(0.0) - Out[5]: 1.0 - - In [6]: cos_module.cos_func(3.14159265359) - Out[6]: -1.0 - -As with the previous example, this code is somewhat robust, although the error -message is not quite as helpful, since it does not tell us what the type should be. - -.. 
ipython:: - :verbatim: - - In [7]: cos_module.cos_func('foo') - --------------------------------------------------------------------------- - ArgumentError Traceback (most recent call last) - in () - ----> 1 cos_module.cos_func('foo') - /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/ctypes/cos_module.py in cos_func(arg) - 12 def cos_func(arg): - 13 ''' Wrapper for cos from math.h ''' - ---> 14 return libm.cos(arg) - ArgumentError: argument 1: : wrong type - -NumPy Support -------------- - -NumPy contains some support for interfacing with ctypes. In particular there is -support for exporting certain attributes of a NumPy array as ctypes data-types -and there are functions to convert from C arrays to NumPy arrays and back. - -.. XXX Should use :mod: and :class: - -For more information, consult the corresponding section in the `NumPy Cookbook -`_ and the API documentation for -`numpy.ndarray.ctypes `_ -and `numpy.ctypeslib `_. - -For the following example, let's consider a C function in a library that takes -an input and an output array, computes the cosine of the input array and -stores the result in the output array. - -The library consists of the following header file (although this is not -strictly needed for this example, we list it for completeness): - -.. literalinclude:: ctypes_numpy/cos_doubles.h - :language: c - -The function implementation resides in the following C source file: - -.. literalinclude:: ctypes_numpy/cos_doubles.c - :language: c - -And since the library is pure C, we can't use ``setuptools`` to compile it, but -must use a combination of ``make`` and ``gcc``: - -.. literalinclude:: ctypes_numpy/makefile - :language: make - -We can then compile this (on Linux) into the shared library -``libcos_doubles.so``: - -.. 
sourcecode:: console - - $ ls - cos_doubles.c cos_doubles.h cos_doubles.py makefile test_cos_doubles.py - $ make - gcc -c -fPIC cos_doubles.c -o cos_doubles.o - gcc -shared -Wl,-soname,libcos_doubles.so -o libcos_doubles.so cos_doubles.o - $ ls - cos_doubles.c cos_doubles.o libcos_doubles.so* test_cos_doubles.py - cos_doubles.h cos_doubles.py makefile - -Now we can proceed to wrap this library via ctypes with direct support for -(certain kinds of) NumPy arrays: - -.. literalinclude:: ctypes_numpy/cos_doubles.py - :language: numpy - -* Note the inherent limitation of contiguous single dimensional NumPy arrays, - since the C functions requires this kind of buffer. -* Also note that the output array must be preallocated, for example with - :func:`numpy.zeros` and the function will write into it's buffer. -* Although the original signature of the ``cos_doubles`` function is ``ARRAY, - ARRAY, int`` the final ``cos_doubles_func`` takes only two NumPy arrays as - arguments. - -And, as before, we convince ourselves that it worked: - -.. literalinclude:: ctypes_numpy/test_cos_doubles.py - :language: numpy - -.. image:: ctypes_numpy/test_cos_doubles.png - :scale: 50 - -SWIG -==== - -`SWIG `_, the Simplified Wrapper Interface Generator, -is a software development tool that connects programs written in C and C++ -with a variety of high-level programming languages, including Python. The -important thing with SWIG is, that it can autogenerate the wrapper code for you. -While this is an advantage in terms of development time, it can also be a -burden. The generated file tend to be quite large and may not be too human -readable and the multiple levels of indirection which are a result of -the wrapping process, may be a bit tricky to understand. - -.. note:: - - The autogenerated C code uses the Python-C-Api. 
- -**Advantages** - -* Can automatically wrap entire libraries given the headers -* Works nicely with C++ - -**Disadvantages** - -* Autogenerates enormous files -* Hard to debug if something goes wrong -* Steep learning curve - -Example -------- - -Let's imagine that our ``cos`` function lives in a ``cos_module`` which has -been written in ``c`` and consists of the source file ``cos_module.c``: - -.. literalinclude:: swig/cos_module.c - :language: c - -and the header file ``cos_module.h``: - -.. literalinclude:: swig/cos_module.h - :language: c - -And our goal is to expose the ``cos_func`` to Python. To achieve this with -SWIG, we must write an *interface file* which contains the instructions for SWIG. - -.. literalinclude:: swig/cos_module.i - :language: c - -As you can see, not too much code is needed here. For this simple example it is -enough to simply include the header file in the interface file, to expose the -function to Python. However, SWIG does allow for more fine grained -inclusion/exclusion of functions found in header files, check the documentation -for details. - -Generating the compiled wrappers is a two stage process: - -#. Run the ``swig`` executable on the interface file to generate the files - ``cos_module_wrap.c``, which is the source file for the autogenerated Python - C-extension and ``cos_module.py``, which is the autogenerated pure python - module. - -#. Compile the ``cos_module_wrap.c`` into the ``_cos_module.so``. Luckily, - ``setuptools`` knows how to handle SWIG interface files, so that our - ``setup.py`` is simply: - -.. literalinclude:: swig/setup.py - :language: python - -.. 
sourcecode:: console - - $ cd advanced/interfacing_with_c/swig - - $ ls - cos_module.c cos_module.h cos_module.i setup.py - - $ python setup.py build_ext --inplace - running build_ext - building '_cos_module' extension - swigging cos_module.i to cos_module_wrap.c - swig -python -o cos_module_wrap.c cos_module.i - creating build - creating build/temp.linux-x86_64-2.7 - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module_wrap.c -o build/temp.linux-x86_64-2.7/cos_module_wrap.o - gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o build/temp.linux-x86_64-2.7/cos_module_wrap.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig/_cos_module.so - - $ ls - build/ cos_module.c cos_module.h cos_module.i cos_module.py _cos_module.so* cos_module_wrap.c setup.py - -We can now load and execute the ``cos_module`` as we have done in the previous examples: - -.. ipython:: - :verbatim: - - In [1]: import cos_module - - In [2]: cos_module? 
- Type: module - String Form: - File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig/cos_module.py - Docstring: - - In [3]: dir(cos_module) - Out[3]: - ['__builtins__', - '__doc__', - '__file__', - '__name__', - '__package__', - '_cos_module', - '_newclass', - '_object', - '_swig_getattr', - '_swig_property', - '_swig_repr', - '_swig_setattr', - '_swig_setattr_nondynamic', - 'cos_func'] - - In [4]: cos_module.cos_func(1.0) - Out[4]: 0.5403023058681398 - - In [5]: cos_module.cos_func(0.0) - Out[5]: 1.0 - - In [6]: cos_module.cos_func(3.14159265359) - Out[6]: -1.0 - -Again we test for robustness, and we see that we get a better error message -(although, strictly speaking in Python there is no ``double`` type): - -.. ipython:: - :verbatim: - - In [7]: cos_module.cos_func('foo') - --------------------------------------------------------------------------- - TypeError Traceback (most recent call last) - in () - ----> 1 cos_module.cos_func('foo') - TypeError: in method 'cos_func', argument 1 of type 'double' - -NumPy Support -------------- - -NumPy provides `support for SWIG -`_ with the ``numpy.i`` -file. This interface file defines various so-called *typemaps* which support -conversion between NumPy arrays and C-Arrays. In the following example we will -take a quick look at how such typemaps work in practice. - -We have the same ``cos_doubles`` function as in the ctypes example: - -.. literalinclude:: swig_numpy/cos_doubles.h - :language: c - -.. literalinclude:: swig_numpy/cos_doubles.c - :language: c - -This is wrapped as ``cos_doubles_func`` using the following SWIG interface -file: - -.. literalinclude:: swig_numpy/cos_doubles.i - :language: c - -* To use the NumPy typemaps, we need include the ``numpy.i`` file. -* Observe the call to ``import_array()`` which we encountered already in the - NumPy-C-API example. 
-* Since the type maps only support the signature ``ARRAY, SIZE`` we need to - wrap the ``cos_doubles`` as ``cos_doubles_func`` which takes two arrays - including sizes as input. -* As opposed to the simple SWIG example, we don't include the ``cos_doubles.h`` - header, There is nothing there that we wish to expose to Python since we - expose the functionality through ``cos_doubles_func``. - -And, as before we can use ``setuptools`` to wrap this: - -.. literalinclude:: swig_numpy/setup.py - :language: python - -As previously, we need to use ``include_dirs`` to specify the location. - -.. sourcecode:: console - - $ ls - cos_doubles.c cos_doubles.h cos_doubles.i numpy.i setup.py test_cos_doubles.py - $ python setup.py build_ext -i - running build_ext - building '_cos_doubles' extension - swigging cos_doubles.i to cos_doubles_wrap.c - swig -python -o cos_doubles_wrap.c cos_doubles.i - cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found. - cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found. - cos_doubles.i:24: Warning(490): Fragment 'NumPy_Backward_Compatibility' not found. 
- creating build - creating build/temp.linux-x86_64-2.7 - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c cos_doubles.c -o build/temp.linux-x86_64-2.7/cos_doubles.o - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c cos_doubles_wrap.c -o build/temp.linux-x86_64-2.7/cos_doubles_wrap.o - In file included from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1722, - from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:17, - from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:15, - from cos_doubles_wrap.c:2706: - /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/npy_deprecated_api.h:11:2: warning: #warning "Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" - gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_doubles.o build/temp.linux-x86_64-2.7/cos_doubles_wrap.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/swig_numpy/_cos_doubles.so - $ ls - build/ cos_doubles.h cos_doubles.py cos_doubles_wrap.c setup.py - cos_doubles.c cos_doubles.i _cos_doubles.so* numpy.i test_cos_doubles.py - -And, as before, we convince ourselves that it worked: - -.. literalinclude:: swig_numpy/test_cos_doubles.py - :language: numpy - -.. image:: swig_numpy/test_cos_doubles.png - :scale: 50 - - -Cython -====== - -`Cython `__ is both a Python-like language for writing -C-extensions and an advanced compiler for this language. 
The Cython *language* -is a superset of Python, which comes with additional constructs that allow you -call C functions and annotate variables and class attributes with c types. In -this sense one could also call it a *Python with types*. - -In addition to the basic use case of wrapping native code, Cython supports an -additional use-case, namely interactive optimization. Basically, one starts out -with a pure-Python script and incrementally adds Cython types to the bottleneck -code to optimize only those code paths that really matter. - -In this sense it is quite similar to SWIG, since the code can be autogenerated -but in a sense it also quite similar to ctypes since the wrapping code can -(almost) be written in Python. - -While others solutions that autogenerate code can be quite difficult to debug -(for example SWIG) Cython comes with an extension to the GNU debugger that -helps debug Python, Cython and C code. - -.. note:: - - The autogenerated C code uses the Python-C-Api. - -**Advantages** - -* Python like language for writing C-extensions -* Autogenerated code -* Supports incremental optimization -* Includes a GNU debugger extension -* Support for C++ (Since version 0.13) - -**Disadvantages** - -* Must be compiled -* Requires an additional library ( but only at build time, at this problem can be - overcome by shipping the generated C files) - -Example -------- - -The main Cython code for our ``cos_module`` is contained in the file -``cos_module.pyx``: - -.. literalinclude:: cython/cos_module.pyx - :language: cython - -Note the additional keywords such as ``cdef`` and ``extern``. Also the -``cos_func`` is then pure Python. - -Again we can use the standard ``setuptools`` module, but this time we need some -additional pieces from ``Cython.Build``: - -.. literalinclude:: cython/setup.py - -Compiling this: - -.. 
sourcecode:: console - - $ cd advanced/interfacing_with_c/cython - $ ls - cos_module.pyx setup.py - $ python setup.py build_ext --inplace - running build_ext - cythoning cos_module.pyx to cos_module.c - building 'cos_module' extension - creating build - creating build/temp.linux-x86_64-2.7 - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/include/python2.7 -c cos_module.c -o build/temp.linux-x86_64-2.7/cos_module.o - gcc -pthread -shared build/temp.linux-x86_64-2.7/cos_module.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so - $ ls - build/ cos_module.c cos_module.pyx cos_module.so* setup.py - -And running it: - -.. ipython:: - :verbatim: - - In [1]: import cos_module - - In [2]: cos_module? - Type: module - String Form: - File: /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so - Docstring: - - In [3]: dir(cos_module) - Out[3]: - ['__builtins__', - '__doc__', - '__file__', - '__name__', - '__package__', - '__test__', - 'cos_func'] - - In [4]: cos_module.cos_func(1.0) - Out[4]: 0.5403023058681398 - - In [5]: cos_module.cos_func(0.0) - Out[5]: 1.0 - - In [6]: cos_module.cos_func(3.14159265359) - Out[6]: -1.0 - -And, testing a little for robustness, we can see that we get good error messages: - -.. ipython:: - :verbatim: - - In [7]: cos_module.cos_func('foo') - --------------------------------------------------------------------------- - TypeError Traceback (most recent call last) - in () - ----> 1 cos_module.cos_func('foo') - /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython/cos_module.so in cos_module.cos_func (cos_module.c:506)() - TypeError: a float is required - - -Additionally, it is worth noting that ``Cython`` ships with complete -declarations for the C math library, which simplifies the code above to become: - -.. 
literalinclude:: cython_simple/cos_module.pyx - :language: cython - -In this case the ``cimport`` statement is used to import the ``cos`` function. - -NumPy Support -------------- - -Cython has support for NumPy via the ``numpy.pyx`` file which allows you to add -the NumPy array type to your Cython code. I.e. like specifying that variable -``i`` is of type ``int``, you can specify that variable ``a`` is of type -``numpy.ndarray`` with a given ``dtype``. Also, certain optimizations such as -bounds checking are supported. Look at the corresponding section in the `Cython -documentation `_. In case you -want to pass NumPy arrays as C arrays to your Cython wrapped C functions, there -is a `section about this in the Cython documentation -`__. - -In the following example, we will show how to wrap the familiar ``cos_doubles`` -function using Cython. - -.. literalinclude:: cython_numpy/cos_doubles.h - :language: c - -.. literalinclude:: cython_numpy/cos_doubles.c - :language: c - -This is wrapped as ``cos_doubles_func`` using the following Cython code: - -.. literalinclude:: cython_numpy/_cos_doubles.pyx - :language: cython - -And can be compiled using ``setuptools``: - -.. literalinclude:: cython_numpy/setup.py - :language: python - -* As with the previous compiled NumPy examples, we need the ``include_dirs`` option. - -.. 
sourcecode:: console - - $ ls - cos_doubles.c cos_doubles.h _cos_doubles.pyx setup.py test_cos_doubles.py - $ python setup.py build_ext -i - running build_ext - cythoning _cos_doubles.pyx to _cos_doubles.c - building 'cos_doubles' extension - creating build - creating build/temp.linux-x86_64-2.7 - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c _cos_doubles.c -o build/temp.linux-x86_64-2.7/_cos_doubles.o - In file included from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarraytypes.h:1722, - from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/ndarrayobject.h:17, - from /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/arrayobject.h:15, - from _cos_doubles.c:253: - /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/npy_deprecated_api.h:11:2: warning: #warning "Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" - /home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include/numpy/__ufunc_api.h:236: warning: ‘_import_umath’ defined but not used - gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/esc/anaconda/lib/python2.7/site-packages/numpy/core/include -I/home/esc/anaconda/include/python2.7 -c cos_doubles.c -o build/temp.linux-x86_64-2.7/cos_doubles.o - gcc -pthread -shared build/temp.linux-x86_64-2.7/_cos_doubles.o build/temp.linux-x86_64-2.7/cos_doubles.o -L/home/esc/anaconda/lib -lpython2.7 -o /home/esc/git-working/scientific-python-lectures/advanced/interfacing_with_c/cython_numpy/cos_doubles.so - $ ls - build/ _cos_doubles.c cos_doubles.c cos_doubles.h _cos_doubles.pyx cos_doubles.so* setup.py test_cos_doubles.py - -And, as before, we convince ourselves that it worked: - -.. 
literalinclude:: cython_numpy/test_cos_doubles.py - :language: numpy - -.. image:: cython_numpy/test_cos_doubles.png - :scale: 50 - - - -Summary -======= - -In this section four different techniques for interfacing with native code -have been presented. The table below roughly summarizes some of the aspects of -the techniques. - -============ =============== ========= ============= ============= -x Part of CPython Compiled Autogenerated NumPy Support -============ =============== ========= ============= ============= -Python-C-API ``True`` ``True`` ``False`` ``True`` -Ctypes ``True`` ``False`` ``False`` ``True`` -Swig ``False`` ``True`` ``True`` ``True`` -Cython ``False`` ``True`` ``True`` ``True`` -============ =============== ========= ============= ============= - -Of all three presented techniques, Cython is the most modern and advanced. In -particular, the ability to optimize code incrementally by adding types to your -Python code is unique. - -Further Reading and References -============================== - -* `Gaël Varoquaux's blog post about avoiding data copies - `_ provides some insight on how to - handle memory management cleverly. If you ever run into issues with large - datasets, this is a reference to come back to for some inspiration. - -Exercises -========= - -Since this is a brand new section, the exercises are considered more as -pointers as to what to look at next, so pick the ones that you find more -interesting. If you have good ideas for exercises, please let us know! - -#. Download the source code for each example and compile and run them on your - machine. -#. Make trivial changes to each example and convince yourself that this works. ( - E.g. change ``cos`` for ``sin``.) -#. Most of the examples, especially the ones involving NumPy may still be - fragile and respond badly to input errors. Look for ways to crash the - examples, figure what the problem is and devise a potential solution. - Here are some ideas: - - #. Numerical overflow. - #. 
Input and output arrays that have different lengths. - #. Multidimensional array. - #. Empty array - #. Arrays with non-``double`` types - -#. Use the ``%timeit`` IPython magic to measure the execution time of the - various solutions - - -Python-C-API ------------- - -#. Modify the NumPy example such that the function takes two input arguments, where - the second is the preallocated output array, making it similar to the other NumPy examples. -#. Modify the example such that the function only takes a single input array - and modifies this in place. -#. Try to fix the example to use the new `NumPy iterator protocol - `_. If you - manage to obtain a working solution, please submit a pull-request on github. -#. You may have noticed, that the NumPy-C-API example is the only NumPy example - that does not wrap ``cos_doubles`` but instead applies the ``cos`` function - directly to the elements of the NumPy array. Does this have any advantages - over the other techniques. -#. Can you wrap ``cos_doubles`` using only the NumPy-C-API. You may need to - ensure that the arrays have the correct type, are one dimensional and - contiguous in memory. - -Ctypes ------- - -#. Modify the NumPy example such that ``cos_doubles_func`` handles the preallocation for - you, thus making it more like the NumPy-C-API example. - -SWIG ----- - -#. Look at the code that SWIG autogenerates, how much of it do you - understand? -#. Modify the NumPy example such that ``cos_doubles_func`` handles the preallocation for - you, thus making it more like the NumPy-C-API example. -#. Modify the ``cos_doubles`` C function so that it returns an allocated array. - Can you wrap this using SWIG typemaps? If not, why not? Is there a - workaround for this specific situation? (Hint: you know the size of the - output array, so it may be possible to construct a NumPy array from the - returned ``double *``.) - -Cython ------- - -#. Look at the code that Cython autogenerates. 
Take a closer look at some of the - comments that Cython inserts. What do you see? -#. Look at the section `Working with NumPy - `_ from the Cython - documentation to learn how to incrementally optimize a pure python script that uses NumPy. -#. Modify the NumPy example such that ``cos_doubles_func`` handles the preallocation for - you, thus making it more like the NumPy-C-API example. diff --git a/advanced/mathematical_optimization/examples/helper/compare_optimizers_py2.pkl b/advanced/mathematical_optimization/examples/helper/compare_optimizers_py2.pkl deleted file mode 100644 index 1b099db93..000000000 --- a/advanced/mathematical_optimization/examples/helper/compare_optimizers_py2.pkl +++ /dev/null @@ -1,17437 +0,0 @@ -(dp0 -I8 -(dp1 -S'Rosenbrock ' -p2 -(dp3 -S'BFGS' -p4 -(lp5 -cnumpy.core.multiarray -scalar -p6 -(cnumpy -dtype -p7 -(S'f8' -p8 -I0 -I1 -tp9 -Rp10 -(I3 -S'<' -p11 -NNNI-1 -I-1 -I0 -tp12 -bS'\x1c\x9d]\x0b&v\xe0?' -p13 -tp14 -Rp15 -ag6 -(g10 -S'\xee\x03\xb6F\xcc\xf8\xec?' -p16 -tp17 -Rp18 -ag6 -(g10 -S'\x02i\xdd\xe9\xe68\xec?' -p19 -tp20 -Rp21 -ag6 -(g10 -S'\x19\x07d-\xf2\xc3\xe6?' -p22 -tp23 -Rp24 -ag6 -(g10 -S'd\x83\x0c\xb8\x1d\x95\xe6?' -p25 -tp26 -Rp27 -ag6 -(g10 -S'\xc2t\x18297\xe4?' -p28 -tp29 -Rp30 -ag6 -(g10 -S'\xe7\xcd$\x98HD\xe9?' -p31 -tp32 -Rp33 -ag6 -(g10 -S'\x843KS\xbaP\xe7?' -p34 -tp35 -Rp36 -ag6 -(g10 -S'B\xd9\xfb\x9a\x10\x94\xed?' -p37 -tp38 -Rp39 -ag6 -(g10 -S'\xa2\x87\xd3\xd3U\x83\xe6?' -p40 -tp41 -Rp42 -ag6 -(g10 -S'\xa8\xeb\xd3\xf5\xe9\xfa\xe4?' -p43 -tp44 -Rp45 -ag6 -(g10 -S'[\xe7\x15\xd0\xb8[\xed?' -p46 -tp47 -Rp48 -ag6 -(g10 -S'\x04\xc8.+\x14u\xe3?' -p49 -tp50 -Rp51 -ag6 -(g10 -S"'\xb8\x913\x08s\xe5?" -p52 -tp53 -Rp54 -ag6 -(g10 -S'\xf6\xe8+)\x94\xf9\xeb?' -p55 -tp56 -Rp57 -ag6 -(g10 -S'\x1a\xf3\xf2\x19\xf3\xf2\xe9?' -p58 -tp59 -Rp60 -ag6 -(g10 -S'\xa8[\xae\x98\xb0\xc9\xe0?' -p61 -tp62 -Rp63 -ag6 -(g10 -S'N\xe6A\xdfp\xfb\xe4?' -p64 -tp65 -Rp66 -ag6 -(g10 -S'\xe0 \xe4\x0e\xfd\xa8\xec?' 
-p67 -tp68 -Rp69 -ag6 -(g10 -S'\xac\xb1\xc3q\xdf\xd4\xec?' -p70 -tp71 -Rp72 -asS'Nelder-mead' -p73 -(lp74 -g6 -(g10 -S'\xfa\xda\xbf>\xc4s\x02@' -p75 -tp76 -Rp77 -ag6 -(g10 -S'\x14\x83\xfd\xd0lK\x08@' -p78 -tp79 -Rp80 -ag6 -(g10 -S'\xd1\xb9\xf4\xae,\xbb\x05@' -p81 -tp82 -Rp83 -ag6 -(g10 -S'\xb5\xea\xd3w)\xb4\x0b@' -p84 -tp85 -Rp86 -ag6 -(g10 -S'\x94\xac\xdf9s\xa9\x10@' -p87 -tp88 -Rp89 -ag6 -(g10 -S'=\xfd\x0e\x8a\x12\t\x01@' -p90 -tp91 -Rp92 -ag6 -(g10 -S'#\xe6\xfdM\xb5\t\x0c@' -p93 -tp94 -Rp95 -ag6 -(g10 -S'\x8f]\xc6\xe3\x9e+\xff?' -p96 -tp97 -Rp98 -ag6 -(g10 -S'\xf7T\xf1x\x07\xe9\x00@' -p99 -tp100 -Rp101 -ag6 -(g10 -S'\xc0O\x9a\xd1\x06\xae\x06@' -p102 -tp103 -Rp104 -ag6 -(g10 -S'}0\x0beR\xff\x08@' -p105 -tp106 -Rp107 -ag6 -(g10 -S'q\xb7\xce+\xa0\xb1\x05@' -p108 -tp109 -Rp110 -ag6 -(g10 -S'\xf0\xc9\xe12\xe0\x93\x0b@' -p111 -tp112 -Rp113 -ag6 -(g10 -S'\x8d(\xcc\xb2D\xa6\x06@' -p114 -tp115 -Rp116 -ag6 -(g10 -S']\x18\xb2\xae*$\x07@' -p117 -tp118 -Rp119 -ag6 -(g10 -S'Q\x07uP\x07u\x10@' -p120 -tp121 -Rp122 -ag6 -(g10 -S'7H\xf7\x91\x1d)\x07@' -p123 -tp124 -Rp125 -ag6 -(g10 -S'MO\xea\xe0t\x9a\x04@' -p126 -tp127 -Rp128 -ag6 -(g10 -S'\xb1D\x1f\x01s\xb0\x06@' -p129 -tp130 -Rp131 -ag6 -(g10 -S'X\xf1]\x06\xa9[\x07@' -p132 -tp133 -Rp134 -asS'Newton\nw Hessian ' -p135 -(lp136 -g6 -(g10 -S'\xf0Zi\xc1\xb2\x10w?' -p137 -tp138 -Rp139 -asS'Conjugate gradient' -p140 -(lp141 -g6 -(g10 -S'e\xd0\xdc\xed&\xd8\xf0?' -p142 -tp143 -Rp144 -ag6 -(g10 -S'@)w\xe6`4\xf0?' -p145 -tp146 -Rp147 -ag6 -(g10 -S'\xfc\xd9\xc3\xaa\xe3\xdf\xef?' -p148 -tp149 -Rp150 -ag6 -(g10 -S'^\xa7\x00\xb5v\x85\xef?' -p151 -tp152 -Rp153 -ag6 -(g10 -S"r/\xaf\x10\xa0'\xec?" -p154 -tp155 -Rp156 -ag6 -(g10 -S'\xbd \x05\x9b\xff\xb8\xf3?' -p157 -tp158 -Rp159 -ag6 -(g10 -S'\xe7\xcd$\x98HD\xe9?' -p160 -tp161 -Rp162 -ag6 -(g10 -S'\xa0\xa3\x93\xd4\x1bC\xf4?' -p163 -tp164 -Rp165 -ag6 -(g10 -S'x\xdbS\xc5\xe3\x1d\xf2?' -p166 -tp167 -Rp168 -ag6 -(g10 -S'\x1c\xfb\x1e\x91\x13\x84\xeb?' 
-p169 -tp170 -Rp171 -ag6 -(g10 -S'\xf4\x10z\x08=\x84\xee?' -p172 -tp173 -Rp174 -ag6 -(g10 -S'o\x9dW@\xe3n\xeb?' -p175 -tp176 -Rp177 -ag6 -(g10 -S'7F\xa3[\xa3 \xee?' -p178 -tp179 -Rp180 -ag6 -(g10 -S'\x03$N\xfe\xa7\x93\xf3?' -p181 -tp182 -Rp183 -ag6 -(g10 -S'\xdf*\xd6|\x9a\xc4\xf2?' -p184 -tp185 -Rp186 -ag6 -(g10 -S'\x98\x81\xe6\x97\x81\xe6\xe7?' -p187 -tp188 -Rp189 -ag6 -(g10 -S'5\xec\xc7\x0b\xcc\xec\xee?' -p190 -tp191 -Rp192 -ag6 -(g10 -S'\xb3\xda%\x86Y\xd8\xf2?' -p193 -tp194 -Rp195 -ag6 -(g10 -S'\xe8\xd8\x90\x18\x06\xaf\xed?' -p196 -tp197 -Rp198 -ag6 -(g10 -S'\xac\xb1\xc3q\xdf\xd4\xec?' -p199 -tp200 -Rp201 -asS'Powell' -p202 -(lp203 -g6 -(g10 -S'\xc6\xda\x81\x08\xc2~\x11@' -p204 -tp205 -Rp206 -ag6 -(g10 -S'V\xdb\x07l\x8d\x98\t@' -p207 -tp208 -Rp209 -ag6 -(g10 -S'\xef\x862(\xc7\xf2\x0b@' -p210 -tp211 -Rp212 -ag6 -(g10 -S'\xf5\xd1X\xc5#\x1d\x08@' -p213 -tp214 -Rp215 -ag6 -(g10 -S'\xb7\xca\xd8G\tq\x04@' -p216 -tp217 -Rp218 -ag6 -(g10 -S'\x1f\xe9\xe9wP\x94\x10@' -p219 -tp220 -Rp221 -ag6 -(g10 -S'\xc5$+\x1bV\x15\t@' -p222 -tp223 -Rp224 -ag6 -(g10 -S'}DUP\xd9\x83\x10@' -p225 -tp226 -Rp227 -ag6 -(g10 -S'\xb4\x8d\x00\x96\xcat\x0f@' -p228 -tp229 -Rp230 -ag6 -(g10 -S't\x8e\x81\xea\xae\x9c\x0f@' -p231 -tp232 -Rp233 -ag6 -(g10 -S'^\x9f\xaeO\xd7\xa7\x0b@' -p234 -tp235 -Rp236 -ag6 -(g10 -S'\xc9\xf7\xb1\x9a=I\r@' -p237 -tp238 -Rp239 -ag6 -(g10 -S'\xcf\xbe\x88j\x9d}\t@' -p240 -tp241 -Rp242 -ag6 -(g10 -S'\x8a\x8f\x0b\xc6P\x99\n@' -p243 -tp244 -Rp245 -ag6 -(g10 -S'\rH\xc3\xf4 \xd5\t@' -p246 -tp247 -Rp248 -ag6 -(g10 -S've\x8bue\x8b\x05@' -p249 -tp250 -Rp251 -ag6 -(g10 -S'"\x8b_9\xcd\x9a\x0e@' -p252 -tp253 -Rp254 -ag6 -(g10 -S'kX\x80\xcb+\xff\r@' -p255 -tp256 -Rp257 -ag6 -(g10 -S'\xa7\x7f\xc7>\xba\xdb\x0b@' -p258 -tp259 -Rp260 -ag6 -(g10 -S'\xb3\xee/\xab\xf5\x91\x0b@' -p261 -tp262 -Rp263 -asS'L-BFGS' -p264 -(lp265 -g6 -(g10 -S'\xe6\x04\xd0\x155\xf8\xd6?' -p266 -tp267 -Rp268 -ag6 -(g10 -S'.\xf68O\x01\x92\xd7?' -p269 -tp270 -Rp271 -ag6 -(g10 -S'\xbd^k5\xe7\xe3\xdb?' 
-p272 -tp273 -Rp274 -ag6 -(g10 -S'K\xecf*\x927\xd9?' -p275 -tp276 -Rp277 -ag6 -(g10 -S'\xe8\t\x87\xb7\xe0\xb3\xd5?' -p278 -tp279 -Rp280 -ag6 -(g10 -S'\xf6\x10\xed\xaf\xb2\xa3\xd9?' -p281 -tp282 -Rp283 -ag6 -(g10 -S'll]\x17\xa1s\xd8?' -p284 -tp285 -Rp286 -ag6 -(g10 -S'N\x16\x9f:g\xa5\xda?' -p287 -tp288 -Rp289 -ag6 -(g10 -S'\xfc\x9a\x10\x94\xbd\xaf\xd9?' -p290 -tp291 -Rp292 -ag6 -(g10 -S')\x14\x88\x16\x98\x82\xd1?' -p293 -tp294 -Rp295 -ag6 -(g10 -S'\xc7-\xe3\x96q\xcb\xd8?' -p296 -tp297 -Rp298 -ag6 -(g10 -S'\xfd\xe5\x88\x14C\xfd\xd9?' -p299 -tp300 -Rp301 -ag6 -(g10 -S'p\xdd.\xf0\x8e\\\xda?' -p302 -tp303 -Rp304 -ag6 -(g10 -S'\xa7\xda\xb5\xf5{\x00\xe0?' -p305 -tp306 -Rp307 -ag6 -(g10 -S'\xbe\xf1\x00z,?\xd5?' -p308 -tp309 -Rp310 -ag6 -(g10 -S'\x91\\\x12\x91\\\x12\xd1?' -p311 -tp312 -Rp313 -ag6 -(g10 -S'\x00V\x9e\xc0\xa1\x9f\xd9?' -p314 -tp315 -Rp316 -ag6 -(g10 -S'\xc4o\xa3\xf5&\xde\xd8?' -p317 -tp318 -Rp319 -ag6 -(g10 -S'\xc0@1\xe8\xd8\x90\xd8?' -p320 -tp321 -Rp322 -ag6 -(g10 -S'\xe9\x1d\xfd\x87\x9cS\xd8?' -p323 -tp324 -Rp325 -asS"L-BFGS \nw f'" -p326 -(lp327 -g6 -(g10 -S'X\x93r\x93\x91\xae\xb2?' -p328 -tp329 -Rp330 -ag6 -(g10 -S'a\x1e\x08V\xc5?\xb3?' -p331 -tp332 -Rp333 -ag6 -(g10 -S'h\xbaJ\xee\xeb\x93\xb6?' -p334 -tp335 -Rp336 -ag6 -(g10 -S'\xc6\x86\tr/t\xb4?' -p337 -tp338 -Rp339 -ag6 -(g10 -S'\xa7}m\n\xc4\x98\xb1?' -p340 -tp341 -Rp342 -ag6 -(g10 -S'\x95\x1d\xcd8\xf0\xe7\xb4?' -p343 -tp344 -Rp345 -ag6 -(g10 -S'U\x174F*\xe3\xb3?' -p346 -tp347 -Rp348 -ag6 -(g10 -S'Hcy\t\xd0\xc2\xb5?' -p349 -tp350 -Rp351 -ag6 -(g10 -S'\xf8\x80\x86bL\xdc\xb4?' -p352 -tp353 -Rp354 -ag6 -(g10 -S'\xebs\x8e\x81\xea\xae\xac?' -p355 -tp356 -Rp357 -ag6 -(g10 -S'\x08xj\xa2\x9b7\xb4?' -p358 -tp359 -Rp360 -ag6 -(g10 -S'------\xb5?' -p361 -tp362 -Rp363 -ag6 -(g10 -S'g\xfc\xe8`\x1eW\xb5?' -p364 -tp365 -Rp366 -ag6 -(g10 -S'\x1f\\\xd4%\x1a\xe0\xb9?' -p367 -tp368 -Rp369 -ag6 -(g10 -S'&M\x89~\xdcG\xb1?' -p370 -tp371 -Rp372 -ag6 -(g10 -S'\x1cn\xdc\x1bn\xdc\xab?' 
-p373 -tp374 -Rp375 -ag6 -(g10 -S'\xd0N_\xe3.\xda\xb4?' -p376 -tp377 -Rp378 -ag6 -(g10 -S'\xd0\xca\x94\xa7\x7f4\xb4?' -p379 -tp380 -Rp381 -ag6 -(g10 -S'i\xed\xcc3\xf4\xec\xb3?' -p382 -tp383 -Rp384 -ag6 -(g10 -S'&\x8a6\x9eY\xd2\xb3?' -p385 -tp386 -Rp387 -asS"Conjugate gradient\nw f'" -p388 -(lp389 -g6 -(g10 -S'\x8b\xfb`\n\xa5\x1a\xcb?' -p390 -tp391 -Rp392 -ag6 -(g10 -S'\xd3\xc7n\xa6cM\xcb?' -p393 -tp394 -Rp395 -ag6 -(g10 -S'Y\xfe\xdb\x81\xe8m\xca?' -p396 -tp397 -Rp398 -ag6 -(g10 -S'_\xae\x01\xf6\x9e2\xca?' -p399 -tp400 -Rp401 -ag6 -(g10 -S'<\xdc@>\r\xa7\xc2?' -p402 -tp403 -Rp404 -ag6 -(g10 -S'`\xac3U\xe8[\xcf?' -p405 -tp406 -Rp407 -ag6 -(g10 -S':\x91\xf5_\xe5\x0c\xc4?' -p408 -tp409 -Rp410 -ag6 -(g10 -S'\x0eV\x8c\xa9Un\xd1?' -p411 -tp412 -Rp413 -ag6 -(g10 -S'Y\xc0\x9d\xfe\x88\x05\xcc?' -p414 -tp415 -Rp416 -ag6 -(g10 -S'g\xf7E\xd8\xf3-\xc6?' -p417 -tp418 -Rp419 -ag6 -(g10 -S'\x1e\xb8\xdb\xa8:!\xca?' -p420 -tp421 -Rp422 -ag6 -(g10 -S'#R\x0c\xf5\x97#\xc6?' -p423 -tp424 -Rp425 -ag6 -(g10 -S'\xf4\xa5\xfb\xfb\x0f\xfb\xc8?' -p426 -tp427 -Rp428 -ag6 -(g10 -S'\x16\xd6\xda\xe3%\xd4\xca?' -p429 -tp430 -Rp431 -ag6 -(g10 -S'\x9dj\x01\xb7\xc2\xde\xcf?' -p432 -tp433 -Rp434 -ag6 -(g10 -S'\x94\xc4A\x93\xc4A\xc3?' -p435 -tp436 -Rp437 -ag6 -(g10 -S'\xf9A\x10.\xad\xea\xc8?' -p438 -tp439 -Rp440 -ag6 -(g10 -S'70_{\xbfN\xce?' -p441 -tp442 -Rp443 -ag6 -(g10 -S'\x07$<\x89\xab(\xcb?' -p444 -tp445 -Rp446 -ag6 -(g10 -S'\xa6-\xec&\xd4>\xc7?' -p447 -tp448 -Rp449 -asS"BFGS\nw f'" -p450 -(lp451 -g6 -(g10 -S'\xce\x8a\xfb`\n\xa5\xba?' -p452 -tp453 -Rp454 -ag6 -(g10 -S'vI\xe5\xc3\xb8_\xc7?' -p455 -tp456 -Rp457 -ag6 -(g10 -S'\xb7\xf1\x11\xd0\xeb\xb5\xc6?' -p458 -tp459 -Rp460 -ag6 -(g10 -S'\xbf\x9d+\x998Z\xc2?' -p461 -tp462 -Rp463 -ag6 -(g10 -S'\x97\x8f9\xe8\xbbz\xc1?' -p464 -tp465 -Rp466 -ag6 -(g10 -S'\x04\x7fN1\xab^\xc0?' -p467 -tp468 -Rp469 -ag6 -(g10 -S'\x05\x85x\x93[`\xc4?' -p470 -tp471 -Rp472 -ag6 -(g10 -S'"\x05\x86\xc9\xd3\xdf\xc2?' -p473 -tp474 -Rp475 -ag6 -(g10 -S'\xa9 \x92\xb0\xeap\xc8?' 
-p476 -tp477 -Rp478 -ag6 -(g10 -S'\xa04\xa3\r\\-\xc2?' -p479 -tp480 -Rp481 -ag6 -(g10 -S'\xa2L\xea\xbf\x8e\xf9\xc0?' -p482 -tp483 -Rp484 -ag6 -(g10 -S'\xae\xf3\nh\xdc\xad\xc7?' -p485 -tp486 -Rp487 -ag6 -(g10 -S'y\xbet\x7f\xffa\xbf?' -p488 -tp489 -Rp490 -ag6 -(g10 -S'v\x12\x9aw\xb0K\xc1?' -p491 -tp492 -Rp493 -ag6 -(g10 -S'g\xf2\xef\xe5\x0b\xce\xc6?' -p494 -tp495 -Rp496 -ag6 -(g10 -S'\x95xY\x94xY\xc4?' -p497 -tp498 -Rp499 -ag6 -(g10 -S'\x10\x03^\n\xc86\xbb?' -p500 -tp501 -Rp502 -ag6 -(g10 -S'\xf1\xf0\xf0\xf0\xf0\xf0\xc0?' -p503 -tp504 -Rp505 -ag6 -(g10 -S'\xe7C\x89b\x87\x10\xc7?' -p506 -tp507 -Rp508 -ag6 -(g10 -S'~x\xf7\xbcY\xf7\xc7?' -p509 -tp510 -Rp511 -assS'Well-conditioned quadratic' -p512 -(dp513 -g4 -(lp514 -g6 -(g10 -S'\xec\x84\xb95;T\xf1?' -p515 -tp516 -Rp517 -ag6 -(g10 -S'\n\xa6)\x89-f\xf0?' -p518 -tp519 -Rp520 -ag6 -(g10 -S'j)\xb5\x94ZJ\xed?' -p521 -tp522 -Rp523 -ag6 -(g10 -S'\xf6(\\\x8f\xc2\xf5\xee?' -p524 -tp525 -Rp526 -ag6 -(g10 -S'\x14(U\xf4\xfdx\xf1?' -p527 -tp528 -Rp529 -ag6 -(g10 -S"\xed'K`\xd3~\xf2?" -p530 -tp531 -Rp532 -ag6 -(g10 -S'\xb9\xc7\xc92\x1e\x04\xf2?' -p533 -tp534 -Rp535 -ag6 -(g10 -S'D\xd8e\xc6\xa7~\xf4?' -p536 -tp537 -Rp538 -ag6 -(g10 -S'\xd7\x01\xdd\x98\xa7\x8f\xf2?' -p539 -tp540 -Rp541 -ag6 -(g10 -S'\x04:\x02\x94u9\xed?' -p542 -tp543 -Rp544 -ag6 -(g10 -S'\xca5\x08\x0c\x96\xb8\xf0?' -p545 -tp546 -Rp547 -ag6 -(g10 -S'\xcd\xbfL\xeeS#\xf1?' -p548 -tp549 -Rp550 -ag6 -(g10 -S'\xf6z\xbd^\xaf\xd7\xf3?' -p551 -tp552 -Rp553 -ag6 -(g10 -S'\xb0\xfe.c=\x91\xed?' -p554 -tp555 -Rp556 -ag6 -(g10 -S'=\xaf\\\xab\x13\x9a\xf1?' -p557 -tp558 -Rp559 -ag6 -(g10 -S'\xea\xa7\xa0=x\xbf\xe9?' -p560 -tp561 -Rp562 -ag6 -(g10 -S'\x83\x80\xa8\xff\xe4\xaa\xf4?' -p563 -tp564 -Rp565 -ag6 -(g10 -S'\xf5Z\x1b\xd8D\x86\xf1?' -p566 -tp567 -Rp568 -ag6 -(g10 -S'G\xe1z\x14\xaeG\xf1?' -p569 -tp570 -Rp571 -ag6 -(g10 -S'o2\xdf\xef\x95\x87\xf0?' 
-p572 -tp573 -Rp574 -asg73 -(lp575 -g6 -(g10 -S'JX\xc7mE\\\x0c@' -p576 -tp577 -Rp578 -ag6 -(g10 -S'\x05\xd3\x94\xc4\x163\x11@' -p579 -tp580 -Rp581 -ag6 -(g10 -S'\x07E\x83\xa2A\xd1\x12@' -p582 -tp583 -Rp584 -ag6 -(g10 -S'\\\x8f\xc2\xf5(\x1c\x10@' -p585 -tp586 -Rp587 -ag6 -(g10 -S'\x8a\x9b\x19\x9a\xba\x1b\x0e@' -p588 -tp589 -Rp590 -ag6 -(g10 -S'\x1b\xc8\x96\xdf\xa4\x81\x08@' -p591 -tp592 -Rp593 -ag6 -(g10 -S'Y\xa3\x8a\x9e\x95\xce\x07@' -p594 -tp595 -Rp596 -ag6 -(g10 -S'Re\x10e\xb5\xc3\x08@' -p597 -tp598 -Rp599 -ag6 -(g10 -S'1O\x1f\xad&!\x0b@' -p600 -tp601 -Rp602 -ag6 -(g10 -S'O\xeeY\xf6\xd3W\x10@' -p603 -tp604 -Rp605 -ag6 -(g10 -S'\xc1\x12\x17z\xe32\x11@' -p606 -tp607 -Rp608 -ag6 -(g10 -S'B1\x11\xfb#\x90\r@' -p609 -tp610 -Rp611 -ag6 -(g10 -S'\xf4\xf9|>\x9f\xcf\x07@' -p612 -tp613 -Rp614 -ag6 -(g10 -S'\xd8\x18\xe9\xe9R\t\x11@' -p615 -tp616 -Rp617 -ag6 -(g10 -S'\x83+\xaa_m\x83\x0b@' -p618 -tp619 -Rp620 -ag6 -(g10 -S'"a\xf2QZ\x1b\x13@' -p621 -tp622 -Rp623 -ag6 -(g10 -S'L\x85"\xa7\x93\x90\x0b@' -p624 -tp625 -Rp626 -ag6 -(g10 -S'S\xa7\xf6\x021\x03\x06@' -p627 -tp628 -Rp629 -ag6 -(g10 -S'_\xca\xbaU\x12\x01@' -p681 -tp682 -Rp683 -ag6 -(g10 -S'\xb4\xd2G]\x05*\x05@' -p684 -tp685 -Rp686 -ag6 -(g10 -S'h\x0f\xdeo\xfe\x15\xff?' -p687 -tp688 -Rp689 -ag6 -(g10 -S'X6\xc5\xdb\xd1\xc2\xfe?' -p690 -tp691 -Rp692 -ag6 -(g10 -S'#\xc38\r\x02]\x08@' -p693 -tp694 -Rp695 -ag6 -(g10 -S'\x01\xb1\xa94\xe4\xdc\x07@' -p696 -tp697 -Rp698 -ag6 -(g10 -S'\xe6\x99z\xcfr8\x03@' -p699 -tp700 -Rp701 -asg202 -(lp702 -g6 -(g10 -S'\xaa\x7f\x93\x1b\x1f\xfe\xd6?' -p703 -tp704 -Rp705 -ag6 -(g10 -S'#\xc5\x11`\x9fe\xd5?' -p706 -tp707 -Rp708 -ag6 -(g10 -S'\x15\xa8\nT\x05\xaa\xd2?' -p709 -tp710 -Rp711 -ag6 -(g10 -S'\xed|?5^\xba\xd3?' -p712 -tp713 -Rp714 -ag6 -(g10 -S'5\xa7\x1b!\x89Z\xd7?' -p715 -tp716 -Rp717 -ag6 -(g10 -S'L\x7f\xd1\x02\xba\xf4\xd7?' -p718 -tp719 -Rp720 -ag6 -(g10 -S'\xdc\xc0kw\xcaU\xd7?' -p721 -tp722 -Rp723 -ag6 -(g10 -S'\x06\xac\xf7\x9f\xd4k\xe3?' 
-p724 -tp725 -Rp726 -ag6 -(g10 -S'\x0b.\x95\xed]\x07\xe4?' -p727 -tp728 -Rp729 -ag6 -(g10 -S'\xc8\x14\x10\xf4\xc2\x10\xd3?' -p730 -tp731 -Rp732 -ag6 -(g10 -S'\xd3X\xf9\x9dH>\xe0?' -p733 -tp734 -Rp735 -ag6 -(g10 -S'\x18cZ\x12k\\\xd6?' -p736 -tp737 -Rp738 -ag6 -(g10 -S'y<\x1e\x8f\xc7\xe3\xd9?' -p739 -tp740 -Rp741 -ag6 -(g10 -S'\xf01\x06G\xe3p\xdc?' -p742 -tp743 -Rp744 -ag6 -(g10 -S'\x93|z?q\xcc\xd6?' -p745 -tp746 -Rp747 -ag6 -(g10 -S'\xb9h>\xed\x075\xd1?' -p748 -tp749 -Rp750 -ag6 -(g10 -S'\x97\xd4\x9a\xc7\x98%\xda?' -p751 -tp752 -Rp753 -ag6 -(g10 -S'r\x84\xc9\x04\xd9\x18\xe1?' -p754 -tp755 -Rp756 -ag6 -(g10 -S'\x11\x8f\x05\x07\xb8a\xd6?' -p757 -tp758 -Rp759 -ag6 -(g10 -S'\xb52}\xacr\xe9\xd4?' -p760 -tp761 -Rp762 -asg264 -(lp763 -g6 -(g10 -S'\x8d\xf2\\e\xd5\x98\xe2?' -p764 -tp765 -Rp766 -ag6 -(g10 -S'\xed\xae\xd9X\xe0\x1c\xdc?' -p767 -tp768 -Rp769 -ag6 -(g10 -S'\x83dA\xb2 Y\xe0?' -p770 -tp771 -Rp772 -ag6 -(g10 -S'\x8f\xc2\xf5(\\\x8f\xe0?' -p773 -tp774 -Rp775 -ag6 -(g10 -S'\xcfRY\x07\xe5\x0b\xe1?' -p776 -tp777 -Rp778 -ag6 -(g10 -S'4R1\xb7:#\xe5?' -p779 -tp780 -Rp781 -ag6 -(g10 -S'P\xff\xdc\x06^\xbb\xe3?' -p782 -tp783 -Rp784 -ag6 -(g10 -S'D\xd8e\xc6\xa7~\xe4?' -p785 -tp786 -Rp787 -ag6 -(g10 -S'}\xb4\x9a\x84(\xfe\xe2?' -p788 -tp789 -Rp790 -ag6 -(g10 -S'r\xe7&!\xb8\x9d\xde?' -p791 -tp792 -Rp793 -ag6 -(g10 -S'\\\xe0\xdeS\x7f\xd9\xdf?' -p794 -tp795 -Rp796 -ag6 -(g10 -S'Q\xe1\xee\x8b?\xf4\xe1?' -p797 -tp798 -Rp799 -ag6 -(g10 -S'\xb9\\.\x97\xcb\xe5\xe2?' -p800 -tp801 -Rp802 -ag6 -(g10 -S'@\xffc\xa6G\xe5\xe0?' -p803 -tp804 -Rp805 -ag6 -(g10 -S'\x95\xb7H\xe4\xa6p\xe2?' -p806 -tp807 -Rp808 -ag6 -(g10 -S'V\x00\xe9\xd5\xf4S\xe0?' -p809 -tp810 -Rp811 -ag6 -(g10 -S"'<\x90\x82J\xfe\xe8?" -p812 -tp813 -Rp814 -ag6 -(g10 -S'\x83\x8b\xb4\xf8_\xa9\xe3?' -p815 -tp816 -Rp817 -ag6 -(g10 -S'6\x95\x86\x9c\xfb\xec\xe2?' -p818 -tp819 -Rp820 -ag6 -(g10 -S'\xccl\xa4=-%\xe0?' -p821 -tp822 -Rp823 -asS"L-BFGS \nw f'" -p824 -(lp825 -g6 -(g10 -S'\x15GRwtn\xbe?' 
-p826 -tp827 -Rp828 -ag6 -(g10 -S'\xd4\x8f\xf1\x81n\x1d\xb7?' -p829 -tp830 -Rp831 -ag6 -(g10 -S'\xd6\xcejg\xb5\xb3\xba?' -p832 -tp833 -Rp834 -ag6 -(g10 -S'\xdfO\x8d\x97n\x12\xbb?' -p835 -tp836 -Rp837 -ag6 -(g10 -S'\x86\xa6\xee\x86\xc9\xf4\xbb?' -p838 -tp839 -Rp840 -ag6 -(g10 -S'3\x145\xaf+C\xc1?' -p841 -tp842 -Rp843 -ag6 -(g10 -S'\xc5=N\x96\xf1 \xc0?' -p844 -tp845 -Rp846 -ag6 -(g10 -S'?\x85\x00\xb6B\xc9\xc0?' -p847 -tp848 -Rp849 -ag6 -(g10 -S'\x9e>ZMB\x14\xbf?' -p850 -tp851 -Rp852 -ag6 -(g10 -S'\xba1\x94\xec\xad\x0c\xb9?' -p853 -tp854 -Rp855 -ag6 -(g10 -S'\xc7\x84,+\xde\x1d\xba?' -p856 -tp857 -Rp858 -ag6 -(g10 -S'\x84\xb6\xcc*"a\xbd?' -p859 -tp860 -Rp861 -ag6 -(g10 -S'\xbf\xdf\xef\xf7\xfb\xfd\xbe?' -p862 -tp863 -Rp864 -ag6 -(g10 -S'`\x98\xe7\xb1\x9f\x98\xbb?' -p865 -tp866 -Rp867 -ag6 -(g10 -S'h,1\x01\xb4,\xbe?' -p868 -tp869 -Rp870 -ag6 -(g10 -S'M\x10\xf6T\x8b\xa0\xba?' -p871 -tp872 -Rp873 -ag6 -(g10 -S'g\xdaen\x11a\xc4?' -p874 -tp875 -Rp876 -ag6 -(g10 -S'kN8\xa3<\x12\xc0?' -p877 -tp878 -Rp879 -ag6 -(g10 -S'\xb56\x0fd~\xf0\xbe?' -p880 -tp881 -Rp882 -ag6 -(g10 -S'\xe5\x83\xcb\x7f\x89r\xba?' -p883 -tp884 -Rp885 -asS"Conjugate gradient\nw f'" -p886 -(lp887 -g6 -(g10 -S'\x85[b-\x1c\xa7\xe0?' -p888 -tp889 -Rp890 -ag6 -(g10 -S'\xd7q\xff\x04\xd3\x94\xdc?' -p891 -tp892 -Rp893 -ag6 -(g10 -S':/\x9d\x97\xceK\xd7?' -p894 -tp895 -Rp896 -ag6 -(g10 -S'\xebQ\xb8\x1e\x85\xeb\xe0?' -p897 -tp898 -Rp899 -ag6 -(g10 -S'\xec\xd2`\xf6\x84G\xe0?' -p900 -tp901 -Rp902 -ag6 -(g10 -S'(,bOw\xc2\xe2?' -p903 -tp904 -Rp905 -ag6 -(g10 -S'\xd3\xf9\xc4=N\x96\xe1?' -p906 -tp907 -Rp908 -ag6 -(g10 -S'\xe5\x18WL\xf0\xe2\xe8?' -p909 -tp910 -Rp911 -ag6 -(g10 -S'I\x88\xe2/r\x86\xdd?' -p912 -tp913 -Rp914 -ag6 -(g10 -S'\xb3o\xd6\xdf\x17z\xde?' -p915 -tp916 -Rp917 -ag6 -(g10 -S'\x19y1\xa5\xa2F\xda?' -p918 -tp919 -Rp920 -ag6 -(g10 -S'\xbcg\x1a\xf4\x83=\xe0?' -p921 -tp922 -Rp923 -ag6 -(g10 -S'D"\x91H$\x12\xe5?' -p924 -tp925 -Rp926 -ag6 -(g10 -S'\xd8~/\xcb#\x0c\xda?' 
-p927 -tp928 -Rp929 -ag6 -(g10 -S'\xcc\x0fQ\xd0\xdf\x03\xe1?' -p930 -tp931 -Rp932 -ag6 -(g10 -S'S\xcfc:\x01\x01\xdb?' -p933 -tp934 -Rp935 -ag6 -(g10 -S'c\xb1\xbd\x89\x81\xf9\xe6?' -p936 -tp937 -Rp938 -ag6 -(g10 -S'\x01\xb5b%\xf4;\xe3?' -p939 -tp940 -Rp941 -ag6 -(g10 -S'3\xf0!\xa4-,\xe3?' -p942 -tp943 -Rp944 -ag6 -(g10 -S'\x0c\xd9\x0c\x02\xa1\x86\xd7?' -p945 -tp946 -Rp947 -asS"BFGS\nw f'" -p948 -(lp949 -g6 -(g10 -S'-6!\xda\x87\x10\xcc?' -p950 -tp951 -Rp952 -ag6 -(g10 -S'5%\xb1\xc5\x0c\x8d\xca?' -p953 -tp954 -Rp955 -ag6 -(g10 -S'\xbe\xd1\xdeho\xb4\xc7?' -p956 -tp957 -Rp958 -ag6 -(g10 -S'u\x93\x18\x04V\x0e\xc9?' -p959 -tp960 -Rp961 -ag6 -(g10 -S'$Q\xeb\xaa\x10L\xcc?' -p962 -tp963 -Rp964 -ag6 -(g10 -S'\x1f\xdf\x85\x83\xe8\xf1\xcd?' -p965 -tp966 -Rp967 -ag6 -(g10 -S'\x13\xb1F\x15=+\xcd?' -p968 -tp969 -Rp970 -ag6 -(g10 -S'\xeee\x8f\xddJ\x97\xd0?' -p971 -tp972 -Rp973 -ag6 -(g10 -S'\x10\xc5_\xe4\x0c\x0b\xce?' -p974 -tp975 -Rp976 -ag6 -(g10 -S'L\x84o_k\xa8\xc7?' -p977 -tp978 -Rp979 -ag6 -(g10 -S'\xb5>J\x07y\x12\xcb?' -p980 -tp981 -Rp982 -ag6 -(g10 -S'}s\x88\xefJ\xbf\xcb?' -p983 -tp984 -Rp985 -ag6 -(g10 -S'\x04\x02\x81@ \x10\xd0?' -p986 -tp987 -Rp988 -ag6 -(g10 -S'\xf0\xfeb\xd6z\xef\xc7?' -p989 -tp990 -Rp991 -ag6 -(g10 -S'\xb7\x1bY\x8f\x8d\x7f\xcc?' -p992 -tp993 -Rp994 -ag6 -(g10 -S'\x9d\xf4\r\x97{\xd9\xc4?' -p995 -tp996 -Rp997 -ag6 -(g10 -S'Y\xa2\x19\xe9\xee\xb9\xd0?' -p998 -tp999 -Rp1000 -ag6 -(g10 -S'\xbe\xd6\x066\x91a\xcc?' -p1001 -tp1002 -Rp1003 -ag6 -(g10 -S'\xd5\xf2\xc6\x08&\xfa\xcb?' -p1004 -tp1005 -Rp1006 -ag6 -(g10 -S'h\x88\xfa\xa7C\xc1\xca?' -p1007 -tp1008 -Rp1009 -assS'Ill-conditioned Gaussian' -p1010 -(dp1011 -g4 -(lp1012 -g6 -(g10 -S'\xcd\xe0&\x08L\xfb\xed?' -p1013 -tp1014 -Rp1015 -ag6 -(g10 -S'3\xf4\xa9M\xb4\x04\xf9?' -p1016 -tp1017 -Rp1018 -ag6 -(g10 -S'\x10Q$`\x8d\xc8\xf1?' -p1019 -tp1020 -Rp1021 -ag6 -(g10 -S'd\x1a\xc77\xad\xb6\xd4?' -p1022 -tp1023 -Rp1024 -ag6 -(g10 -S'\xc2Su\xfc\x07\xf8\xc1?' 
-p1025 -tp1026 -Rp1027 -ag6 -(g10 -S'l\x11\xc9\xc0\x97\xc6\xea?' -p1028 -tp1029 -Rp1030 -ag6 -(g10 -S'\x86\x94\xce\xeb\xa7p\xf0?' -p1031 -tp1032 -Rp1033 -ag6 -(g10 -S'E\x8ci\xbcA"\xe6?' -p1034 -tp1035 -Rp1036 -ag6 -(g10 -S'\x11\xf7\xed\x0e[\xcc\xd9?' -p1037 -tp1038 -Rp1039 -ag6 -(g10 -S'2\x99L&\x93\xc9\xf4?' -p1040 -tp1041 -Rp1042 -ag6 -(g10 -S'\xb0\x1ca\x10-\x8e\xce?' -p1043 -tp1044 -Rp1045 -ag6 -(g10 -S'\xcdj\xff\xd3\x05\xb8\xf5?' -p1046 -tp1047 -Rp1048 -ag6 -(g10 -S'\x0c\xe1E\x01\x01\x95\xe0?' -p1049 -tp1050 -Rp1051 -ag6 -(g10 -S'\x1ff\xe47\xd5I\xe7?' -p1052 -tp1053 -Rp1054 -ag6 -(g10 -S'%=IO\xd2\x93\xe4?' -p1055 -tp1056 -Rp1057 -ag6 -(g10 -S'M\xd2\xea\x08hh\xd7?' -p1058 -tp1059 -Rp1060 -ag6 -(g10 -S'\xbf\x0e\x02$v\x93\xd4?' -p1061 -tp1062 -Rp1063 -ag6 -(g10 -S'Z\xebT-\x9c2\xea?' -p1064 -tp1065 -Rp1066 -ag6 -(g10 -S'm5\xad^Nl\xce?' -p1067 -tp1068 -Rp1069 -ag6 -(g10 -S'\xda\xc2\xc4\xf4|\xf3\xb6?' -p1070 -tp1071 -Rp1072 -asg73 -(lp1073 -g6 -(g10 -S'\xfb\xfa\x1f\xb4c\x04\xf4?' -p1074 -tp1075 -Rp1076 -ag6 -(g10 -S'R3(J\xad\xf6\xec?' -p1077 -tp1078 -Rp1079 -ag6 -(g10 -S'e\xb0@\xd3;-\xeb?' -p1080 -tp1081 -Rp1082 -ag6 -(g10 -S'x\xec\xeeB6\xdb\xc8?' -p1083 -tp1084 -Rp1085 -ag6 -(g10 -S'\xc3\xd3w\x9c\r\xa0\xbe?' -p1086 -tp1087 -Rp1088 -ag6 -(g10 -S'"!\xa4D\xafO\xdb?' -p1089 -tp1090 -Rp1091 -ag6 -(g10 -S'\xd036XX!\xf3?' -p1092 -tp1093 -Rp1094 -ag6 -(g10 -S'U!\xf8\xa8[\xaa\xf0?' -p1095 -tp1096 -Rp1097 -ag6 -(g10 -S'\x95\x19\x85l\xb5\xd9\xce?' -p1098 -tp1099 -Rp1100 -ag6 -(g10 -S'\x04\x02\x81@ \x10\xec?' -p1101 -tp1102 -Rp1103 -ag6 -(g10 -S'l\xb56\xa2W\x8e\xd0?' -p1104 -tp1105 -Rp1106 -ag6 -(g10 -S'$I\x92$I\x92\xe4?' -p1107 -tp1108 -Rp1109 -ag6 -(g10 -S'/\xe1{\x03|9\xde?' -p1110 -tp1111 -Rp1112 -ag6 -(g10 -S'`\xbc\x95}\x0e\xa9\xf1?' -p1113 -tp1114 -Rp1115 -ag6 -(g10 -S'\xbb\xddn\xb7\xdb\xed\xd6?' -p1116 -tp1117 -Rp1118 -ag6 -(g10 -S'\x1b\n\x98\xbaM\x02\xcd?' -p1119 -tp1120 -Rp1121 -ag6 -(g10 -S'\xdc\x06x4\x96D\xe0?' 
-p1122 -tp1123 -Rp1124 -ag6 -(g10 -S'\xa5z\x8a\xc4\xee\xde\xf4?' -p1125 -tp1126 -Rp1127 -ag6 -(g10 -S'\xe4\x87\x06\x89\x82\xe2\xd0?' -p1128 -tp1129 -Rp1130 -ag6 -(g10 -S'\xd7+\x08\xd9 \x8b\xbd?' -p1131 -tp1132 -Rp1133 -asS'Newton\nw Hessian ' -p1134 -(lp1135 -g6 -(g10 -S'\x84\xe3V\x1f\x88\x00I?' -p1136 -tp1137 -Rp1138 -asg140 -(lp1139 -g6 -(g10 -S'\x03\x89V\xd8\x1cH\x04@' -p1140 -tp1141 -Rp1142 -ag6 -(g10 -S'\x99\xf3\xe0\xdaQ3\x08@' -p1143 -tp1144 -Rp1145 -ag6 -(g10 -S'1\xf3l \xa8Y\x11@' -p1146 -tp1147 -Rp1148 -ag6 -(g10 -S'\xac\xd7\xae\x17\xd0\x94\x12@' -p1149 -tp1150 -Rp1151 -ag6 -(g10 -S'\xf8\xfdm{\xb7\xd5 @' -p1152 -tp1153 -Rp1154 -ag6 -(g10 -S'\x94sBF\x1e\xc2\t@' -p1155 -tp1156 -Rp1157 -ag6 -(g10 -S'\x14\xear)\xf8\xac\x0e@' -p1158 -tp1159 -Rp1160 -ag6 -(g10 -S'\x01\xce\x8b\x82v\x00\r@' -p1161 -tp1162 -Rp1163 -ag6 -(g10 -S'\xdb\xc4\x9b\xb3\x95\xc9\x15@' -p1164 -tp1165 -Rp1166 -ag6 -(g10 -S'\xd5j\xb5Z\xadV\r@' -p1167 -tp1168 -Rp1169 -ag6 -(g10 -S'\x91\xbe\x94%\xe4x\x1c@' -p1170 -tp1171 -Rp1172 -ag6 -(g10 -S'{\xe5-\t5\xa4\x11@' -p1173 -tp1174 -Rp1175 -ag6 -(g10 -S'\x9d{\x97wC5\x15@' -p1176 -tp1177 -Rp1178 -ag6 -(g10 -S'\xf7\xc3\x8c\xfc\xa6:\t@' -p1179 -tp1180 -Rp1181 -ag6 -(g10 -S':\x9dN\xa7\xd3\xe9\x10@' -p1182 -tp1183 -Rp1184 -ag6 -(g10 -S'\xed\xb9\x12t\x8e\x17\x13@' -p1185 -tp1186 -Rp1187 -ag6 -(g10 -S'S5%\x0bC\x15\x10@' -p1188 -tp1189 -Rp1190 -ag6 -(g10 -S'\x01\x9a \x0eu\xee\x08@' -p1191 -tp1192 -Rp1193 -ag6 -(g10 -S'4p~\xf5DD\x1b@' -p1194 -tp1195 -Rp1196 -ag6 -(g10 -S'\x86\xd0\x90\x89\x8d\xab\x1d@' -p1197 -tp1198 -Rp1199 -asg202 -(lp1200 -g6 -(g10 -S'\xbbu\xb1\x14\xfc\xb8\xf4?' -p1201 -tp1202 -Rp1203 -ag6 -(g10 -S'e\xa4\xdee)\xdc\xf2?' -p1204 -tp1205 -Rp1206 -ag6 -(g10 -S'\xea\x14\xfd5\x99m\xe4?' -p1207 -tp1208 -Rp1209 -ag6 -(g10 -S'\x03\xf4\x83\xe3\x16b\xd1?' -p1210 -tp1211 -Rp1212 -ag6 -(g10 -S's\x9c\x03 \x08H\xb2?' -p1213 -tp1214 -Rp1215 -ag6 -(g10 -S'D:\x9c\x17\x08+\xdc?' -p1216 -tp1217 -Rp1218 -ag6 -(g10 -S'\\\x8b\xa1\xc0z|\xf1?' 
-p1219 -tp1220 -Rp1221 -ag6 -(g10 -S'\xc3\xe3\xf4Wr\xe1\xfd?' -p1222 -tp1223 -Rp1224 -ag6 -(g10 -S'\x15c<\xa0<\x0e\xdc?' -p1225 -tp1226 -Rp1227 -ag6 -(g10 -S'H$\x12\x89D"\xf3?' -p1228 -tp1229 -Rp1230 -ag6 -(g10 -S'\xba\x0b5\xee\xf9.\xbd?' -p1231 -tp1232 -Rp1233 -ag6 -(g10 -S'>z\x8d9y\xc0\xe3?' -p1234 -tp1235 -Rp1236 -ag6 -(g10 -S'OY\x97AA\xba\xd4?' -p1237 -tp1238 -Rp1239 -ag6 -(g10 -S'\x9e\xfeu\x17*q\x04@' -p1240 -tp1241 -Rp1242 -ag6 -(g10 -S'\x1f\xba\x87\xee\xa1{\xe8?' -p1243 -tp1244 -Rp1245 -ag6 -(g10 -S'\xba\x12t\x8e\x17W\xcc?' -p1246 -tp1247 -Rp1248 -ag6 -(g10 -S'\xe3\x8a~\xa6\xe1l\xe1?' -p1249 -tp1250 -Rp1251 -ag6 -(g10 -S'\xf8/\xfb\x8eW\x8c\x00@' -p1252 -tp1253 -Rp1254 -ag6 -(g10 -S'$5N\xc8Z\xee\xc4?' -p1255 -tp1256 -Rp1257 -ag6 -(g10 -S'\xa0\x8c7\x97\x87\xe7\xa8?' -p1258 -tp1259 -Rp1260 -asg264 -(lp1261 -g6 -(g10 -S'E#\x7f\xf5"\xc1\x00@' -p1262 -tp1263 -Rp1264 -ag6 -(g10 -S'bY[\x14\xb2>\xf3?' -p1265 -tp1266 -Rp1267 -ag6 -(g10 -S'\x10Q$`\x8d\xc8\xf1?' -p1268 -tp1269 -Rp1270 -ag6 -(g10 -S'\xc0\xca\xb0%d.\xc9?' -p1271 -tp1272 -Rp1273 -ag6 -(g10 -S'p\r<\x1e\x07\x04\xc0?' -p1274 -tp1275 -Rp1276 -ag6 -(g10 -S'a\x06=\xff\x07\xac\xe1?' -p1277 -tp1278 -Rp1279 -ag6 -(g10 -S'\xabH{\xa0x\xec\xe4?' -p1280 -tp1281 -Rp1282 -ag6 -(g10 -S'\x0f\xc7\x02j\xa3\x87\xe3?' -p1283 -tp1284 -Rp1285 -ag6 -(g10 -S'j\xbaT\xe9\xb8\x1f\xd0?' -p1286 -tp1287 -Rp1288 -ag6 -(g10 -S'\xcb\xe5r\xb9\\.\xef?' -p1289 -tp1290 -Rp1291 -ag6 -(g10 -S'\x19D\x8b\xa3\xe7r\xc6?' -p1292 -tp1293 -Rp1294 -ag6 -(g10 -S'@\xd2\x81\xc9\xed \xe6?' -p1295 -tp1296 -Rp1297 -ag6 -(g10 -S'<\xde\xd7\xd2\xe9\x16\xd2?' -p1298 -tp1299 -Rp1300 -ag6 -(g10 -S'\x7f\xdd\x85J\x1c\r\xdf?' -p1301 -tp1302 -Rp1303 -ag6 -(g10 -S'M&\x93\xc9d2\xd9?' -p1304 -tp1305 -Rp1306 -ag6 -(g10 -S'\x86\xc7\x17\xc0k>\xc8?' -p1307 -tp1308 -Rp1309 -ag6 -(g10 -S'"\xd5\x118US\xf2?' -p1310 -tp1311 -Rp1312 -ag6 -(g10 -S'Z\xebT-\x9c2\xea?' -p1313 -tp1314 -Rp1315 -ag6 -(g10 -S'\x00\xcc-\x91s\t\xc1?' -p1316 -tp1317 -Rp1318 -ag6 -(g10 -S'\xbbm\x803\x9aR\xac?' 
-p1319 -tp1320 -Rp1321 -asS"L-BFGS \nw f'" -p1322 -(lp1323 -g6 -(g10 -S'\xb3\xe7z\xf0Bu\xc4?' -p1324 -tp1325 -Rp1326 -ag6 -(g10 -S'\x19\x9c\x8f\xc1\xf9\x18\xcc?' -p1327 -tp1328 -Rp1329 -ag6 -(g10 -S'\x16\xdf\xe0?b\xe6\xc9?' -p1330 -tp1331 -Rp1332 -ag6 -(g10 -S'\x1e\xf71\x08\xef\xd1\xa3?' -p1333 -tp1334 -Rp1335 -ag6 -(g10 -S'T\x06=N\t\xf0\x94?' -p1336 -tp1337 -Rp1338 -ag6 -(g10 -S'N\xcc\x9e|ck\xb8?' -p1339 -tp1340 -Rp1341 -ag6 -(g10 -S'\xf4\x1e\xf1\x0f8V\xc1?' -p1342 -tp1343 -Rp1344 -ag6 -(g10 -S'J`\xb0\xcb\x08%\xc0?' -p1345 -tp1346 -Rp1347 -ag6 -(g10 -S'\x86B+Q\x02\xab\xad?' -p1348 -tp1349 -Rp1350 -ag6 -(g10 -S'2\x99L&\x93\xc9\xc4?' -p1351 -tp1352 -Rp1353 -ag6 -(g10 -S'\x82\x8f.\xb0?n\x9e?' -p1354 -tp1355 -Rp1356 -ag6 -(g10 -S'P\xb8\xdb\xccj\xff\xc3?' -p1357 -tp1358 -Rp1359 -ag6 -(g10 -S'\x0c\xfc\xc6>\xd3\x8b\xad?' -p1360 -tp1361 -Rp1362 -ag6 -(g10 -S'\x958\x1a>B\xe0\xb9?' -p1363 -tp1364 -Rp1365 -ag6 -(g10 -S'\xa1%h\tZ\x82\xb6?' -p1366 -tp1367 -Rp1368 -ag6 -(g10 -S'T\n\x01\xea\x87\x0f\xa3?' -p1369 -tp1370 -Rp1371 -ag6 -(g10 -S'A\xfd\xcf\xb7\x90\xe4\xaf?' -p1372 -tp1373 -Rp1374 -ag6 -(g10 -S'\x95\xfcu\x88!\xa8\xc5?' -p1375 -tp1376 -Rp1377 -ag6 -(g10 -S'm\xc5\xbb\x1d%\xcb\xa0?' -p1378 -tp1379 -Rp1380 -ag6 -(g10 -S'\x85\xab\xee\xfat|\x85?' -p1381 -tp1382 -Rp1383 -asS"Conjugate gradient\nw f'" -p1384 -(lp1385 -g6 -(g10 -S'\xe6e6\xc5\xd6f\xe0?' -p1386 -tp1387 -Rp1388 -ag6 -(g10 -S'\\\xc3Tq\xc3\x03\xe4?' -p1389 -tp1390 -Rp1391 -ag6 -(g10 -S'm\x1a\x97\x14\x03G\xde?' -p1392 -tp1393 -Rp1394 -ag6 -(g10 -S'\xdc\x1a\x88\xa2\xee\x1a\n@' -p1395 -tp1396 -Rp1397 -ag6 -(g10 -S'\x15\x9a\xae\xda\x08\xec\xb3?' -p1398 -tp1399 -Rp1400 -ag6 -(g10 -S'\xb7\x01\xee<\x80=\n@' -p1401 -tp1402 -Rp1403 -ag6 -(g10 -S'\r`\xab\xda^\x9a\xea?' -p1404 -tp1405 -Rp1406 -ag6 -(g10 -S'?\xb8"\xadz\x1f\xec?' -p1407 -tp1408 -Rp1409 -ag6 -(g10 -S'\x96\xbb\xef<\x1d\x98\x00@' -p1410 -tp1411 -Rp1412 -ag6 -(g10 -S'q8\x1c\x0e\x87\xc3\xe1?' -p1413 -tp1414 -Rp1415 -ag6 -(g10 -S'\x11\x9b\xd1\xe1|N\xf0?' 
-p1416 -tp1417 -Rp1418 -ag6 -(g10 -S'\x89z\xa36\x9d\xdd\xeb?' -p1419 -tp1420 -Rp1421 -ag6 -(g10 -S'<\xf9X\x10\xbc\r\xff?' -p1422 -tp1423 -Rp1424 -ag6 -(g10 -S'\x82\xf1V\xf69\xa4\xe6?' -p1425 -tp1426 -Rp1427 -ag6 -(g10 -S'\xcc\x103\xc4\x0c1\x03@' -p1428 -tp1429 -Rp1430 -ag6 -(g10 -S'\x80nMv\xa8\xf1\x08@' -p1431 -tp1432 -Rp1433 -ag6 -(g10 -S'\xeb\x0e\x85\x18-\x95\x02@' -p1434 -tp1435 -Rp1436 -ag6 -(g10 -S'P\x9a\xfe\x18\xcfj\xe0?' -p1437 -tp1438 -Rp1439 -ag6 -(g10 -S'\x16\x93:D\xe2\xda\xf4?' -p1440 -tp1441 -Rp1442 -ag6 -(g10 -S'&roN\x9c\xe4\xf3?' -p1443 -tp1444 -Rp1445 -asS"BFGS\nw f'" -p1446 -(lp1447 -g6 -(g10 -S'0\xc8\xe33\xd5\xb0\xc8?' -p1448 -tp1449 -Rp1450 -ag6 -(g10 -S'\xf3u\xadT\xc2 \xd1?' -p1451 -tp1452 -Rp1453 -ag6 -(g10 -S'\xf0%B8o\xf6\xd0?' -p1454 -tp1455 -Rp1456 -ag6 -(g10 -S',Y>;\x8dh\xb0?' -p1457 -tp1458 -Rp1459 -ag6 -(g10 -S'(\xb4Z\x15\x0c0\x9b?' -p1460 -tp1461 -Rp1462 -ag6 -(g10 -S'\x16\x16\x18\x83\x1f5\xc2?' -p1463 -tp1464 -Rp1465 -ag6 -(g10 -S'\xdc8\x0c<\xe4\xe6\xca?' -p1466 -tp1467 -Rp1468 -ag6 -(g10 -S'ud\xcf@T:\xc2?' -p1469 -tp1470 -Rp1471 -ag6 -(g10 -S'\x07\x1fQ\xec\x97H\xb5?' -p1472 -tp1473 -Rp1474 -ag6 -(g10 -S'\x1c\x0e\x87\xc3\xe1p\xd0?' -p1475 -tp1476 -Rp1477 -ag6 -(g10 -S't^\xf0c\xc2\xb2\xa6?' -p1478 -tp1479 -Rp1480 -ag6 -(g10 -S'!\xb6\xb6\xe2\xdc$\xcf?' -p1481 -tp1482 -Rp1483 -ag6 -(g10 -S'}\x04\x11\xca\x18\x06\xb9?' -p1484 -tp1485 -Rp1486 -ag6 -(g10 -S'd\x15[\x94\xf3%\xc3?' -p1487 -tp1488 -Rp1489 -ag6 -(g10 -S"\xa5'\xe9Iz\x92\xbe?" -p1490 -tp1491 -Rp1492 -ag6 -(g10 -S'X&\x8c\xda\x17\xe3\xb0?' -p1493 -tp1494 -Rp1495 -ag6 -(g10 -S'\xc4\xe5\xb4\xdd\xa7\xf9\xb0?' -p1496 -tp1497 -Rp1498 -ag6 -(g10 -S'\x95\xfcu\x88!\xa8\xc5?' -p1499 -tp1500 -Rp1501 -ag6 -(g10 -S'\x92\x8a\x18\xa5V\x18\xa8?' -p1502 -tp1503 -Rp1504 -ag6 -(g10 -S'\xf83=HiV\x93?' -p1505 -tp1506 -Rp1507 -assS'Ill-conditioned quadratic' -p1508 -(dp1509 -g4 -(lp1510 -g6 -(g10 -S'\x04\xf4fq\xcf\x1d\xdf?' -p1511 -tp1512 -Rp1513 -ag6 -(g10 -S'\xb2z\xda\x83;+\xa7?' 
-p1514 -tp1515 -Rp1516 -ag6 -(g10 -S'\xbaj\xd5\x8fK\x9d\xca?' -p1517 -tp1518 -Rp1519 -ag6 -(g10 -S'\xd8\xf1\x9a\xb1\xcf\x1b\x99?' -p1520 -tp1521 -Rp1522 -ag6 -(g10 -S'\x91\xe6\xe09\x08L\x9b?' -p1523 -tp1524 -Rp1525 -ag6 -(g10 -S'\x9d\x10.\xd1\t\xe1\xe2?' -p1526 -tp1527 -Rp1528 -ag6 -(g10 -S'\n!\xd1\x9fbz\xf0?' -p1529 -tp1530 -Rp1531 -ag6 -(g10 -S'Y\x1f\x1a\xebCc\xdd?' -p1532 -tp1533 -Rp1534 -ag6 -(g10 -S'\xbd\xee=\x1e\xdb\xb2\xad?' -p1535 -tp1536 -Rp1537 -ag6 -(g10 -S'\x85HO\xe1\x0b\x90\xd2?' -p1538 -tp1539 -Rp1540 -ag6 -(g10 -S'\xe5\x88\x82\xcb\x91O\x9c?' -p1541 -tp1542 -Rp1543 -ag6 -(g10 -S'\x02\xdfC\xf3\x97\xf6\xe8?' -p1544 -tp1545 -Rp1546 -ag6 -(g10 -S'\x92\xbah\x83\x13\xa4\xa4?' -p1547 -tp1548 -Rp1549 -ag6 -(g10 -S'\x0fT\xcen\xe1W\xe3?' -p1550 -tp1551 -Rp1552 -ag6 -(g10 -S';\xc5\xa3v\xe0\x98\xa4?' -p1553 -tp1554 -Rp1555 -ag6 -(g10 -S'K\xb2,\x08\xcf\x14\xa3?' -p1556 -tp1557 -Rp1558 -ag6 -(g10 -S'e&\x16y\x1e/\xdb?' -p1559 -tp1560 -Rp1561 -ag6 -(g10 -S'g\xb0t\x84\x95\xb5\xf0?' -p1562 -tp1563 -Rp1564 -ag6 -(g10 -S'\xfe\x7f\x9d\x1c\x05\x16\xa0?' -p1565 -tp1566 -Rp1567 -ag6 -(g10 -S'&[\xd2\xd4n\x93\x97?' -p1568 -tp1569 -Rp1570 -asg73 -(lp1571 -g6 -(g10 -S'\xd8\xc2\x06j\xe7O\xe4?' -p1572 -tp1573 -Rp1574 -ag6 -(g10 -S'\xb1\xd6\xf6t\xacM\xa2?' -p1575 -tp1576 -Rp1577 -ag6 -(g10 -S'zqJ\x8e\x13y\xc1?' -p1578 -tp1579 -Rp1580 -ag6 -(g10 -S'\xa4?w\xad2<\xa2?' -p1581 -tp1582 -Rp1583 -ag6 -(g10 -S'\xeb\xf9\x9c\xa2\x97\xa1\x9a?' -p1584 -tp1585 -Rp1586 -ag6 -(g10 -S'\xfb!\x81\xb7\x1f\x12\xd8?' -p1587 -tp1588 -Rp1589 -ag6 -(g10 -S'\xdd\xfe\xba\x87fB\xf8?' -p1590 -tp1591 -Rp1592 -ag6 -(g10 -S'9\x05/\xa7\xe0\xe5\xe4?' -p1593 -tp1594 -Rp1595 -ag6 -(g10 -S'\xaa\x9a\xa1\xde\x9b\xcd\xa5?' -p1596 -tp1597 -Rp1598 -ag6 -(g10 -S'\\7\x7f\xc6&\x96\xcc?' -p1599 -tp1600 -Rp1601 -ag6 -(g10 -S'\x9eQ\xd8o\xfb\xd0\xa2?' -p1602 -tp1603 -Rp1604 -ag6 -(g10 -S'\x91\xa6\xd9 \x15>\xdb?' -p1605 -tp1606 -Rp1607 -ag6 -(g10 -S'E\xef\xe6\xb9\x8c\xf9\xa1?' 
-p1608 -tp1609 -Rp1610 -ag6 -(g10 -S'\x06\x88R,ZV\xed?' -p1611 -tp1612 -Rp1613 -ag6 -(g10 -S'\x93\xf08\xc4\xa9\xbb\x9c?' -p1614 -tp1615 -Rp1616 -ag6 -(g10 -S'\x1cC!\xf2\xac\x18\x9e?' -p1617 -tp1618 -Rp1619 -ag6 -(g10 -S'u\x08\xfb\x06\x04\x16\xe4?' -p1620 -tp1621 -Rp1622 -ag6 -(g10 -S'\x16\xe0BR\xc8z\xe4?' -p1623 -tp1624 -Rp1625 -ag6 -(g10 -S'\xa4M\xd8\xde\x84\xb2\xa4?' -p1626 -tp1627 -Rp1628 -ag6 -(g10 -S'\x0b\xe7@\xb2\x9c\t\xa3?' -p1629 -tp1630 -Rp1631 -asS'Newton\nw Hessian ' -p1632 -(lp1633 -g6 -(g10 -S'\x08\x04\x03\xaaZ-.?' -p1634 -tp1635 -Rp1636 -asg140 -(lp1637 -g6 -(g10 -S'\xc2\x88\x83T\xad\xcc\x12@' -p1638 -tp1639 -Rp1640 -ag6 -(g10 -S'\xbcDE\xceIx\x01@' -p1641 -tp1642 -Rp1643 -ag6 -(g10 -S'\xedl\xd8\xc9\xeb\xbb\xf8?' -p1644 -tp1645 -Rp1646 -ag6 -(g10 -S'\xa7\x0fx\x84\x1e\x11\xf3?' -p1647 -tp1648 -Rp1649 -ag6 -(g10 -S'\x05\xfa\x8b\xad\x02m\xe8?' -p1650 -tp1651 -Rp1652 -ag6 -(g10 -S'x\xb8\x10\xbb\x95?' -p1784 -tp1785 -Rp1786 -ag6 -(g10 -S'S2\xa2n\xdd\xfc\xb9?' -p1787 -tp1788 -Rp1789 -ag6 -(g10 -S'j\xa8LF\x8fU\x83?' -p1790 -tp1791 -Rp1792 -ag6 -(g10 -S'}\x0f\xcd_\xdac\xce?' -p1793 -tp1794 -Rp1795 -ag6 -(g10 -S'\xaa\xd9\xa4n\xc1\x14\x88?' -p1796 -tp1797 -Rp1798 -ag6 -(g10 -S'\x17~5&\xd2\x03\xed?' -p1799 -tp1800 -Rp1801 -ag6 -(g10 -S'N\xfe,\xd1\x96\x89\x92?' -p1802 -tp1803 -Rp1804 -ag6 -(g10 -S'\xac!\xf7P\x1a\x05\x8a?' -p1805 -tp1806 -Rp1807 -ag6 -(g10 -S'\x04[\xbe2\xe2)\xc8?' -p1808 -tp1809 -Rp1810 -ag6 -(g10 -S'5\xeb\xf0\x05rG\xd6?' -p1811 -tp1812 -Rp1813 -ag6 -(g10 -S';\xe9\x9d\xcf\xe0*\x8b?' -p1814 -tp1815 -Rp1816 -ag6 -(g10 -S'4\xa6\xf9\x94\xcd\x80\x80?' -p1817 -tp1818 -Rp1819 -asS"L-BFGS \nw f'" -p1820 -(lp1821 -g6 -(g10 -S'\xe1\xe5\x14\xbc\x9c\x82\xa7?' -p1822 -tp1823 -Rp1824 -ag6 -(g10 -S'^e&\xcc:\xe0j?' -p1825 -tp1826 -Rp1827 -ag6 -(g10 -S'\xc8H\x05s\x82\xb2\x8c?' -p1828 -tp1829 -Rp1830 -ag6 -(g10 -S'\x8a\x18C\xd8B ]?' -p1831 -tp1832 -Rp1833 -ag6 -(g10 -S'w\xe4\x827h\xe4^?' -p1834 -tp1835 -Rp1836 -ag6 -(g10 -S'1e^\x11S\xe6\xa5?' 
-p1837 -tp1838 -Rp1839 -ag6 -(g10 -S'\xa3\x92\x1f\xe6r\xe6\xb8?' -p1840 -tp1841 -Rp1842 -ag6 -(g10 -S'\x8d\xf5\xa1\xb1>4\xa6?' -p1843 -tp1844 -Rp1845 -ag6 -(g10 -S'.{\x11\xf8\xcc\xf6q?' -p1846 -tp1847 -Rp1848 -ag6 -(g10 -S'\xd7rp\xb3_\x88\x95?' -p1849 -tp1850 -Rp1851 -ag6 -(g10 -S'Q\xe30P\x10\x05`?' -p1852 -tp1853 -Rp1854 -ag6 -(g10 -S'YG\x9b\xf7).\xa9?' -p1855 -tp1856 -Rp1857 -ag6 -(g10 -S'\xc0\x1a\xc3\xba\xf0\xf3c?' -p1858 -tp1859 -Rp1860 -ag6 -(g10 -S'\xda-\xfcjL\xa4\xc7?' -p1861 -tp1862 -Rp1863 -ag6 -(g10 -S'u\xe6\xdd\x90\xdb{n?' -p1864 -tp1865 -Rp1866 -ag6 -(g10 -S'\x8do\x02=\xc5\xe5f?' -p1867 -tp1868 -Rp1869 -ag6 -(g10 -S'\xf7f\xbbD\x00\x8a\xa4?' -p1870 -tp1871 -Rp1872 -ag6 -(g10 -S'\x86\xbb"8?\x82\xb2?' -p1873 -tp1874 -Rp1875 -ag6 -(g10 -S'>\xa7\x81.RNf?' -p1876 -tp1877 -Rp1878 -ag6 -(g10 -S'\xa7\xbb\x12*\x1aY[?' -p1879 -tp1880 -Rp1881 -asS"Conjugate gradient\nw f'" -p1882 -(lp1883 -g6 -(g10 -S'k\xa4\xa9\xd8\x7f`\x04@' -p1884 -tp1885 -Rp1886 -ag6 -(g10 -S'\xa7\xfc\xc4\xa0]\xc8\x1a@' -p1887 -tp1888 -Rp1889 -ag6 -(g10 -S'\xea%\xadsM\xc8\x1b@' -p1890 -tp1891 -Rp1892 -ag6 -(g10 -S'qpM\xc2\x1b\xe8\x1e@' -p1893 -tp1894 -Rp1895 -ag6 -(g10 -S'*\x89\x9fG\x81R @' -p1896 -tp1897 -Rp1898 -ag6 -(g10 -S'\xd4\x9d5C\xddY\x0b@' -p1899 -tp1900 -Rp1901 -ag6 -(g10 -S'\xe7\x11\xaa\xcf\xb45\xee?' -p1902 -tp1903 -Rp1904 -ag6 -(g10 -S'\x10\x8d\xf5\xa1\xb1>\x03@' -p1905 -tp1906 -Rp1907 -ag6 -(g10 -S'\x0b\xcd\x08\x0b\xb65\x1b@' -p1908 -tp1909 -Rp1910 -ag6 -(g10 -S'9v\xb9\xc8\xa1\xc9\x00@' -p1911 -tp1912 -Rp1913 -ag6 -(g10 -S'\xae\x818\x84N\x9b\x1a@' -p1914 -tp1915 -Rp1916 -ag6 -(g10 -S'\xf84\xc2rO#\x0c@' -p1917 -tp1918 -Rp1919 -ag6 -(g10 -S'A\xc06\x97 1 @' -p1920 -tp1921 -Rp1922 -ag6 -(g10 -S'\xcen\xe1Wc"\x02@' -p1923 -tp1924 -Rp1925 -ag6 -(g10 -S'\x08\xb6\xcf\xb6\xd65!@' -p1926 -tp1927 -Rp1928 -ag6 -(g10 -S';\x00\xa0EMG!@' -p1929 -tp1930 -Rp1931 -ag6 -(g10 -S'\xee\x9c\x15e\xf5\xb4\x00@' -p1932 -tp1933 -Rp1934 -ag6 -(g10 -S'\xf7P\xb9\x9f\xef\xf6\xd6?' 
-p1935 -tp1936 -Rp1937 -ag6 -(g10 -S'\xbbL\x99E\x8ex!@' -p1938 -tp1939 -Rp1940 -ag6 -(g10 -S'\x89f\x94\n\x06\x02 @' -p1941 -tp1942 -Rp1943 -asS"BFGS\nw f'" -p1944 -(lp1945 -g6 -(g10 -S'G\x18q\x90\xaa\x95\xb9?' -p1946 -tp1947 -Rp1948 -ag6 -(g10 -S'\x06T\x00^L\xc4\x82?' -p1949 -tp1950 -Rp1951 -ag6 -(g10 -S'\x93\x1a\xab\xdcc\x0f\xa5?' -p1952 -tp1953 -Rp1954 -ag6 -(g10 -S'\xe5C\xb3\xf3\x86Vt?' -p1955 -tp1956 -Rp1957 -ag6 -(g10 -S'\xb6\xb0h\x04\x9a\x1av?' -p1958 -tp1959 -Rp1960 -ag6 -(g10 -S'[X\xe9\xa9\x85\x95\xbe?' -p1961 -tp1962 -Rp1963 -ag6 -(g10 -S'\x93\x1f\xe6r\xe6\x18\xcb?' -p1964 -tp1965 -Rp1966 -ag6 -(g10 -S'\n^N\xc1\xcb)\xb8?' -p1967 -tp1968 -Rp1969 -ag6 -(g10 -S'\x10\xca\xa3}u\x0c\x88?' -p1970 -tp1971 -Rp1972 -ag6 -(g10 -S'\x85\xcc\x8f\xafP\x12\xae?' -p1973 -tp1974 -Rp1975 -ag6 -(g10 -S'\x9bh\x95{\xc3\xecv?' -p1976 -tp1977 -Rp1978 -ag6 -(g10 -S'\x8b\xe7\xc0\x93\x0b0\xc4?' -p1979 -tp1980 -Rp1981 -ag6 -(g10 -S'\x9cc\xb0\x81K\xaf\x80?' -p1982 -tp1983 -Rp1984 -ag6 -(g10 -S'\xdb~\x86\xb0\x17\xcf\xbf?' -p1985 -tp1986 -Rp1987 -ag6 -(g10 -S'\x13\x18B\xbc\x07\xaf\x80?' -p1988 -tp1989 -Rp1990 -ag6 -(g10 -S'\xa3o\x1e7\x82\xe0~?' -p1991 -tp1992 -Rp1993 -ag6 -(g10 -S'\xfe\xe0\xbc;\xf1Y\xb6?' -p1994 -tp1995 -Rp1996 -ag6 -(g10 -S'I\x194\xd4^\xc3\xcb?' -p1997 -tp1998 -Rp1999 -ag6 -(g10 -S'\x1dC\x97\x8b\n\x06z?' -p2000 -tp2001 -Rp2002 -ag6 -(g10 -S'\x8d\xe8\x95_\xb3\x18s?' -p2003 -tp2004 -Rp2005 -assS'Well-conditioned Gaussian' -p2006 -(dp2007 -g4 -(lp2008 -g6 -(g10 -S'rM\x04rM\x04\xf1?' -p2009 -tp2010 -Rp2011 -ag6 -(g10 -S'\x94\xf0FS\xe7\xd7\xee?' -p2012 -tp2013 -Rp2014 -ag6 -(g10 -S'\xb4\x9eV\xc0\xb1\xc2\xec?' -p2015 -tp2016 -Rp2017 -ag6 -(g10 -S'\xf4\xd7\xb7\xa5\xc0l\xee?' -p2018 -tp2019 -Rp2020 -ag6 -(g10 -S'Y\x02\x9b\xf6\x93%\xf0?' -p2021 -tp2022 -Rp2023 -ag6 -(g10 -S'm\xb12|#\n\xf0?' -p2024 -tp2025 -Rp2026 -ag6 -(g10 -S'\x02\x95\x9d\x90sU\xf2?' -p2027 -tp2028 -Rp2029 -ag6 -(g10 -S'e\x96\x10~$\xe2\xf1?' -p2030 -tp2031 -Rp2032 -ag6 -(g10 -S'\xce9\xe7\x9cs\x0e\xf1?' 
-p2033 -tp2034 -Rp2035 -ag6 -(g10 -S'Iv\x0f\x0cz@\xeb?' -p2036 -tp2037 -Rp2038 -ag6 -(g10 -S'\x92?\xaf\xb28\xa3\xed?' -p2039 -tp2040 -Rp2041 -ag6 -(g10 -S'\xeeeM\xbbtD\xef?' -p2042 -tp2043 -Rp2044 -ag6 -(g10 -S'\x9et\xe6\xe5\xea\xbd\xf2?' -p2045 -tp2046 -Rp2047 -ag6 -(g10 -S'\xc3!B|J\xac\xee?' -p2048 -tp2049 -Rp2050 -ag6 -(g10 -S'\x9e\xa6\xe5Y\xdc\xb5\xf0?' -p2051 -tp2052 -Rp2053 -ag6 -(g10 -S'\xb69]\xe5\x99\xf8\xe8?' -p2054 -tp2055 -Rp2056 -ag6 -(g10 -S'\xe3sNB\x89,\xf1?' -p2057 -tp2058 -Rp2059 -ag6 -(g10 -S'QQQQQQ\xf1?' -p2060 -tp2061 -Rp2062 -ag6 -(g10 -S'-;\x9eSI\x01\xf1?' -p2063 -tp2064 -Rp2065 -ag6 -(g10 -S'\x95&\xa2\x1b\xa1\xa1\xee?' -p2066 -tp2067 -Rp2068 -asg73 -(lp2069 -g6 -(g10 -S'f\xf7\x1be\xf7\x1b\t@' -p2070 -tp2071 -Rp2072 -ag6 -(g10 -S'\xdd\xb1\xaba\xe9E\x0c@' -p2073 -tp2074 -Rp2075 -ag6 -(g10 -S'\x1c\xf0\x0eR\xb9\xf5\x0e@' -p2076 -tp2077 -Rp2078 -ag6 -(g10 -S'^\xa0=qP\xca\x0c@' -p2079 -tp2080 -Rp2081 -ag6 -(g10 -S'N\xfb\xc9\x12\xd8\xb4\t@' -p2082 -tp2083 -Rp2084 -ag6 -(g10 -S'\xafV=\x7fmh\x03@' -p2085 -tp2086 -Rp2087 -ag6 -(g10 -S'o+\x17M\xc0\x1e\x06@' -p2088 -tp2089 -Rp2090 -ag6 -(g10 -S'\xd2k3\xed=p\x03@' -p2091 -tp2092 -Rp2093 -ag6 -(g10 -S'\x94RJ)\xa5\x84\x06@' -p2094 -tp2095 -Rp2096 -ag6 -(g10 -S'\xfd\xba\x0c\x0f\xc4<\x0b@' -p2097 -tp2098 -Rp2099 -ag6 -(g10 -S'\x13/_\xb3\x86\xb8\x0b@' -p2100 -tp2101 -Rp2102 -ag6 -(g10 -S'o}tXh\x85\x08@' -p2103 -tp2104 -Rp2105 -ag6 -(g10 -S'\x15B\xad\xe8\xd1\x9e\x03@' -p2106 -tp2107 -Rp2108 -ag6 -(g10 -S'\xba(j\xe2\xd5\x8d\x0f@' -p2109 -tp2110 -Rp2111 -ag6 -(g10 -S'\x1b*C\x84\x00\xc2\x07@' -p2112 -tp2113 -Rp2114 -ag6 -(g10 -S'\x94\xce\x06\x89\xd1\xbc\x10@' -p2115 -tp2116 -Rp2117 -ag6 -(g10 -S'\x9cv\xb52\xc44\x05@' -p2118 -tp2119 -Rp2120 -ag6 -(g10 -S'=\xa3\tp\xd6<\x03@' -p2121 -tp2122 -Rp2123 -ag6 -(g10 -S'\xd0\xad\xe3\xfe\t\xf2?' -p2233 -tp2234 -Rp2235 -ag6 -(g10 -S',\xfci!\xc0P\xf0?' -p2236 -tp2237 -Rp2238 -ag6 -(g10 -S"/\x15\x12\x86'y\xf1?" -p2239 -tp2240 -Rp2241 -ag6 -(g10 -S'\\\n\xfdI\xc6\xa2\xea?' 
-p2242 -tp2243 -Rp2244 -ag6 -(g10 -S'\x1d\x14\xc1s0\xc6\xf6?' -p2245 -tp2246 -Rp2247 -ag6 -(g10 -S'\x89U"\xef\xbb\x88\xf5?' -p2248 -tp2249 -Rp2250 -ag6 -(g10 -S'aBj\x81#\x92\xf0?' -p2251 -tp2252 -Rp2253 -ag6 -(g10 -S'H\xe2j\xd9]\xe4\xee?' -p2254 -tp2255 -Rp2256 -asg264 -(lp2257 -g6 -(g10 -S'\xed~\xa3\xec~\xa3\xe0?' -p2258 -tp2259 -Rp2260 -ag6 -(g10 -S'\xdd\xb1\xaba\xe9E\xdc?' -p2261 -tp2262 -Rp2263 -ag6 -(g10 -S'\xdcz\x1fD\xcbs\xde?' -p2264 -tp2265 -Rp2266 -ag6 -(g10 -S'Q!\xdd\x1d\x99{\xdc?' -p2267 -tp2268 -Rp2269 -ag6 -(g10 -S'\x17\x0e\xa2\xc7w\xe1\xe0?' -p2270 -tp2271 -Rp2272 -ag6 -(g10 -S'\xebg\x8b\x95\xe1\x1b\xe1?' -p2273 -tp2274 -Rp2275 -ag6 -(g10 -S'\x02\x95\x9d\x90sU\xe2?' -p2276 -tp2277 -Rp2278 -ag6 -(g10 -S'\x1b\x01\xa251\xa9\xe2?' -p2279 -tp2280 -Rp2281 -ag6 -(g10 -S'k\xad\xb5\xd6Zk\xe1?' -p2282 -tp2283 -Rp2284 -ag6 -(g10 -S'\xad\xc91\xb6\xa7&\xde?' -p2285 -tp2286 -Rp2287 -ag6 -(g10 -S'\xbd\xca\xe2\x8cv\x0f\xdb?' -p2288 -tp2289 -Rp2290 -ag6 -(g10 -S'\x1b@\x07\xa8o\xe8\xdd?' -p2291 -tp2292 -Rp2293 -ag6 -(g10 -S'\x9et\xe6\xe5\xea\xbd\xe2?' -p2294 -tp2295 -Rp2296 -ag6 -(g10 -S'!\xbc\xf9\xdb\xf0h\xdb?' -p2297 -tp2298 -Rp2299 -ag6 -(g10 -S'L(\x1c\xcd\xdao\xe1?' -p2300 -tp2301 -Rp2302 -ag6 -(g10 -S'\\\n\xfdI\xc6\xa2\xda?' -p2303 -tp2304 -Rp2305 -ag6 -(g10 -S'\xbf\xc0(\xfa\xd7\xaa\xe2?' -p2306 -tp2307 -Rp2308 -ag6 -(g10 -S'\x12\x12\x12\x12\x12\x12\xe2?' -p2309 -tp2310 -Rp2311 -ag6 -(g10 -S"*) \xe1'\x17\xe2?" -p2312 -tp2313 -Rp2314 -ag6 -(g10 -S'VQ,A\xc9\xfa\xdd?' -p2315 -tp2316 -Rp2317 -asS"L-BFGS \nw f'" -p2318 -(lp2319 -g6 -(g10 -S'\x84\x15:\x83\x15:\xbb?' -p2320 -tp2321 -Rp2322 -ag6 -(g10 -S'o4u~\xed!\xb7?' -p2323 -tp2324 -Rp2325 -ag6 -(g10 -S'\xff\x9c,\xe2 \xd0\xb8?' -p2326 -tp2327 -Rp2328 -ag6 -(g10 -S'\xa0\x8f@^\xdaM\xb7?' -p2329 -tp2330 -Rp2331 -ag6 -(g10 -S'\x8b\xb9\xd5\x19\xa9\x98\xbb?' -p2332 -tp2333 -Rp2334 -ag6 -(g10 -S'\x01n\x1fR\xce\xf1\xbb?' -p2335 -tp2336 -Rp2337 -ag6 -(g10 -S'\xe0\x93\xed\xd3\xc5\xf8\xbd?' 
-p2338 -tp2339 -Rp2340 -ag6 -(g10 -S'\xf1#\x11O\xbfz\xbe?' -p2341 -tp2342 -Rp2343 -ag6 -(g10 -S's\xce9\xe7\x9cs\xbc?' -p2344 -tp2345 -Rp2346 -ag6 -(g10 -S'\xa5\xd7\x182\xac\x95\xb8?' -p2347 -tp2348 -Rp2349 -ag6 -(g10 -S'(S\xde\x11\xec)\xb6?' -p2350 -tp2351 -Rp2352 -ag6 -(g10 -S'\xd0\xa8\xeeZ[x\xb8?' -p2353 -tp2354 -Rp2355 -ag6 -(g10 -S'y\xbeMD\x99\x9c\xbe?' -p2356 -tp2357 -Rp2358 -ag6 -(g10 -S' 6wm5s\xb6?' -p2359 -tp2360 -Rp2361 -ag6 -(g10 -S'\x05u%q\xf6z\xbc?' -p2362 -tp2363 -Rp2364 -ag6 -(g10 -S'\xed?(x\xaa\xc0\xb5?' -p2365 -tp2366 -Rp2367 -ag6 -(g10 -S'W\x86\x98\xa6\x12w\xbe?' -p2368 -tp2369 -Rp2370 -ag6 -(g10 -S'\x84\x1d\xb7P\xea\x83\xbd?' -p2371 -tp2372 -Rp2373 -ag6 -(g10 -S'\xe2\x91c\x1fB5\xbb?' -p2374 -tp2375 -Rp2376 -ag6 -(g10 -S'\xaa\xeb\xb4\xafM\x81\xb8?' -p2377 -tp2378 -Rp2379 -asS"Conjugate gradient\nw f'" -p2380 -(lp2381 -g6 -(g10 -S'\xc6|\xea\xc5|\xea\xdf?' -p2382 -tp2383 -Rp2384 -ag6 -(g10 -S'\x0e\x02n}6\xe3\xdb?' -p2385 -tp2386 -Rp2387 -ag6 -(g10 -S'\xf3a\xaa\xa3\x85\x92\xd7?' -p2388 -tp2389 -Rp2390 -ag6 -(g10 -S'\xa5\x12dD\x90\xea\x0b\xf3?' -p2558 -tp2559 -Rp2560 -ag6 -(g10 -S'zm\xec#\xd6N\xf1?' -p2561 -tp2562 -Rp2563 -ag6 -(g10 -S'\x05\x00\xb1\x10n\x8d\xec?' -p2564 -tp2565 -Rp2566 -asg73 -(lp2567 -g6 -(g10 -S'\x01X\xf3.\xbds\xfc?' -p2568 -tp2569 -Rp2570 -ag6 -(g10 -S'\x04\xaa\x81:\x82\xfd\xff?' -p2571 -tp2572 -Rp2573 -ag6 -(g10 -S'\x1e|\xa8\x90d0\xfe?' -p2574 -tp2575 -Rp2576 -ag6 -(g10 -S'5\x05\xadq\xe6\xdb\xfd?' -p2577 -tp2578 -Rp2579 -ag6 -(g10 -S'\xfc\xcf\xb4k\xbdE\xfc?' -p2580 -tp2581 -Rp2582 -ag6 -(g10 -S'S\x17\xea\x8c\xf1K\x01@' -p2583 -tp2584 -Rp2585 -ag6 -(g10 -S"'\x92F=[\x98\x01@" -p2586 -tp2587 -Rp2588 -ag6 -(g10 -S'jHv$x\x05\xfe?' -p2589 -tp2590 -Rp2591 -ag6 -(g10 -S'\xd2\xd3>d\x00\xb9\xfc?' -p2592 -tp2593 -Rp2594 -ag6 -(g10 -S'\x1e\xa6\x00\x0b\x8d\xc3\xff?' -p2595 -tp2596 -Rp2597 -ag6 -(g10 -S'#\xc3.\xb9\x0e\n\x03@' -p2598 -tp2599 -Rp2600 -ag6 -(g10 -S'4\x9a8\x86J\xd4\x01@' -p2601 -tp2602 -Rp2603 -ag6 -(g10 -S'\x9de8QWK\xfe?' 
-p2604 -tp2605 -Rp2606 -ag6 -(g10 -S'\x81\xc5\xbdcl1\xfd?' -p2607 -tp2608 -Rp2609 -ag6 -(g10 -S'])\xe8N\xf5\xb0\xfd?' -p2610 -tp2611 -Rp2612 -ag6 -(g10 -S'\x11+\xccUB\xcf\x02@' -p2613 -tp2614 -Rp2615 -ag6 -(g10 -S'\xbf\x92\x17;\xbd\xa8\xfd?' -p2616 -tp2617 -Rp2618 -ag6 -(g10 -S'12L\x9dcG\x00@' -p2619 -tp2620 -Rp2621 -ag6 -(g10 -S'\x03~R\x92\xde\xe1\x00@' -p2622 -tp2623 -Rp2624 -ag6 -(g10 -S'\xc2L\xfbp\xad\xac\xfc?' -p2625 -tp2626 -Rp2627 -asS'Newton\nw Hessian ' -p2628 -(lp2629 -g6 -(g10 -S'r\xdf&\xc9\x99\xffC?' -p2630 -tp2631 -Rp2632 -asg140 -(lp2633 -g6 -(g10 -S'g\x80~C\x9a?\xda?' -p2634 -tp2635 -Rp2636 -ag6 -(g10 -S'&\x9d6J\xfb1\xd8?' -p2637 -tp2638 -Rp2639 -ag6 -(g10 -S'4\x96\xe1\xaaw\xfb\xdc?' -p2640 -tp2641 -Rp2642 -ag6 -(g10 -S'\xbe\xa5\xcc\x94\xf7-\xdd?' -p2643 -tp2644 -Rp2645 -ag6 -(g10 -S'\xd4V2\xea\x9c\x9f\xd5?' -p2646 -tp2647 -Rp2648 -ag6 -(g10 -S'\xce*\xdb\xf5\xf5\xe9\xdb?' -p2649 -tp2650 -Rp2651 -ag6 -(g10 -S'\xdf\xff\x17\xa5\x08\xfd\xe0?' -p2652 -tp2653 -Rp2654 -ag6 -(g10 -S'\x0e\xbd[\\\xa7\x1a\xd9?' -p2655 -tp2656 -Rp2657 -ag6 -(g10 -S't\x0e\xc9}[J\xdd?' -p2658 -tp2659 -Rp2660 -ag6 -(g10 -S'kH2I\x0c\x8c\xe0?' -p2661 -tp2662 -Rp2663 -ag6 -(g10 -S'\xf10\x08\x1d\xc8\xbe\xe0?' -p2664 -tp2665 -Rp2666 -ag6 -(g10 -S'\xa0\xc4\xb29\xab\xe0\xe1?' -p2667 -tp2668 -Rp2669 -ag6 -(g10 -S'\xcb\x8b\xb6k,\x84\xd5?' -p2670 -tp2671 -Rp2672 -ag6 -(g10 -S'\x16AR+s\xf8\xe2?' -p2673 -tp2674 -Rp2675 -ag6 -(g10 -S'(\xdc\x89I\x96\xbb\xd7?' -p2676 -tp2677 -Rp2678 -ag6 -(g10 -S'\xc8\xf0\xbf=F\xac\xe0?' -p2679 -tp2680 -Rp2681 -ag6 -(g10 -S'\xa9p\xd5\x89\xd9\x18\xdc?' -p2682 -tp2683 -Rp2684 -ag6 -(g10 -S'\xceL\x8e\xbd\x90\n\xdd?' -p2685 -tp2686 -Rp2687 -ag6 -(g10 -S';_\xc6*\x8am\xe0?' -p2688 -tp2689 -Rp2690 -ag6 -(g10 -S'}D\xd1/b\xe0\xde?' 
-p2691 -tp2692 -Rp2693 -asg202 -(lp2694 -g6 -(g10 -S'`\x17\xe3\xffR\xb3\x16@' -p2695 -tp2696 -Rp2697 -ag6 -(g10 -S'\x9b\x05\xe0J\x99\xbc\x15@' -p2698 -tp2699 -Rp2700 -ag6 -(g10 -S'\x02\xc2\x18\x90\xb3A\x16@' -p2701 -tp2702 -Rp2703 -ag6 -(g10 -S'\xfcB`\xb9\xc8\xf4\x15@' -p2704 -tp2705 -Rp2706 -ag6 -(g10 -S'\x91\xae\xb9\\\x13)\x17@' -p2707 -tp2708 -Rp2709 -ag6 -(g10 -S'6T\xa3\xdd?|\x14@' -p2710 -tp2711 -Rp2712 -ag6 -(g10 -S'\xfa\xc3W\t\xf9\x84\x13@' -p2713 -tp2714 -Rp2715 -ag6 -(g10 -S'[+\xe7\xac"M\x16@' -p2716 -tp2717 -Rp2718 -ag6 -(g10 -S'\x9e\x1a\xe7F\x84\x1d\x16@' -p2719 -tp2720 -Rp2721 -ag6 -(g10 -S'@\xae\xac\xe1\x0e\xb8\x14@' -p2722 -tp2723 -Rp2724 -ag6 -(g10 -S'\x14\xe6\xf1\xe7\xc7\x18\x13@' -p2725 -tp2726 -Rp2727 -ag6 -(g10 -S'\x90A-\x168\xde\x12@' -p2728 -tp2729 -Rp2730 -ag6 -(g10 -S'\x15_\x0e\xf6.\x7f\x16@' -p2731 -tp2732 -Rp2733 -ag6 -(g10 -S'\x9cu\x1b\xbd\xb55\x15@' -p2734 -tp2735 -Rp2736 -ag6 -(g10 -S'z\xf7c\xa7E\x86\x16@' -p2737 -tp2738 -Rp2739 -ag6 -(g10 -S'L\x8d\xc0\x0b\x06.\x13@' -p2740 -tp2741 -Rp2742 -ag6 -(g10 -S'\x997\xc1\xaa\xc7\xca\x15@' -p2743 -tp2744 -Rp2745 -ag6 -(g10 -S'\xbc\xbe\xdd\x93x\x1b\x14@' -p2746 -tp2747 -Rp2748 -ag6 -(g10 -S'\x15Fa3\x03\xd6\x13@' -p2749 -tp2750 -Rp2751 -ag6 -(g10 -S'\xc7\xac\xb7\x8a\xaf\x00\x16@' -p2752 -tp2753 -Rp2754 -asg264 -(lp2755 -g6 -(g10 -S'\x10hL\xdc1\xec\xc7?' -p2756 -tp2757 -Rp2758 -ag6 -(g10 -S'\x16D\x16\x88\xb0\xd2\xc7?' -p2759 -tp2760 -Rp2761 -ag6 -(g10 -S'!Y\x15\x8f\x99\xbe\xc7?' -p2762 -tp2763 -Rp2764 -ag6 -(g10 -S'\x92If\x12\x8c\x89\xc5?' -p2765 -tp2766 -Rp2767 -ag6 -(g10 -S'\xf4[_\xfa\xdd\xf7\xc4?' -p2768 -tp2769 -Rp2770 -ag6 -(g10 -S'\x8f[<\xc3\x9eQ\xca?' -p2771 -tp2772 -Rp2773 -ag6 -(g10 -S'\x9f\xb6u\xe9\xc6\x8c\xcf?' -p2774 -tp2775 -Rp2776 -ag6 -(g10 -S'\xa3\xc2\xfei\x19\x03\xc7?' -p2777 -tp2778 -Rp2779 -ag6 -(g10 -S'$\r\xceC\xb0\xa0\xca?' -p2780 -tp2781 -Rp2782 -ag6 -(g10 -S'a\xba;"J\xe3\xce?' -p2783 -tp2784 -Rp2785 -ag6 -(g10 -S'\xf3D\x1b\xc5\xd73\xcc?' 
-p2786 -tp2787 -Rp2788 -ag6 -(g10 -S'\xa5V@^np\xd0?' -p2789 -tp2790 -Rp2791 -ag6 -(g10 -S'\x98\xb9\xe5E\xdb5\xc6?' -p2792 -tp2793 -Rp2794 -ag6 -(g10 -S'\\\\\xfd\xdb\xabB\xd0?' -p2795 -tp2796 -Rp2797 -ag6 -(g10 -S'EH\x8f\xfa\x88c\xc7?' -p2798 -tp2799 -Rp2800 -ag6 -(g10 -S'\xcbCt1P\xb7\xcb?' -p2801 -tp2802 -Rp2803 -ag6 -(g10 -S'\xcb\xea\x14\x16ji\xcb?' -p2804 -tp2805 -Rp2806 -ag6 -(g10 -S'\xcfI\xf1\x07a#\xc8?' -p2807 -tp2808 -Rp2809 -ag6 -(g10 -S'\x9a\x9b\xe3\xac\xc2\xf1\xcc?' -p2810 -tp2811 -Rp2812 -ag6 -(g10 -S'\xdc\x94\x0cGg\xe1\xcd?' -p2813 -tp2814 -Rp2815 -asS"L-BFGS \nw f'" -p2816 -(lp2817 -g6 -(g10 -S'\xabk\xcb\xba\x00\xd4\x86?' -p2818 -tp2819 -Rp2820 -ag6 -(g10 -S'Q\xfd\xc96\xa1\xc5\x86?' -p2821 -tp2822 -Rp2823 -ag6 -(g10 -S'\x0b\xab\xe3\x02+\xad\x86?' -p2824 -tp2825 -Rp2826 -ag6 -(g10 -S'\r\x84\x9f\xff\xea\x98\x84?' -p2827 -tp2828 -Rp2829 -ag6 -(g10 -S'\xe6\x17\xcc\xa7\x0c\x0b\x84?' -p2830 -tp2831 -Rp2832 -ag6 -(g10 -S'\xc2Y\x8b\xa5^%\x89?' -p2833 -tp2834 -Rp2835 -ag6 -(g10 -S'O\xad6;!\x13\x8e?' -p2836 -tp2837 -Rp2838 -ag6 -(g10 -S'\xc8.\xd1\x93\x92\xfc\x85?' -p2839 -tp2840 -Rp2841 -ag6 -(g10 -S'\xa4*\x947\xe7_\x89?' -p2842 -tp2843 -Rp2844 -ag6 -(g10 -S'\xbe\x80 \x82\xc0j\x8d?' -p2845 -tp2846 -Rp2847 -ag6 -(g10 -S'\x8e\xa7\xf0\x03R\xf5\x8a?' -p2848 -tp2849 -Rp2850 -ag6 -(g10 -S'\xdc_;M\xc6T\x8f?' -p2851 -tp2852 -Rp2853 -ag6 -(g10 -S'J\xd3W\x8a\x02;\x85?' -p2854 -tp2855 -Rp2856 -ag6 -(g10 -S'\x96\x9b\x9b\xd4<\xed\x8e?' -p2857 -tp2858 -Rp2859 -ag6 -(g10 -S'\xadG\t\x183V\x86?' -p2860 -tp2861 -Rp2862 -ag6 -(g10 -S'\xfbm\xfa\xe3H~\x8a?' -p2863 -tp2864 -Rp2865 -ag6 -(g10 -S'\x91\xb2\xd7\xe1.\x1f\x8a?' -p2866 -tp2867 -Rp2868 -ag6 -(g10 -S'\xbb\xb3\xb5e\xc2\x12\x87?' -p2869 -tp2870 -Rp2871 -ag6 -(g10 -S'\x93n1L"\x9c\x8b?' -p2872 -tp2873 -Rp2874 -ag6 -(g10 -S'\xb6E\x03\xaano\x8c?' -p2875 -tp2876 -Rp2877 -asS"Conjugate gradient\nw f'" -p2878 -(lp2879 -g6 -(g10 -S'\xd5\x1b\x04\xf9[\xdc\x98?' -p2880 -tp2881 -Rp2882 -ag6 -(g10 -S'\x1c\t\xac\x19y\xf2\x96?' 
-p2883 -tp2884 -Rp2885 -ag6 -(g10 -S'\xe3)K9\x18q\x9b?' -p2886 -tp2887 -Rp2888 -ag6 -(g10 -S'\x1a\xf0\x14\xe5g\xa0\x9b?' -p2889 -tp2890 -Rp2891 -ag6 -(g10 -S'\xed\xb9\x15Qu\x81\x94?' -p2892 -tp2893 -Rp2894 -ag6 -(g10 -S'`\xf5\xe0t\xa6u\x9a?' -p2895 -tp2896 -Rp2897 -ag6 -(g10 -S'\x1f\xed\xb1\xed\xc5\x06\x9f?' -p2898 -tp2899 -Rp2900 -ag6 -(g10 -S'4f!\x9c\x9e\xca\x97?' -p2901 -tp2902 -Rp2903 -ag6 -(g10 -S'T\xb3\x80.`\xb9\x9b?' -p2904 -tp2905 -Rp2906 -ag6 -(g10 -S'XE\x0f%\xe6\xe8\xa1?' -p2907 -tp2908 -Rp2909 -ag6 -(g10 -S'\x92\xad\xbe\xbdR\xba\x9f?' -p2910 -tp2911 -Rp2912 -ag6 -(g10 -S')U\x1d\x93\xb2\xba\xa0?' -p2913 -tp2914 -Rp2915 -ag6 -(g10 -S'\xb3\xe8\xb6\x98\xf8i\x94?' -p2916 -tp2917 -Rp2918 -ag6 -(g10 -S'\xf7\x9a_n\xf1}\x9d?' -p2919 -tp2920 -Rp2921 -ag6 -(g10 -S"'o\xbb\xc4\xa2\x7f\x96?" -p2922 -tp2923 -Rp2924 -ag6 -(g10 -S'5\xbe \xfc\x91\x96\x9f?' -p2925 -tp2926 -Rp2927 -ag6 -(g10 -S'\xef-\xbb\x16\xcf7\x9c?' -p2928 -tp2929 -Rp2930 -ag6 -(g10 -S'\x10\xa5X\xb4\xac\x82\x9b?' -p2931 -tp2932 -Rp2933 -ag6 -(g10 -S'\x80NnRn\x1b\x9f?' -p2934 -tp2935 -Rp2936 -ag6 -(g10 -S'l\xbc8\x8d\xacS\xa0?' -p2937 -tp2938 -Rp2939 -asS"BFGS\nw f'" -p2940 -(lp2941 -g6 -(g10 -S'l\x96E\xab\xa5\x80\xaa?' -p2942 -tp2943 -Rp2944 -ag6 -(g10 -S'\x0ee\xb8\xbeY\x1d\xac?' -p2945 -tp2946 -Rp2947 -ag6 -(g10 -S'"\xe6V\x9a\xff\xba\xa8?' -p2948 -tp2949 -Rp2950 -ag6 -(g10 -S'\xd9:`\xf8\x06\t\xac?' -p2951 -tp2952 -Rp2953 -ag6 -(g10 -S'\xedaR$\xa1\xfa\xa9?' -p2954 -tp2955 -Rp2956 -ag6 -(g10 -S'R-\x06\xb9O\x8e\xad?' -p2957 -tp2958 -Rp2959 -ag6 -(g10 -S'\xab\xe6\xd6\xbc\xddO\xaf?' -p2960 -tp2961 -Rp2962 -ag6 -(g10 -S'zz\xc58\xb7{\xab?' -p2963 -tp2964 -Rp2965 -ag6 -(g10 -S'\x1c\xcaR\x02\x9f\x1d\xac?' -p2966 -tp2967 -Rp2968 -ag6 -(g10 -S'\x81j\x96)\x1cw\xac?' -p2969 -tp2970 -Rp2971 -ag6 -(g10 -S'\xd4hI\x8c+,\xae?' -p2972 -tp2973 -Rp2974 -ag6 -(g10 -S'B)\x80\x14\x96\xf8\xb0?' -p2975 -tp2976 -Rp2977 -ag6 -(g10 -S'\x8b\xbf\xc7\xc0`\xdd\xaa?' -p2978 -tp2979 -Rp2980 -ag6 -(g10 -S'\x1eE\xd8C\xddK\xac?' 
-p2981 -tp2982 -Rp2983 -ag6 -(g10 -S'\x06\x10"\xe4\x0eh\xac?' -p2984 -tp2985 -Rp2986 -ag6 -(g10 -S'I\x0f\x1e\xbb\xab@\xb0?' -p2987 -tp2988 -Rp2989 -ag6 -(g10 -S'\x8f\x9d\x1dd\x90\xf1\xac?' -p2990 -tp2991 -Rp2992 -ag6 -(g10 -S'\x88\x87&\x15F\x15\xb1?' -p2993 -tp2994 -Rp2995 -ag6 -(g10 -S"\xa6'\xa1\x1d\xa9\x9c\xb0?" -p2996 -tp2997 -Rp2998 -ag6 -(g10 -S'!c\x15@v\xf3\xaa?' -p2999 -tp3000 -Rp3001 -assg512 -(dp3002 -g4 -(lp3003 -g6 -(g10 -S'wO\xa7\xe0\xc5\x9e\xce?' -p3004 -tp3005 -Rp3006 -ag6 -(g10 -S'~\xa3&\xc3\xbdC\xcf?' -p3007 -tp3008 -Rp3009 -ag6 -(g10 -S'e\xd5\xf9\xe4:\x8a\xd0?' -p3010 -tp3011 -Rp3012 -ag6 -(g10 -S'"\x1bG2F\x9d\xd1?' -p3013 -tp3014 -Rp3015 -ag6 -(g10 -S'\x86\xaf\xf4V\x16\xf9\xcd?' -p3016 -tp3017 -Rp3018 -ag6 -(g10 -S'M_\xd2\xb36\x0c\xce?' -p3019 -tp3020 -Rp3021 -ag6 -(g10 -S'DjM6\xb2K\xd1?' -p3022 -tp3023 -Rp3024 -ag6 -(g10 -S'B\xac\xeeep\xf1\xd4?' -p3025 -tp3026 -Rp3027 -ag6 -(g10 -S'\x98,\x8b\xe0\xe6\xff\xc6?' -p3028 -tp3029 -Rp3030 -ag6 -(g10 -S'7um\xa0I\xee\xcc?' -p3031 -tp3032 -Rp3033 -ag6 -(g10 -S'\xd7\xd1\x8btc\x8b\xca?' -p3034 -tp3035 -Rp3036 -ag6 -(g10 -S'f\x83\xd2\x1c\xa5\xea\xd3?' -p3037 -tp3038 -Rp3039 -ag6 -(g10 -S'\x0fL\xe7\x92\xa1s\xd3?' -p3040 -tp3041 -Rp3042 -ag6 -(g10 -S'\x01\xb9@\xb6\xc9\x9d\xde?' -p3043 -tp3044 -Rp3045 -ag6 -(g10 -S'\x9d\x7f\xed\xb1\xca\xe4\xcf?' -p3046 -tp3047 -Rp3048 -ag6 -(g10 -S'U\xae\xdf\\\xc13\xdb?' -p3049 -tp3050 -Rp3051 -ag6 -(g10 -S'\x8e,\x03$\xf1\xa8\xd6?' -p3052 -tp3053 -Rp3054 -ag6 -(g10 -S'\x95\x9dLy=\xd0\xd1?' -p3055 -tp3056 -Rp3057 -ag6 -(g10 -S'\x83\xff\xeb\xcb\xa8\x08\xd7?' -p3058 -tp3059 -Rp3060 -ag6 -(g10 -S'\xb1\xc5\x13\xc1\xe2$\xd1?' -p3061 -tp3062 -Rp3063 -asg73 -(lp3064 -g6 -(g10 -S'Ha\xf6Q\x89I\xd4?' -p3065 -tp3066 -Rp3067 -ag6 -(g10 -S'\xde\xb7\xcb\xc4\xa2\xae\xd6?' -p3068 -tp3069 -Rp3070 -ag6 -(g10 -S'a\xf3M\xc4\xf9\xef\xda?' -p3071 -tp3072 -Rp3073 -ag6 -(g10 -S'X\xcfG\xf1\x0c\x99\xd7?' -p3074 -tp3075 -Rp3076 -ag6 -(g10 -S'O,\x08\xaa\xfc\x96\xd2?' 
-p3077 -tp3078 -Rp3079 -ag6 -(g10 -S'A\x81\xb2n\xf5\xdb\xd5?' -p3080 -tp3081 -Rp3082 -ag6 -(g10 -S"'\x9d|\x87\xe2\x16\xd7?" -p3083 -tp3084 -Rp3085 -ag6 -(g10 -S'B\xddiQ\x14\x8c\xde?' -p3086 -tp3087 -Rp3088 -ag6 -(g10 -S'\x02\xfd\x9aN\x02g\xce?' -p3089 -tp3090 -Rp3091 -ag6 -(g10 -S'o\xf5\x07\x83\xc5\x08\xd3?' -p3092 -tp3093 -Rp3094 -ag6 -(g10 -S'\xbf\x86\xdd\xb5\x19\x82\xd3?' -p3095 -tp3096 -Rp3097 -ag6 -(g10 -S')\x1e\xda\xd7\xf7?\xdd?' -p3098 -tp3099 -Rp3100 -ag6 -(g10 -S'\xa6.k*)\xf9\xd9?' -p3101 -tp3102 -Rp3103 -ag6 -(g10 -S'o\xa6#\x14\xf8\xc9\xeb?' -p3104 -tp3105 -Rp3106 -ag6 -(g10 -S'JVn\xab\x93d\xd7?' -p3107 -tp3108 -Rp3109 -ag6 -(g10 -S'\xf9\xf2\x19\xcb,\xea\xe6?' -p3110 -tp3111 -Rp3112 -ag6 -(g10 -S'\xff\xb9\xd7\x86u<\xe2?' -p3113 -tp3114 -Rp3115 -ag6 -(g10 -S'\x90\x11`\x97\xea\xb7\xd7?' -p3116 -tp3117 -Rp3118 -ag6 -(g10 -S' \n\x9fxQ\x0c\xe1?' -p3119 -tp3120 -Rp3121 -ag6 -(g10 -S'\x8b\x9a\xb7\xc8\x18\xfd\xd6?' -p3122 -tp3123 -Rp3124 -asS'Newton\nw Hessian ' -p3125 -(lp3126 -g6 -(g10 -S'\x9f\x17S\xe9\x15K\x1f?' 
-p3127 -tp3128 -Rp3129 -asg140 -(lp3130 -g6 -(g10 -S'\x15\xafs&=\x07\x1b@' -p3131 -tp3132 -Rp3133 -ag6 -(g10 -S'\xc7\xfc\x16*\xfd\x00\x1c@' -p3134 -tp3135 -Rp3136 -ag6 -(g10 -S'\xaf\x8a\x08K\xb4O\x1c@' -p3137 -tp3138 -Rp3139 -ag6 -(g10 -S'\xc4Vy\x0b\x00m\x1b@' -p3140 -tp3141 -Rp3142 -ag6 -(g10 -S'\xf7\x9c\x99\xe9\x02\xaa\x1b@' -p3143 -tp3144 -Rp3145 -ag6 -(g10 -S'\xa0\xe5L\xc6h;\x1c@' -p3146 -tp3147 -Rp3148 -ag6 -(g10 -S'X\xc6\xdfs\xfd\xa2\x1a@' -p3149 -tp3150 -Rp3151 -ag6 -(g10 -S'\x8a\x00\x81u\xd5\xfd\x1a@' -p3152 -tp3153 -Rp3154 -ag6 -(g10 -S'e\xcb\xbf\xb2\xc1\x9e\x1d@' -p3155 -tp3156 -Rp3157 -ag6 -(g10 -S'I\x05|>\x9d\xfb\x1b@' -p3158 -tp3159 -Rp3160 -ag6 -(g10 -S'\xe9\x9b6\xcc#\xea\x1d@' -p3161 -tp3162 -Rp3163 -ag6 -(g10 -S'\r\x19,\xdeM\xc5\x1a@' -p3164 -tp3165 -Rp3166 -ag6 -(g10 -S'W\xe6\xf8\xef\xb9\xc3\x1a@' -p3167 -tp3168 -Rp3169 -ag6 -(g10 -S'\x97\xc2H\x88\xfdL\x18@' -p3170 -tp3171 -Rp3172 -ag6 -(g10 -S'-\xb9\xa3\xe4o\x92\x1b@' -p3173 -tp3174 -Rp3175 -ag6 -(g10 -S'\xd2\xd9\x89\xe0\xfe\xf4\x18@' -p3176 -tp3177 -Rp3178 -ag6 -(g10 -S'\xd8\x98\xedb\x8d\xcf\x19@' -p3179 -tp3180 -Rp3181 -ag6 -(g10 -S'GoE\xe6Hl\x1a@' -p3182 -tp3183 -Rp3184 -ag6 -(g10 -S'\xefH\xae\xe9lq\x18@' -p3185 -tp3186 -Rp3187 -ag6 -(g10 -S'\xe2=OW\x0fZ\x1b@' -p3188 -tp3189 -Rp3190 -asg202 -(lp3191 -g6 -(g10 -S'\x8dCl\xe2gS\x89?' -p3192 -tp3193 -Rp3194 -ag6 -(g10 -S'\xd2b\x97V\xb3\xb4\x95?' -p3195 -tp3196 -Rp3197 -ag6 -(g10 -S'1d{u(\xf9\x9a?' -p3198 -tp3199 -Rp3200 -ag6 -(g10 -S'\xcbSN\xf9\xe0\xe7\x96?' -p3201 -tp3202 -Rp3203 -ag6 -(g10 -S'#\x9d\xea.\xb5\xe0\x91?' -p3204 -tp3205 -Rp3206 -ag6 -(g10 -S'\xe4d\xab\x17\xc7&\x95?' -p3207 -tp3208 -Rp3209 -ag6 -(g10 -S'\x92d\xc8\x94p\xa4\x8c?' -p3210 -tp3211 -Rp3212 -ag6 -(g10 -S'\t\x99;A\xd5\xf8\x92?' -p3213 -tp3214 -Rp3215 -ag6 -(g10 -S'\\\x86\x85\xc5b\x08\x83?' -p3216 -tp3217 -Rp3218 -ag6 -(g10 -S'\x91g\x02\xbfH\x84\x92?' -p3219 -tp3220 -Rp3221 -ag6 -(g10 -S'D\x05\xee?\x94%\x88?' -p3222 -tp3223 -Rp3224 -ag6 -(g10 -S'\xa9h\xb4\x10\x19\x1e\x92?' 
-p3225 -tp3226 -Rp3227 -ag6 -(g10 -S'\xdb\xf5\x05?\xe92\x99?' -p3228 -tp3229 -Rp3230 -ag6 -(g10 -S'\xe8\xc0\xf2E"H\xa1?' -p3231 -tp3232 -Rp3233 -ag6 -(g10 -S'o\xb6\xe03i{\x96?' -p3234 -tp3235 -Rp3236 -ag6 -(g10 -S"'\x19\x90\xea-\xf5\xa5?" -p3237 -tp3238 -Rp3239 -ag6 -(g10 -S'\xddf\xf7\xe8\xb8s\x96?' -p3240 -tp3241 -Rp3242 -ag6 -(g10 -S'C\n\xec\xd6\xfa\xd0\x96?' -p3243 -tp3244 -Rp3245 -ag6 -(g10 -S'GYL:V\xcf\x94?' -p3246 -tp3247 -Rp3248 -ag6 -(g10 -S"z\x14rC\xe9'\x96?" -p3249 -tp3250 -Rp3251 -asg264 -(lp3252 -g6 -(g10 -S'\x8c)\xaanc$\xef?' -p3253 -tp3254 -Rp3255 -ag6 -(g10 -S'\x8d\x8a\x0e70\xc6\xf0?' -p3256 -tp3257 -Rp3258 -ag6 -(g10 -S'\xc1\xb7\xd81\xdbk\xed?' -p3259 -tp3260 -Rp3261 -ag6 -(g10 -S'yrr\xedV\x7f\xef?' -p3262 -tp3263 -Rp3264 -ag6 -(g10 -S'\xa9B\xbc*qm\xf0?' -p3265 -tp3266 -Rp3267 -ag6 -(g10 -S'~H\xc0\xcc\xf6\xde\xe9?' -p3268 -tp3269 -Rp3270 -ag6 -(g10 -S'\xf5~\x9eCG\x17\xf0?' -p3271 -tp3272 -Rp3273 -ag6 -(g10 -S'\x9b\xd2\xec\xcb3\xa0\xed?' -p3274 -tp3275 -Rp3276 -ag6 -(g10 -S'\x87\xaf\xfe6\xa4\xe3\xe7?' -p3277 -tp3278 -Rp3279 -ag6 -(g10 -S'X\xd6AX\x1e\x80\xeb?' -p3280 -tp3281 -Rp3282 -ag6 -(g10 -S'W\xd8\x92\xba\x07\x93\xe5?' -p3283 -tp3284 -Rp3285 -ag6 -(g10 -S'N"\xfe\xb0P\xa5\xf0?' -p3286 -tp3287 -Rp3288 -ag6 -(g10 -S'yHs[z\x1a\xf1?' -p3289 -tp3290 -Rp3291 -ag6 -(g10 -S'\x02Q\xedF\x8eB\xf0?' -p3292 -tp3293 -Rp3294 -ag6 -(g10 -S'\xdd\xc3F5\xda\xd3\xef?' -p3295 -tp3296 -Rp3297 -ag6 -(g10 -S'r3\xf8\x19\x98\xd2\xef?' -p3298 -tp3299 -Rp3300 -ag6 -(g10 -S"q\x99$'\x11*\xf2?" -p3301 -tp3302 -Rp3303 -ag6 -(g10 -S'\xb5G\xd1\x15c\x1d\xf2?' -p3304 -tp3305 -Rp3306 -ag6 -(g10 -S'\xc8\xf43R*\xc7\xf3?' -p3307 -tp3308 -Rp3309 -ag6 -(g10 -S'1\xefU$\xb3\xea\xf0?' -p3310 -tp3311 -Rp3312 -asS"L-BFGS \nw f'" -p3313 -(lp3314 -g6 -(g10 -S'\xf1\x90\x85\xa14-\xb0?' -p3315 -tp3316 -Rp3317 -ag6 -(g10 -S'\xddT\x16\x93\x90q\xaa?' -p3318 -tp3319 -Rp3320 -ag6 -(g10 -S'\x1a\xbd9\xd2\x851\xa8?' -p3321 -tp3322 -Rp3323 -ag6 -(g10 -S'\x87\xe5\n\xb1se\xb0?' 
-p3324 -tp3325 -Rp3326 -ag6 -(g10 -S'`\x01\xa1\xa5\x922\xac?' -p3327 -tp3328 -Rp3329 -ag6 -(g10 -S"\xfa'u|\xfcc\xa9?" -p3330 -tp3331 -Rp3332 -ag6 -(g10 -S'(Z\x07\xb5\x03\xe5\xae?' -p3333 -tp3334 -Rp3335 -ag6 -(g10 -S'\x1bW_KD\xb9\xb0?' -p3336 -tp3337 -Rp3338 -ag6 -(g10 -S'\x01-"\xcb\xe3s\xa5?' -p3339 -tp3340 -Rp3341 -ag6 -(g10 -S'\xf0\xea\xa1\xd5\xc4T\xaf?' -p3342 -tp3343 -Rp3344 -ag6 -(g10 -S's\xbf\x18\\.X\xa4?' -p3345 -tp3346 -Rp3347 -ag6 -(g10 -S'\xc9\x99\x88(\xa6*\xb0?' -p3348 -tp3349 -Rp3350 -ag6 -(g10 -S'\xa9MO>C\x10\xaf?' -p3351 -tp3352 -Rp3353 -ag6 -(g10 -S'\x11v\xb4\x19\xf6L\xad?' -p3354 -tp3355 -Rp3356 -ag6 -(g10 -S'\x92u2\x01h\xf2\xab?' -p3357 -tp3358 -Rp3359 -ag6 -(g10 -S'\x93?\xa2\xcf\xf2\xb1\xac?' -p3360 -tp3361 -Rp3362 -ag6 -(g10 -S'\xa9\xe7@S\xeb\x91\xb0?' -p3363 -tp3364 -Rp3365 -ag6 -(g10 -S'\x1f\xfb\x10V\xc5\xa9\xb0?' -p3366 -tp3367 -Rp3368 -ag6 -(g10 -S'<\x83\x04\xb9\xa1\xe6\xaf?' -p3369 -tp3370 -Rp3371 -ag6 -(g10 -S'#Q)x\xc4{\xae?' -p3372 -tp3373 -Rp3374 -asS"Conjugate gradient\nw f'" -p3375 -(lp3376 -g6 -(g10 -S'\x08>\x1a\xe7\xea\xf5\xe3?' -p3377 -tp3378 -Rp3379 -ag6 -(g10 -S'\xa4\x18\xe6\x1e\x9a\xef\xd0?' -p3380 -tp3381 -Rp3382 -ag6 -(g10 -S'\x15\xcf \xc77\x02\xce?' -p3383 -tp3384 -Rp3385 -ag6 -(g10 -S'f\xef\x8d\xffZg\xda?' -p3386 -tp3387 -Rp3388 -ag6 -(g10 -S"'\xee\xd3d\x80\x8e\xdc?" -p3389 -tp3390 -Rp3391 -ag6 -(g10 -S'`!6\x91\xf2E\xde?' -p3392 -tp3393 -Rp3394 -ag6 -(g10 -S'\x1dL}n\xe1\xa3\xe3?' -p3395 -tp3396 -Rp3397 -ag6 -(g10 -S'\x8a\xcdO\xf83\xc8\xda?' -p3398 -tp3399 -Rp3400 -ag6 -(g10 -S'\xb1m\xd9\x93\x99\xa3\xd7?' -p3401 -tp3402 -Rp3403 -ag6 -(g10 -S'E\x83D_\xa5\xeb\xe0?' -p3404 -tp3405 -Rp3406 -ag6 -(g10 -S'\xf4\xba\xf9n\xc1Y\xd1?' -p3407 -tp3408 -Rp3409 -ag6 -(g10 -S'\xb3\r\xe0\x8e\xa8\x8f\xd9?' -p3410 -tp3411 -Rp3412 -ag6 -(g10 -S'%[\xc9\xa7\xc1P\xdb?' -p3413 -tp3414 -Rp3415 -ag6 -(g10 -S'\x1f\xd1\xf8\xbb\x8cN\xdc?' -p3416 -tp3417 -Rp3418 -ag6 -(g10 -S'lN(\xe7\xae\x01\xda?' 
-p3419 -tp3420 -Rp3421 -ag6 -(g10 -S'\x82\x00\x051\xad\x07\xe0?' -p3422 -tp3423 -Rp3424 -ag6 -(g10 -S'\xa1\xdb\xedz-X\xd8?' -p3425 -tp3426 -Rp3427 -ag6 -(g10 -S'\x0f|\xa6v\xbbK\xe0?' -p3428 -tp3429 -Rp3430 -ag6 -(g10 -S'\x0c\xd1z\xabv\x01\xe5?' -p3431 -tp3432 -Rp3433 -ag6 -(g10 -S'o\x911\xfa\xad[\xd8?' -p3434 -tp3435 -Rp3436 -asS"BFGS\nw f'" -p3437 -(lp3438 -g6 -(g10 -S'\xab\x1e\xf8:\xa4\xdf\x8c?' -p3439 -tp3440 -Rp3441 -ag6 -(g10 -S'\xe5]\xbf\xccn|\x8d?' -p3442 -tp3443 -Rp3444 -ag6 -(g10 -S'Adm \xae4\x8f?' -p3445 -tp3446 -Rp3447 -ag6 -(g10 -S')\x95c?\x1f\x9c\x90?' -p3448 -tp3449 -Rp3450 -ag6 -(g10 -S'3\xe9s\xc6ZB\x8c?' -p3451 -tp3452 -Rp3453 -ag6 -(g10 -S'\xe7\n\xa0.\xc0V\x8c?' -p3454 -tp3455 -Rp3456 -ag6 -(g10 -S'\xd7\x0c\xf482O\x90?' -p3457 -tp3458 -Rp3459 -ag6 -(g10 -S'\xfa%m\xaa\x92\xc0\x93?' -p3460 -tp3461 -Rp3462 -ag6 -(g10 -S'\xa5\xf2n\xad\x02\xb0\x85?' -p3463 -tp3464 -Rp3465 -ag6 -(g10 -S'\xfa=\xd6\xe6\xa0G\x8b?' -p3466 -tp3467 -Rp3468 -ag6 -(g10 -S'\x83\xf1\x8c0\n\t\x89?' -p3469 -tp3470 -Rp3471 -ag6 -(g10 -S'\xdb<\xc7\x8b\xc3\xc8\x92?' -p3472 -tp3473 -Rp3474 -ag6 -(g10 -S'mR\x1b,\x9fW\x92?' -p3475 -tp3476 -Rp3477 -ag6 -(g10 -S'\xb5*z1\xda\xe3\x9c?' -p3478 -tp3479 -Rp3480 -ag6 -(g10 -S':\x92\x9a\xefs\x14\x8e?' -p3481 -tp3482 -Rp3483 -ag6 -(g10 -S'\xe2G\x8dh\xcd\xa9\x99?' -p3484 -tp3485 -Rp3486 -ag6 -(g10 -S'\xec\xd0\x14{;`\x95?' -p3487 -tp3488 -Rp3489 -ag6 -(g10 -S'\x10\x9c\xf6\xf0\x0e\xcc\x90?' -p3490 -tp3491 -Rp3492 -ag6 -(g10 -S'^\x00\x0fg\x81\xb9\x95?' -p3493 -tp3494 -Rp3495 -ag6 -(g10 -S'\xf4,\xc9\x11\x8a*\x90?' -p3496 -tp3497 -Rp3498 -assg1010 -(dp3499 -g4 -(lp3500 -g6 -(g10 -S'\x19\x12\x084\x97\xb5\xf2?' -p3501 -tp3502 -Rp3503 -ag6 -(g10 -S'm\xcc\x96`\x14)\xe0?' -p3504 -tp3505 -Rp3506 -ag6 -(g10 -S'\xa1l\xde\xd6\xda\x03\xec?' -p3507 -tp3508 -Rp3509 -ag6 -(g10 -S'T\xb6\x15:\x02\xd8\xe8?' -p3510 -tp3511 -Rp3512 -ag6 -(g10 -S'47\x9d\x013\xb2\xd8?' -p3513 -tp3514 -Rp3515 -ag6 -(g10 -S's\x11\xb7\xbd\x95\x02\xe4?' 
-p3516 -tp3517 -Rp3518 -ag6 -(g10 -S'I\xa4\xddXV\x15\xed?' -p3519 -tp3520 -Rp3521 -ag6 -(g10 -S'9(\xad\n\xdd\xfa\xd9?' -p3522 -tp3523 -Rp3524 -ag6 -(g10 -S'\xa6\xc8g\xdd`\x8a\xf0?' -p3525 -tp3526 -Rp3527 -ag6 -(g10 -S'\x0c\xb7leI\xe6\xf1?' -p3528 -tp3529 -Rp3530 -ag6 -(g10 -S'\x05\xf5\xcfm\xe0\xb5\xdb?' -p3531 -tp3532 -Rp3533 -ag6 -(g10 -S'\x9d\x83\xe6b|\x19\xdd?' -p3534 -tp3535 -Rp3536 -ag6 -(g10 -S'\x83\xa1(\x84\x0f\xb4\xd4?' -p3537 -tp3538 -Rp3539 -ag6 -(g10 -S'\x98\x12\xc1#\xfd\xf5\xec?' -p3540 -tp3541 -Rp3542 -ag6 -(g10 -S'\xdb\x95\xa8]\x89\xda\xed?' -p3543 -tp3544 -Rp3545 -ag6 -(g10 -S'\x89\xbe\xea\x14\xa7\xc5\xee?' -p3546 -tp3547 -Rp3548 -ag6 -(g10 -S'2\x9d\xba\xc8\x1b\xff\xf1?' -p3549 -tp3550 -Rp3551 -ag6 -(g10 -S'f\x03G.B\x10\xe9?' -p3552 -tp3553 -Rp3554 -ag6 -(g10 -S')\xeak\xccE\x8b\xf0?' -p3555 -tp3556 -Rp3557 -ag6 -(g10 -S'#_zo\r\x9b\xe5?' -p3558 -tp3559 -Rp3560 -asg73 -(lp3561 -g6 -(g10 -S'\x81m\xd3\xda\x1f\x95\xfb?' -p3562 -tp3563 -Rp3564 -ag6 -(g10 -S'J\xc7\x9c\x08\x8b\xc2\xf6?' -p3565 -tp3566 -Rp3567 -ag6 -(g10 -S'\xedg\xf8\x14\xe0\xfb\x02@' -p3568 -tp3569 -Rp3570 -ag6 -(g10 -S'\x83\x80\xb1A\xe4|\xfa?' -p3571 -tp3572 -Rp3573 -ag6 -(g10 -S'\xbf\xde~\x0fPH\xf0?' -p3574 -tp3575 -Rp3576 -ag6 -(g10 -S'\x1dC\x05+v\x00\xff?' -p3577 -tp3578 -Rp3579 -ag6 -(g10 -S'\x18/\x03W+P\x03@' -p3580 -tp3581 -Rp3582 -ag6 -(g10 -S'\x1b\x8d\xa8\xf7\\4\xf1?' -p3583 -tp3584 -Rp3585 -ag6 -(g10 -S'\xfe\x88\x05\xdc\xe9\x0f\xff?' -p3586 -tp3587 -Rp3588 -ag6 -(g10 -S'\xcd\xac|\x11\x91\xfb\xf1?' -p3589 -tp3590 -Rp3591 -ag6 -(g10 -S'\xce\x04\xf5\xcfm\xe0\xed?' -p3592 -tp3593 -Rp3594 -ag6 -(g10 -S'\x99\xecRf\x13\xad\t@' -p3595 -tp3596 -Rp3597 -ag6 -(g10 -S'\xd3\xcd0\xcb\xcax\xed?' -p3598 -tp3599 -Rp3600 -ag6 -(g10 -S'x:\xe31\xc5H\x01@' -p3601 -tp3602 -Rp3603 -ag6 -(g10 -S'sd\xd2\xd7\xab\xa9\xfd?' 
-p3604 -tp3605 -Rp3606 -ag6 -(g10 -S'\x94\xe5G!\xd9D\x05@' -p3607 -tp3608 -Rp3609 -ag6 -(g10 -S'\x86[H;\xc3\xba\x05@' -p3610 -tp3611 -Rp3612 -ag6 -(g10 -S'\x1c\xfc\xdc\xc4\xebI\x01@' -p3613 -tp3614 -Rp3615 -ag6 -(g10 -S'\xfeR\x7f\xb3\xf8\xf8\x05@' -p3616 -tp3617 -Rp3618 -ag6 -(g10 -S'\x1a\xb8E@\x0fV\xf3?' -p3619 -tp3620 -Rp3621 -asS'Newton\nw Hessian ' -p3622 -(lp3623 -g6 -(g10 -S'\xf7\x99X\x0c^=w?' -p3624 -tp3625 -Rp3626 -asg140 -(lp3627 -g6 -(g10 -S'R\xd8\xea-\x03$\t@' -p3628 -tp3629 -Rp3630 -ag6 -(g10 -S'\xb9\xb7\xefp\x89?\x11@' -p3631 -tp3632 -Rp3633 -ag6 -(g10 -S'wx{M\x86\xa4\x04@' -p3634 -tp3635 -Rp3636 -ag6 -(g10 -S'\xa1\xc9e\x91\x17\xee\x10@' -p3637 -tp3638 -Rp3639 -ag6 -(g10 -S'\xc3\x9a\x9f8O\x19\x14@' -p3640 -tp3641 -Rp3642 -ag6 -(g10 -S'\xc9\x86\x1c\x1fs\x1d\x11@' -p3643 -tp3644 -Rp3645 -ag6 -(g10 -S'&\xb7\x16\xa6\xab,\xfc?' -p3646 -tp3647 -Rp3648 -ag6 -(g10 -S'6\xa9\x8c\x01\xe3\xcd\x14@' -p3649 -tp3650 -Rp3651 -ag6 -(g10 -S'U\xd2\xa8\xb6\\e\x04@' -p3652 -tp3653 -Rp3654 -ag6 -(g10 -S'#\x0e\xe5.h1\x11@' -p3655 -tp3656 -Rp3657 -ag6 -(g10 -S'\xcbx\x10H[/\x18@' -p3658 -tp3659 -Rp3660 -ag6 -(g10 -S':h.\xc6\x97\xd1\xe9?' -p3661 -tp3662 -Rp3663 -ag6 -(g10 -S',\x03\x90\xda\xa6`\x17@' -p3664 -tp3665 -Rp3666 -ag6 -(g10 -S'\x8eo\x86\xd6\xee\xc3\r@' -p3667 -tp3668 -Rp3669 -ag6 -(g10 -S'\xdfV@\xdd\x7f\xa7\x00@' -p3670 -tp3671 -Rp3672 -ag6 -(g10 -S'\xbe\xa6\x81\xebm\xb2\x03@' -p3673 -tp3674 -Rp3675 -ag6 -(g10 -S'\x95\xfcT\x0c\xb7\x90\x06@' -p3676 -tp3677 -Rp3678 -ag6 -(g10 -S'N\xf3!\t\xa3\x19\xfa?' -p3679 -tp3680 -Rp3681 -ag6 -(g10 -S'\xb3\xe4\x86?\x17\xae\x04@' -p3682 -tp3683 -Rp3684 -ag6 -(g10 -S'""""""\x12@' -p3685 -tp3686 -Rp3687 -asg202 -(lp3688 -g6 -(g10 -S'\xea]\x00\x0eA\x08\xfb?' -p3689 -tp3690 -Rp3691 -ag6 -(g10 -S'\xeb\\\xd1\xde\xd8#\xed?' -p3692 -tp3693 -Rp3694 -ag6 -(g10 -S'aA`/c#\x02@' -p3695 -tp3696 -Rp3697 -ag6 -(g10 -S'\xf3\xe6\x818i\xde\xf5?' -p3698 -tp3699 -Rp3700 -ag6 -(g10 -S'iK0\xa4%\xed\xe3?' 
-p3701 -tp3702 -Rp3703 -ag6 -(g10 -S'\xa2\x17\x00\x83!\xc1\xf4?' -p3704 -tp3705 -Rp3706 -ag6 -(g10 -S'\x0e\xe5\x04k\x1c>\x07@' -p3707 -tp3708 -Rp3709 -ag6 -(g10 -S'W\x11\xdb;,\x8c\xf4?' -p3710 -tp3711 -Rp3712 -ag6 -(g10 -S'\xd01\xbc8\x0f\x1d\xf5?' -p3713 -tp3714 -Rp3715 -ag6 -(g10 -S'\xab\x08\x90\xe4`\x15\xf6?' -p3716 -tp3717 -Rp3718 -ag6 -(g10 -S'MP\xff\xdc\x06^\xeb?' -p3719 -tp3720 -Rp3721 -ag6 -(g10 -S'/\x04\xd5\xd0\xfb*\x0f@' -p3722 -tp3723 -Rp3724 -ag6 -(g10 -S'\xb4PC\x01b\xe8\xf0?' -p3725 -tp3726 -Rp3727 -ag6 -(g10 -S'Z\xa9\xc1\x96\x00s\xf3?' -p3728 -tp3729 -Rp3730 -ag6 -(g10 -S'\xf9\xb6\x02\xea\xfe;\x00@' -p3731 -tp3732 -Rp3733 -ag6 -(g10 -S'\xd2\t\xb4\xae?\xf0\xfd?' -p3734 -tp3735 -Rp3736 -ag6 -(g10 -S'"\xe2J\x98\x8d\xe5\xf5?' -p3737 -tp3738 -Rp3739 -ag6 -(g10 -S'*\x86\x9c\xb6\xff\xd4\xfd?' -p3740 -tp3741 -Rp3742 -ag6 -(g10 -S'\xec\x7f>\x84\x8eL\xf8?' -p3743 -tp3744 -Rp3745 -ag6 -(g10 -S'J\xf3\xe2O$\x9c\xf2?' -p3746 -tp3747 -Rp3748 -asg264 -(lp3749 -g6 -(g10 -S'i\x1d\x8d\xb4\x15g\xee?' -p3750 -tp3751 -Rp3752 -ag6 -(g10 -S'\xfa\x1a\xda5\xdb{\xf9?' -p3753 -tp3754 -Rp3755 -ag6 -(g10 -S'n\x14\xce\xfe\x0e+\xe3?' -p3756 -tp3757 -Rp3758 -ag6 -(g10 -S'\x8b\xa15R&\x85\xe3?' -p3759 -tp3760 -Rp3761 -ag6 -(g10 -S'\x90\xbd(\xfd\x9b\xd0\xf5?' -p3762 -tp3763 -Rp3764 -ag6 -(g10 -S'8\xaf>u\xe2?\xdc?' -p3765 -tp3766 -Rp3767 -ag6 -(g10 -S'\xc0\xef\xc1\x8d\xabr\xe9?' -p3768 -tp3769 -Rp3770 -ag6 -(g10 -S'2\x83Wi\x81\xbb\xd6?' -p3771 -tp3772 -Rp3773 -ag6 -(g10 -S'"\x9fu\x83)\xf2\xfc?' -p3774 -tp3775 -Rp3776 -ag6 -(g10 -S'\x1d\x1d\xf5\xd0&\x9c\xe6?' -p3777 -tp3778 -Rp3779 -ag6 -(g10 -S'\xf78Y\xc6\x83@\xca?' -p3780 -tp3781 -Rp3782 -ag6 -(g10 -S'\x9d\x83\xe6b|\x19\xdd?' -p3783 -tp3784 -Rp3785 -ag6 -(g10 -S'`\xf2\xde\xc7b\xf1\xd1?' -p3786 -tp3787 -Rp3788 -ag6 -(g10 -S'\xe8*\x96\x8do\x86\xe6?' -p3789 -tp3790 -Rp3791 -ag6 -(g10 -S'\xdb\x95\xa8]\x89\xda\xfd?' -p3792 -tp3793 -Rp3794 -ag6 -(g10 -S'\xde\x99i\x0f\x96\xac\xe6?' 
-p3795 -tp3796 -Rp3797 -ag6 -(g10 -S'\x98\x8d\xe5\xf5\x8b\xd6\xe4?' -p3798 -tp3799 -Rp3800 -ag6 -(g10 -S'\x8dB\xb5\xa21\xcc\x02@' -p3801 -tp3802 -Rp3803 -ag6 -(g10 -S'\x82\\/lQ\xe2\xea?' -p3804 -tp3805 -Rp3806 -ag6 -(g10 -S'w\xb8\x04\xb3\xd3\xf9\xe0?' -p3807 -tp3808 -Rp3809 -asS"L-BFGS \nw f'" -p3810 -(lp3811 -g6 -(g10 -S'\xebI\x854\xff\xb6\xad?' -p3812 -tp3813 -Rp3814 -ag6 -(g10 -S'\xde7\xb5\xd8dm\xb2?' -p3815 -tp3816 -Rp3817 -ag6 -(g10 -S'\xd5\x1b\x0e\xb2E\xf0\xb7?' -p3818 -tp3819 -Rp3820 -ag6 -(g10 -S'\x8e\xdc\x91D\xfb4\xa3?' -p3821 -tp3822 -Rp3823 -ag6 -(g10 -S"' \xa6/\xf6w\x96?" -p3824 -tp3825 -Rp3826 -ag6 -(g10 -S'\x0f\xf2\xba;\x15\xb2\x9b?' -p3827 -tp3828 -Rp3829 -ag6 -(g10 -S'/\x1b\xccKo\xce\xa8?' -p3830 -tp3831 -Rp3832 -ag6 -(g10 -S'\xf4G*\x98\xcb(\x96?' -p3833 -tp3834 -Rp3835 -ag6 -(g10 -S'\xc1\x14\xf9\xac\x1bL\xb9?' -p3836 -tp3837 -Rp3838 -ag6 -(g10 -S'l\xfe\x9f\x90\xa8*\xa6?' -p3839 -tp3840 -Rp3841 -ag6 -(g10 -S'\x9b\xe6\xf0\xfd\x96\x14\x8a?' -p3842 -tp3843 -Rp3844 -ag6 -(g10 -S' ,\x0c\xe0\xd3\xf3\x9f?' -p3845 -tp3846 -Rp3847 -ag6 -(g10 -S'\x9fX\x10\x8ev\x89\x91?' -p3848 -tp3849 -Rp3850 -ag6 -(g10 -S'\xc2a\x92<\x10\xf5\xa5?' -p3851 -tp3852 -Rp3853 -ag6 -(g10 -S'\x1d\xfb\x8d\x9b-(\xa4?' -p3854 -tp3855 -Rp3856 -ag6 -(g10 -S'\xfcB\xc7\x01%\xc5\xb8?' -p3857 -tp3858 -Rp3859 -ag6 -(g10 -S'_\x80\xe0;\xf7\x80\xa4?' -p3860 -tp3861 -Rp3862 -ag6 -(g10 -S'V\xbf@9\x1e\xda\xa6?' -p3863 -tp3864 -Rp3865 -ag6 -(g10 -S'\x9b(o\x08\x9cF\xaa?' -p3866 -tp3867 -Rp3868 -ag6 -(g10 -S'\xa9\xae\xdf\x98\x1b\xb4\xa0?' -p3869 -tp3870 -Rp3871 -asS"Conjugate gradient\nw f'" -p3872 -(lp3873 -g6 -(g10 -S'7&\xdd\xcd\xdb\xef\xc7?' -p3874 -tp3875 -Rp3876 -ag6 -(g10 -S'\xa8\xde\x19\x94\x97(\xc3?' -p3877 -tp3878 -Rp3879 -ag6 -(g10 -S'\x8f\xe6\xd3\xf7\x13\x9a\xc3?' -p3880 -tp3881 -Rp3882 -ag6 -(g10 -S'\xf9e\xa3\xe9\x86\x1e\xd1?' -p3883 -tp3884 -Rp3885 -ag6 -(g10 -S'\x03=\xbb\x1c\xbf0\xe1?' -p3886 -tp3887 -Rp3888 -ag6 -(g10 -S'\x07mW\x99\xf7n\xd6?' 
-p3889 -tp3890 -Rp3891 -ag6 -(g10 -S'\xbc\xdfT\xd2\xe2\xf1\xba?' -p3892 -tp3893 -Rp3894 -ag6 -(g10 -S'\xd0\xc0TS2!\xe4?' -p3895 -tp3896 -Rp3897 -ag6 -(g10 -S'e\xefkBP\xf6\xc6?' -p3898 -tp3899 -Rp3900 -ag6 -(g10 -S"\xcd'8\xbc\xdfJ\xd0?" -p3901 -tp3902 -Rp3903 -ag6 -(g10 -S'\x06;\xc2\xb1i\x0e\xdf?' -p3904 -tp3905 -Rp3906 -ag6 -(g10 -S'\x19\xfe\xe4\xe6\x01\x1b\xb9?' -p3907 -tp3908 -Rp3909 -ag6 -(g10 -S'\x94\x9d*\x1eX\x1c\xe1?' -p3910 -tp3911 -Rp3912 -ag6 -(g10 -S'g\xc9\xa2\x9b\x02k\xc8?' -p3913 -tp3914 -Rp3915 -ag6 -(g10 -S'<\xcd\xb4z\x84h\xc1?' -p3916 -tp3917 -Rp3918 -ag6 -(g10 -S'\xde\x99i\x0f\x96\xac\xc6?' -p3919 -tp3920 -Rp3921 -ag6 -(g10 -S'm\x07\x8a\xadP\x13\xca?' -p3922 -tp3923 -Rp3924 -ag6 -(g10 -S'\xd7tQq\xd3\xbf\xbc?' -p3925 -tp3926 -Rp3927 -ag6 -(g10 -S't^S\x06\xf5\xb4\xc3?' -p3928 -tp3929 -Rp3930 -ag6 -(g10 -S'\xb4\xcf\xc4b\xf0\xea\xe9?' -p3931 -tp3932 -Rp3933 -asS"BFGS\nw f'" -p3934 -(lp3935 -g6 -(g10 -S'\x82\x025g\xb8(\xb2?' -p3936 -tp3937 -Rp3938 -ag6 -(g10 -S'\x0f\xf2\xc8\x05\xf6\x96\x9f?' -p3939 -tp3940 -Rp3941 -ag6 -(g10 -S'\x8c\xd1\x04\xb3\x9c\x0f\xab?' -p3942 -tp3943 -Rp3944 -ag6 -(g10 -S'Z,\xce\x1e\xac7\xa8?' -p3945 -tp3946 -Rp3947 -ag6 -(g10 -S'g\xed\x9f\x0f\xa6\x04\x98?' -p3948 -tp3949 -Rp3950 -ag6 -(g10 -S'\xa4\xdc\x02\xdd\x0ec\xa3?' -p3951 -tp3952 -Rp3953 -ag6 -(g10 -S'\xdd\x88@V[:\xac?' -p3954 -tp3955 -Rp3956 -ag6 -(g10 -S'\x92.\xc6\xf3?7\x99?' -p3957 -tp3958 -Rp3959 -ag6 -(g10 -S'\x9d\xfe\x88\x05\xdc\xe9\xaf?' -p3960 -tp3961 -Rp3962 -ag6 -(g10 -S'\xda\xac\xf7\xcc;J\xb1?' -p3963 -tp3964 -Rp3965 -ag6 -(g10 -S'\x0b0\x92\x1fJ\xc4\x9a?' -p3966 -tp3967 -Rp3968 -ag6 -(g10 -S' ,\x0c\xe0\xd3\xf3\x9f?' -p3969 -tp3970 -Rp3971 -ag6 -(g10 -S't\xfd\xa0\x99\x91"\x94?' -p3972 -tp3973 -Rp3974 -ag6 -(g10 -S'Y\xc3\xba\x9c\xb3\x03\xac?' -p3975 -tp3976 -Rp3977 -ag6 -(g10 -S'hm\xa3:X\xb5\xac?' -p3978 -tp3979 -Rp3980 -ag6 -(g10 -S'\xfb\xe9\xbb\x9b_\xb9\xad?' -p3981 -tp3982 -Rp3983 -ag6 -(g10 -S'\x9e\xaf\x06\xc95b\xb1?' 
-p3984 -tp3985 -Rp3986 -ag6 -(g10 -S'\xb6\xecD\x87\x8bS\xa8?' -p3987 -tp3988 -Rp3989 -ag6 -(g10 -S"\t'\xd2\xaf\xb4\x0e\xb0?" -p3990 -tp3991 -Rp3992 -ag6 -(g10 -S'\x87K0;\x9d\x0f\xa5?' -p3993 -tp3994 -Rp3995 -assg1508 -(dp3996 -g4 -(lp3997 -g6 -(g10 -S'\x84\x02\xb1\xfb\xab\x99\xe4?' -p3998 -tp3999 -Rp4000 -ag6 -(g10 -S'h\xc3`\xf2|6\xe4?' -p4001 -tp4002 -Rp4003 -ag6 -(g10 -S'Vv\xa5\x87\xc9\x12\xe6?' -p4004 -tp4005 -Rp4006 -ag6 -(g10 -S'\x0e\xb5\xf2\x81]\x88\xe1?' -p4007 -tp4008 -Rp4009 -ag6 -(g10 -S'\x9c\xde\xf4\xa67\xbd\xe1?' -p4010 -tp4011 -Rp4012 -ag6 -(g10 -S'\xc4Y\xde\xe4\xff=\xf2?' -p4013 -tp4014 -Rp4015 -ag6 -(g10 -S'\x8dyO\x19\xca\xe1\xdb?' -p4016 -tp4017 -Rp4018 -ag6 -(g10 -S'ea\x997!\xfe\xdb?' -p4019 -tp4020 -Rp4021 -ag6 -(g10 -S'\xa6\xb1\xc5?\xcar\xf2?' -p4022 -tp4023 -Rp4024 -ag6 -(g10 -S'\xb3\xa9\xd6\xd8\xf5\xcd\xf4?' -p4025 -tp4026 -Rp4027 -ag6 -(g10 -S'\x00\xa4+\xde\xb0\x9b\xd8?' -p4028 -tp4029 -Rp4030 -ag6 -(g10 -S'\x97\xbfd\xf9K\x96\xcf?' -p4031 -tp4032 -Rp4033 -ag6 -(g10 -S'\x04\xdbS"\x1d\x12\xdd?' -p4034 -tp4035 -Rp4036 -ag6 -(g10 -S'\xa1\xb6N\xc0n\xc3\xdb?' -p4037 -tp4038 -Rp4039 -ag6 -(g10 -S'\x98\xb1\x9d\xad\xac\x12\xdc?' -p4040 -tp4041 -Rp4042 -ag6 -(g10 -S'\xd3X\xf9\x9dH>\xe0?' -p4043 -tp4044 -Rp4045 -ag6 -(g10 -S'$\x05\xb9\x04\xaa\xe8\xe7?' -p4046 -tp4047 -Rp4048 -ag6 -(g10 -S'\xa8\\\x8d\xca\xd5\xa8\xdc?' -p4049 -tp4050 -Rp4051 -ag6 -(g10 -S'\x0eO3\xf1\x0f\xbb\xe2?' -p4052 -tp4053 -Rp4054 -ag6 -(g10 -S'\xf0\xdd\xdc\xeb\x19\x95\xe8?' -p4055 -tp4056 -Rp4057 -asg73 -(lp4058 -g6 -(g10 -S'M\xda\xa0@\xec\xfe\xea?' -p4059 -tp4060 -Rp4061 -ag6 -(g10 -S'\x8a\x9a\xd7\x95\xa1\xa8\xf4?' -p4062 -tp4063 -Rp4064 -ag6 -(g10 -S'\xfa\x03\xc4~\xee\xc1\xff?' -p4065 -tp4066 -Rp4067 -ag6 -(g10 -S'X\x960\x1cI\xb5\xed?' -p4068 -tp4069 -Rp4070 -ag6 -(g10 -S'\x9c\xde\xf4\xa67\xbd\xf3?' -p4071 -tp4072 -Rp4073 -ag6 -(g10 -S'\xb6\x80\xdd\xd4\x91\xc0\n@' -p4074 -tp4075 -Rp4076 -ag6 -(g10 -S'\xcdG"\x01\x98\xaf\xf0?' 
-p4077 -tp4078 -Rp4079 -ag6 -(g10 -S'\x88\x8d.n\x14\x83\xf0?' -p4080 -tp4081 -Rp4082 -ag6 -(g10 -S'U\xc3\x8bP\xb9N\x03@' -p4083 -tp4084 -Rp4085 -ag6 -(g10 -S'e\x95r\xa2 \xb3\x00@' -p4086 -tp4087 -Rp4088 -ag6 -(g10 -S'o \xa5HC?\xec?' -p4089 -tp4090 -Rp4091 -ag6 -(g10 -S'\xe9\t\x8e\x9e\xe0\xe8\x18@' -p4092 -tp4093 -Rp4094 -ag6 -(g10 -S'K\xa4C\xa2\x13\x81\xf1?' -p4095 -tp4096 -Rp4097 -ag6 -(g10 -S'\x1b\x96#\xd3\xd8\xae\xf0?' -p4098 -tp4099 -Rp4100 -ag6 -(g10 -S"\xf6\xea'B\xdf\xdd\xf1?" -p4101 -tp4102 -Rp4103 -ag6 -(g10 -S'1\xebe`\xee\xd8\xf7?' -p4104 -tp4105 -Rp4106 -ag6 -(g10 -S'\xa9\xacG\x14\xda\xaa\xfe?' -p4107 -tp4108 -Rp4109 -ag6 -(g10 -S'\xa7ay\x1a\x96\xa7\xf1?' -p4110 -tp4111 -Rp4112 -ag6 -(g10 -S'f$C\xee\x05s\xf6?' -p4113 -tp4114 -Rp4115 -ag6 -(g10 -S'\x94\xea+\x99\x9fE\xf1?' -p4116 -tp4117 -Rp4118 -asS'Newton\nw Hessian ' -p4119 -(lp4120 -g6 -(g10 -S'\xa4\x06\xa3\x15\xd1\x90t?' -p4121 -tp4122 -Rp4123 -asg140 -(lp4124 -g6 -(g10 -S'\xc84\x85\xa5\x1b\x9b\x18@' -p4125 -tp4126 -Rp4127 -ag6 -(g10 -S'\xd0\xc4\xbd\xec\x08M\x17@' -p4128 -tp4129 -Rp4130 -ag6 -(g10 -S'\xf27_F\xfdd\x12@' -p4131 -tp4132 -Rp4133 -ag6 -(g10 -S'\x15\xe9\xb3\xbb1S\x19@' -p4134 -tp4135 -Rp4136 -ag6 -(g10 -S'\x16\xb2\x90\x85,d\x18@' -p4137 -tp4138 -Rp4139 -ag6 -(g10 -S'\xa0\x16Kic\xd4\x08@' -p4140 -tp4141 -Rp4142 -ag6 -(g10 -S'\x88\xc9\x15\xc4\xe4\n\x1a@' -p4143 -tp4144 -Rp4145 -ag6 -(g10 -S'Me\xd9Z{\x0c\x1a@' -p4146 -tp4147 -Rp4148 -ag6 -(g10 -S'\xde\xec\\\xaa\r\x99\x08@' -p4149 -tp4150 -Rp4151 -ag6 -(g10 -S' T\x0cOs\x01\n@' -p4152 -tp4153 -Rp4154 -ag6 -(g10 -S'\x12\xd0Q\xe6\x08\x16\x1b@' -p4155 -tp4156 -Rp4157 -ag6 -(g10 -S"\xa4'8z\x82\xa3\xeb?" 
-p4158 -tp4159 -Rp4160 -ag6 -(g10 -S'\x9a-\xb8\xea\x87\xa3\x19@' -p4161 -tp4162 -Rp4163 -ag6 -(g10 -S'7\xcbI\xd47\x07\x1a@' -p4164 -tp4165 -Rp4166 -ag6 -(g10 -S'\x84\x85Wg?\xc2\x18@' -p4167 -tp4168 -Rp4169 -ag6 -(g10 -S'\xde\xf6`\xe6)\xb0\x17@' -p4170 -tp4171 -Rp4172 -ag6 -(g10 -S'\x8e\xe7\x8f<\x84\x98\x12@' -p4173 -tp4174 -Rp4175 -ag6 -(g10 -S'y\x19\x9a\x97\xa1y\x19@' -p4176 -tp4177 -Rp4178 -ag6 -(g10 -S'1\xaa\xc8\x98\x08\xa2\x16@' -p4179 -tp4180 -Rp4181 -ag6 -(g10 -S'\xc7 5\x18\xad\x88\x16@' -p4182 -tp4183 -Rp4184 -asg202 -(lp4185 -g6 -(g10 -S'\x9a-\xf9\xfa\x9d\x08\xd8?' -p4186 -tp4187 -Rp4188 -ag6 -(g10 -S']\xfa\x8b\x16\xd0\xa5\xd7?' -p4189 -tp4190 -Rp4191 -ag6 -(g10 -S'>\r\xd3\x99\xe2\xa4\xe9?' -p4192 -tp4193 -Rp4194 -ag6 -(g10 -S'U\xe6/\xfb\x18\xaf\xd4?' -p4195 -tp4196 -Rp4197 -ag6 -(g10 -S'\x86,d!\x0bY\xd4?' -p4198 -tp4199 -Rp4200 -ag6 -(g10 -S'd\x13\xae5UH\xe5?' -p4201 -tp4202 -Rp4203 -ag6 -(g10 -S' Z\xff_\t[\xd0?' -p4204 -tp4205 -Rp4206 -ag6 -(g10 -S'\xb5\xd2\xf3\x06\xca\x8e\xd0?' -p4207 -tp4208 -Rp4209 -ag6 -(g10 -S'\x85\xa1\xaa\x10!?\xe6?' -p4210 -tp4211 -Rp4212 -ag6 -(g10 -S'\x19\x9e\xe6\x02|u\xf2?' -p4213 -tp4214 -Rp4215 -ag6 -(g10 -S'\x8c\x12\xea\xbe\x11C\xcb?' -p4216 -tp4217 -Rp4218 -ag6 -(g10 -S'\xf6qa\x1f\x17\xf6\xf4?' -p4219 -tp4220 -Rp4221 -ag6 -(g10 -S'\x1e\x84i\xd3\xdf%\xd1?' -p4222 -tp4223 -Rp4224 -ag6 -(g10 -S'\xe3\xbd\xa6\x820\xaa\xcf?' -p4225 -tp4226 -Rp4227 -ag6 -(g10 -S'\xf8G\x8eq\xb6w\xe0?' -p4228 -tp4229 -Rp4230 -ag6 -(g10 -S'K\x92M\xb8T\xf3\xd2?' -p4231 -tp4232 -Rp4233 -ag6 -(g10 -S'\x90A8\xe9\xcb\xac\xdc?' -p4234 -tp4235 -Rp4236 -ag6 -(g10 -S'\xe8`|\x0e\xc6\xe7\xd0?' -p4237 -tp4238 -Rp4239 -ag6 -(g10 -S'\x0b\xa4\xd9`\x8cl\xd5?' -p4240 -tp4241 -Rp4242 -ag6 -(g10 -S'xp\xd9\x1bzR\xdd?' -p4243 -tp4244 -Rp4245 -asg264 -(lp4246 -g6 -(g10 -S'\xca\xe5\x80Q\xb5O\xd2?' -p4247 -tp4248 -Rp4249 -ag6 -(g10 -S'y\x1f\x1d\x82\x8b\xf7\xd1?' -p4250 -tp4251 -Rp4252 -ag6 -(g10 -S'i\xf7\xcb\x06\xec\x9e\xd3?' 
-p4253 -tp4254 -Rp4255 -ag6 -(g10 -S'\x1a\xd0\x04\xe7P+\xcf?' -p4256 -tp4257 -Rp4258 -ag6 -(g10 -S'\xf9\x19%~F\x89\xcf?' -p4259 -tp4260 -Rp4261 -ag6 -(g10 -S'Y3\xa9Y\x1c7\xe0?' -p4262 -tp4263 -Rp4264 -ag6 -(g10 -S'\x0bl\xb8\xa4\xb3\xc8\xc8?' -p4265 -tp4266 -Rp4267 -ag6 -(g10 -S'>\x013\xa3\xe4\xe1\xc8?' -p4268 -tp4269 -Rp4270 -ag6 -(g10 -S'>\xf3=\x1c\tf\xe0?' -p4271 -tp4272 -Rp4273 -ag6 -(g10 -S'D\xe2\xc8\xcbG\xbd\xeb?' -p4274 -tp4275 -Rp4276 -ag6 -(g10 -S'\x00\xa4+\xde\xb0\x9b\xc8?' -p4277 -tp4278 -Rp4279 -ag6 -(g10 -S'\x97\xbfd\xf9K\x96\xcf?' -p4280 -tp4281 -Rp4282 -ag6 -(g10 -S'\x91\xfbfW6\xd7\xc9?' -p4283 -tp4284 -Rp4285 -ag6 -(g10 -S'sib\xc7\xb7\xad\xc8?' -p4286 -tp4287 -Rp4288 -ag6 -(g10 -S"\xa3\x0f\xc5\xb6'\xf4\xc8?" -p4289 -tp4290 -Rp4291 -ag6 -(g10 -S'[\xba\xd7\x18\x81\xe0\xcc?' -p4292 -tp4293 -Rp4294 -ag6 -(g10 -S'u|\x80\x11v\x9a\xf0?' -p4295 -tp4296 -Rp4297 -ag6 -(g10 -S'y\x19\x9a\x97\xa1y\xc9?' -p4298 -tp4299 -Rp4300 -ag6 -(g10 -S'b\rJ\x0fG\xa6\xd0?' -p4301 -tp4302 -Rp4303 -ag6 -(g10 -S'\x0f7\xfd&\xde\xd9\xd5?' -p4304 -tp4305 -Rp4306 -asS"L-BFGS \nw f'" -p4307 -(lp4308 -g6 -(g10 -S'\xca\xe5\x80Q\xb5O\x92?' -p4309 -tp4310 -Rp4311 -ag6 -(g10 -S'y\x1f\x1d\x82\x8b\xf7\x91?' -p4312 -tp4313 -Rp4314 -ag6 -(g10 -S'i\xf7\xcb\x06\xec\x9e\x93?' -p4315 -tp4316 -Rp4317 -ag6 -(g10 -S'\x1a\xd0\x04\xe7P+\x8f?' -p4318 -tp4319 -Rp4320 -ag6 -(g10 -S'\xf9\x19%~F\x89\x8f?' -p4321 -tp4322 -Rp4323 -ag6 -(g10 -S'Y3\xa9Y\x1c7\xa0?' -p4324 -tp4325 -Rp4326 -ag6 -(g10 -S'\x0bl\xb8\xa4\xb3\xc8\x88?' -p4327 -tp4328 -Rp4329 -ag6 -(g10 -S'>\x013\xa3\xe4\xe1\x88?' -p4330 -tp4331 -Rp4332 -ag6 -(g10 -S'>\xf3=\x1c\tf\xa0?' -p4333 -tp4334 -Rp4335 -ag6 -(g10 -S'D\xe2\xc8\xcbG\xbd\xab?' -p4336 -tp4337 -Rp4338 -ag6 -(g10 -S'\xe0\xeae5\x84r\x88?' -p4339 -tp4340 -Rp4341 -ag6 -(g10 -S'\x95RJ)\xa5\x94\x92?' -p4342 -tp4343 -Rp4344 -ag6 -(g10 -S'\xfep4\xba\x1a\xa9\xa5?' -p4345 -tp4346 -Rp4347 -ag6 -(g10 -S'sib\xc7\xb7\xad\x88?' -p4348 -tp4349 -Rp4350 -ag6 -(g10 -S"\xa3\x0f\xc5\xb6'\xf4\x88?" 
-p4351 -tp4352 -Rp4353 -ag6 -(g10 -S'[\xba\xd7\x18\x81\xe0\x8c?' -p4354 -tp4355 -Rp4356 -ag6 -(g10 -S'\x85\xb1\xf6\xb0\xe2\xe0\xaf?' -p4357 -tp4358 -Rp4359 -ag6 -(g10 -S'y\x19\x9a\x97\xa1y\x89?' -p4360 -tp4361 -Rp4362 -ag6 -(g10 -S'b\rJ\x0fG\xa6\x90?' -p4363 -tp4364 -Rp4365 -ag6 -(g10 -S'\x0f7\xfd&\xde\xd9\x95?' -p4366 -tp4367 -Rp4368 -asS"Conjugate gradient\nw f'" -p4369 -(lp4370 -g6 -(g10 -S'\x85\x1e\xd3\x14\x96n\xe4?' -p4371 -tp4372 -Rp4373 -ag6 -(g10 -S'nuF*\xe6V\xe1?' -p4374 -tp4375 -Rp4376 -ag6 -(g10 -S'\xe5\xbdmq\x8e\xb5\xe1?' -p4377 -tp4378 -Rp4379 -ag6 -(g10 -S'\xf5\xe4\xed\x9a\x0c]\xe2?' -p4380 -tp4381 -Rp4382 -ag6 -(g10 -S'\xbd\xe9Moz\xd3\xdf?' -p4383 -tp4384 -Rp4385 -ag6 -(g10 -S'\x9d\xf5\xbb{?\xee\xc0?' -p4386 -tp4387 -Rp4388 -ag6 -(g10 -S'\x98\xe8\xab\xb2\xd8\xa6\xe0?' -p4389 -tp4390 -Rp4391 -ag6 -(g10 -S'\x03\x8ak6{\xc3\xe0?' -p4392 -tp4393 -Rp4394 -ag6 -(g10 -S'\x17r\x97\x06}\xab\xf0?' -p4395 -tp4396 -Rp4397 -ag6 -(g10 -S'rC\x83+D\xe2\xc8?' -p4398 -tp4399 -Rp4400 -ag6 -(g10 -S'\xee2\xa6|\x1d\x96\xe0?' -p4401 -tp4402 -Rp4403 -ag6 -(g10 -S'\xdf{\xef\xbd\xf7\xde\xab?' -p4404 -tp4405 -Rp4406 -ag6 -(g10 -S'*X\xa8\xa3\xe4\xf7\xdf?' -p4407 -tp4408 -Rp4409 -ag6 -(g10 -S'<\xf0=\x00\x14 \xe1?' -p4410 -tp4411 -Rp4412 -ag6 -(g10 -S'\x93\xae\x81\x03\xa4\x1f\xe0?' -p4413 -tp4414 -Rp4415 -ag6 -(g10 -S'\xfc\xd2\xfb\xda\xaaR\xe0?' -p4416 -tp4417 -Rp4418 -ag6 -(g10 -S'\xd0Be=\xa2\xd0\xb6?' -p4419 -tp4420 -Rp4421 -ag6 -(g10 -S'az\x16\xa6ga\xe2?' -p4422 -tp4423 -Rp4424 -ag6 -(g10 -S'\xd9\x85\xe4\x98\xc6~\xe6?' -p4425 -tp4426 -Rp4427 -ag6 -(g10 -S'.{CO\xaa\xaf\xe4?' -p4428 -tp4429 -Rp4430 -asS"BFGS\nw f'" -p4431 -(lp4432 -g6 -(g10 -S'\x89r9`T\xed\xa3?' -p4433 -tp4434 -Rp4435 -ag6 -(g10 -S'\xd68\xa7\x1cc\x8d\xa3?' -p4436 -tp4437 -Rp4438 -ag6 -(g10 -S'\x1foV\xf8\x1eZ\xa5?' -p4439 -tp4440 -Rp4441 -ag6 -(g10 -S'\xa5\xda\xfe\xc8\xaf\xf5\xa0?' -p4442 -tp4443 -Rp4444 -ag6 -(g10 -S'\x13?\xa3\xc4\xcf(\xa1?' -p4445 -tp4446 -Rp4447 -ag6 -(g10 -S'\xe1\xb7\xce\x9db\xa5\xb1?' 
-p4448 -tp4449 -Rp4450 -ag6 -(g10 -S'X9PB\x87\xf8\x9a?' -p4451 -tp4452 -Rp4453 -ag6 -(g10 -S'\xd2\xf9/H\xf1\x13\x9b?' -p4454 -tp4455 -Rp4456 -ag6 -(g10 -S'\xc4\x97pSs\xd8\xb1?' -p4457 -tp4458 -Rp4459 -ag6 -(g10 -S'\xba\x00_\x9d\x87e\xb4?' -p4460 -tp4461 -Rp4462 -ag6 -(g10 -S'a\x06O\x92\xd1\xcd\x97?' -p4463 -tp4464 -Rp4465 -ag6 -(g10 -S'\x95RJ)\xa5\x94\x92?' -p4466 -tp4467 -Rp4468 -ag6 -(g10 -S'\xe2/\x0eP\xe8\x1e\x9c?' -p4469 -tp4470 -Rp4471 -ag6 -(g10 -S'\xfd\xae\x81\xe0)\xdb\x9a?' -p4472 -tp4473 -Rp4474 -ag6 -(g10 -S"\x8c\x18|\xdd\xd0'\x9b?" -p4475 -tp4476 -Rp4477 -ag6 -(g10 -S'\x81\xff&\xb9\xc8l\x9f?' -p4478 -tp4479 -Rp4480 -ag6 -(g10 -S'\xe8\x9eXv\xa4 \xa7?' -p4481 -tp4482 -Rp4483 -ag6 -(g10 -S'\xb9\x1b\x91\xbb\x11\xb9\x9b?' -p4484 -tp4485 -Rp4486 -ag6 -(g10 -S'\xbe;vc\\\x1e\xa2?' -p4487 -tp4488 -Rp4489 -ag6 -(g10 -S'\xae\x7f\x04\xc1q\xc7\xa7?' -p4490 -tp4491 -Rp4492 -assg2006 -(dp4493 -g4 -(lp4494 -g6 -(g10 -S'\xb2\xe4\xcdG\tL\xcc?' -p4495 -tp4496 -Rp4497 -ag6 -(g10 -S'\xd2\xcd\xe8\x9e\x94\x83\xd2?' -p4498 -tp4499 -Rp4500 -ag6 -(g10 -S'\xfe9\x08\xce\x92\xdf\xd1?' -p4501 -tp4502 -Rp4503 -ag6 -(g10 -S'=\xaf\xdc.lQ\xd3?' -p4504 -tp4505 -Rp4506 -ag6 -(g10 -S'\x05[?\x9a\xc9\x90\xd9?' -p4507 -tp4508 -Rp4509 -ag6 -(g10 -S'\x939\x0c\xf9\xceq\xcf?' -p4510 -tp4511 -Rp4512 -ag6 -(g10 -S'-\x87\xfa\x98\xe2"\xd7?' -p4513 -tp4514 -Rp4515 -ag6 -(g10 -S'\xe3/\xcf\xd0\xf8\x97\xda?' -p4516 -tp4517 -Rp4518 -ag6 -(g10 -S'G5\x88\xe8\xd3,\xd4?' -p4519 -tp4520 -Rp4521 -ag6 -(g10 -S'\x1b@N.\x98%\xd3?' -p4522 -tp4523 -Rp4524 -ag6 -(g10 -S'v\xbf\x14\x0cX\x19\xda?' -p4525 -tp4526 -Rp4527 -ag6 -(g10 -S'A\x16\x1d+\x9c\x95\xe7?' -p4528 -tp4529 -Rp4530 -ag6 -(g10 -S'\xf9\xb4\xc0\x87M\xdc\xd2?' -p4531 -tp4532 -Rp4533 -ag6 -(g10 -S'\xdfXF\xa8\x15\xf6\xe3?' -p4534 -tp4535 -Rp4536 -ag6 -(g10 -S"4'\xd3$A\xe0\xe2?" -p4537 -tp4538 -Rp4539 -ag6 -(g10 -S'O\x16\x03\x8a\xe3\x85\xd4?' -p4540 -tp4541 -Rp4542 -ag6 -(g10 -S'\x0fW\x14\xfa=\xe7\xd1?' 
-p4543 -tp4544 -Rp4545 -ag6 -(g10 -S'\xd7\xec\x1c\xa3.\xfb\xe0?' -p4546 -tp4547 -Rp4548 -ag6 -(g10 -S'C\x9b)D\x11\xad\xd1?' -p4549 -tp4550 -Rp4551 -ag6 -(g10 -S'\x14\x1e\xcb\x02r\xc6\xe1?' -p4552 -tp4553 -Rp4554 -asg73 -(lp4555 -g6 -(g10 -S'\x98\xe5K\xe2\xe2\x9b\xcd?' -p4556 -tp4557 -Rp4558 -ag6 -(g10 -S'y\x987\xe7Q\xe9\xd7?' -p4559 -tp4560 -Rp4561 -ag6 -(g10 -S'\xc3|\x0c\xfd_\xb4\xdb?' -p4562 -tp4563 -Rp4564 -ag6 -(g10 -S'\xbb-\x7f\x0e<\xa0\xd7?' -p4565 -tp4566 -Rp4567 -ag6 -(g10 -S'\xff\x9dod?\xd4\xd1?' -p4568 -tp4569 -Rp4570 -ag6 -(g10 -S'\x9f\xb4?\xa2\x9f\x02\xd3?' -p4571 -tp4572 -Rp4573 -ag6 -(g10 -S'\xdf<\xd1<\xd2\xc3\xdc?' -p4574 -tp4575 -Rp4576 -ag6 -(g10 -S'\x10w\xca\xd7{)\xe0?' -p4577 -tp4578 -Rp4579 -ag6 -(g10 -S'@\xca\xeb\xdf\x02t\xd3?' -p4580 -tp4581 -Rp4582 -ag6 -(g10 -S']B^\xf3\xa4\x17\xcc?' -p4583 -tp4584 -Rp4585 -ag6 -(g10 -S'ag\xd6\xb5\x85L\xe1?' -p4586 -tp4587 -Rp4588 -ag6 -(g10 -S'\xe5\x90\xde\x8a\xbf\xbb\xde?' -p4589 -tp4590 -Rp4591 -ag6 -(g10 -S'\x04h\xc7\x18@\xe9\xd7?' -p4592 -tp4593 -Rp4594 -ag6 -(g10 -S'\x9b*d:\xd5{\xf1?' -p4595 -tp4596 -Rp4597 -ag6 -(g10 -S'j\xf71E\xc3H\xde?' -p4598 -tp4599 -Rp4600 -ag6 -(g10 -S'\xd4_d?\x9e\xcc\xe1?' -p4601 -tp4602 -Rp4603 -ag6 -(g10 -S'e\xef\xe2hj\x8d\xdb?' -p4604 -tp4605 -Rp4606 -ag6 -(g10 -S'\xe5\xd1\xfdR\r\x02\xd1?' -p4607 -tp4608 -Rp4609 -ag6 -(g10 -S'\xec!\x13H0\x80\xda?' -p4610 -tp4611 -Rp4612 -ag6 -(g10 -S'8\xc7\x02S\x10C\xd8?' -p4613 -tp4614 -Rp4615 -asS'Newton\nw Hessian ' -p4616 -(lp4617 -g6 -(g10 -S'\x1f\x95\xa1kg\x83 ?' 
-p4618 -tp4619 -Rp4620 -asg140 -(lp4621 -g6 -(g10 -S'\xb4\xb12q\xf8r\x1e@' -p4622 -tp4623 -Rp4624 -ag6 -(g10 -S'\x9bPH\x89\x82{\x1a@' -p4625 -tp4626 -Rp4627 -ag6 -(g10 -S'\xc0\x84\xb5i\xcbt\x1c@' -p4628 -tp4629 -Rp4630 -ag6 -(g10 -S'\x06\xc74\x07n\xca\x1b@' -p4631 -tp4632 -Rp4633 -ag6 -(g10 -S'\x9a\x94\x02R3\x0e\x1c@' -p4634 -tp4635 -Rp4636 -ag6 -(g10 -S'\x95\xcfF\x9f&\xb9\x1d@' -p4637 -tp4638 -Rp4639 -ag6 -(g10 -S'\xb2[\xa8I^\xab\x19@' -p4640 -tp4641 -Rp4642 -ag6 -(g10 -S'\xdc\xc9(\x80X\x1d\x1a@' -p4643 -tp4644 -Rp4645 -ag6 -(g10 -S'\xeb:?m\xea\x8e\x1c@' -p4646 -tp4647 -Rp4648 -ag6 -(g10 -S'\xcde\xa1\xbf-\x06\x1d@' -p4649 -tp4650 -Rp4651 -ag6 -(g10 -S'\xc3Ch\xa4(o\x19@' -p4652 -tp4653 -Rp4654 -ag6 -(g10 -S'K\x8c\xd2\xce\xb5\xc4\x18@' -p4655 -tp4656 -Rp4657 -ag6 -(g10 -S'Vw\xf9\x04\x13\x8f\x1b@' -p4658 -tp4659 -Rp4660 -ag6 -(g10 -S'\x0f\xbcDS\xd4\xae\x15@' -p4661 -tp4662 -Rp4663 -ag6 -(g10 -S'ds?D\xfe\xcd\x18@' -p4664 -tp4665 -Rp4666 -ag6 -(g10 -S'\xae\x11\xb4\xb4\x1a\x93\x1b@' -p4667 -tp4668 -Rp4669 -ag6 -(g10 -S'\x8e\x87\xe770\xe0\x1b@' -p4670 -tp4671 -Rp4672 -ag6 -(g10 -S'C`\x17;{x\x1c@' -p4673 -tp4674 -Rp4675 -ag6 -(g10 -S'\xff\x90\xcf0\x04.\x1b@' -p4676 -tp4677 -Rp4678 -ag6 -(g10 -S'\x95\xcd\xe4\xa4\xfb]\x19@' -p4679 -tp4680 -Rp4681 -asg202 -(lp4682 -g6 -(g10 -S'\x19\xba\x9b\xed\xb2\xab\xa9?' -p4683 -tp4684 -Rp4685 -ag6 -(g10 -S'\x90\xa5\x0c\x07\x18\xd0\xab?' -p4686 -tp4687 -Rp4688 -ag6 -(g10 -S'\x05\x8bi\x8a\xd0\xda\xaf?' -p4689 -tp4690 -Rp4691 -ag6 -(g10 -S'v1!\x05v]\xb5?' -p4692 -tp4693 -Rp4694 -ag6 -(g10 -S'\x90\xb02\xbeT\xe3\xa5?' -p4695 -tp4696 -Rp4697 -ag6 -(g10 -S'\xdc\x06_\x04l\xf9\xa6?' -p4698 -tp4699 -Rp4700 -ag6 -(g10 -S'A\xb8@#\x14\xfa\xb1?' -p4701 -tp4702 -Rp4703 -ag6 -(g10 -S'6\x8c8\xe0\xf4\x7f\xb3?' -p4704 -tp4705 -Rp4706 -ag6 -(g10 -S'\xdb\xc3\x8f\x97\xe5\xa8\xb1?' -p4707 -tp4708 -Rp4709 -ag6 -(g10 -S'\x0f\xfet%\x1b\x96\xa6?' -p4710 -tp4711 -Rp4712 -ag6 -(g10 -S'=2S\xd1^\xce\xb5?' -p4713 -tp4714 -Rp4715 -ag6 -(g10 -S'W(\xa1^{\x85\xb2?' 
-p4716 -tp4717 -Rp4718 -ag6 -(g10 -S'g\xec\xe30~\xcc\xad?' -p4719 -tp4720 -Rp4721 -ag6 -(g10 -S'G\xee;\\JI\xcc?' -p4722 -tp4723 -Rp4724 -ag6 -(g10 -S"'O}c\x18~\xc0?" -p4725 -tp4726 -Rp4727 -ag6 -(g10 -S'\xaf6\xf2#\x94\xa6\xbe?' -p4728 -tp4729 -Rp4730 -ag6 -(g10 -S'k\x97\x7f\xa4\x11\xd9\xb5?' -p4731 -tp4732 -Rp4733 -ag6 -(g10 -S'\xb0a\x1a\x9f\x8af\xa5?' -p4734 -tp4735 -Rp4736 -ag6 -(g10 -S'\xf3 \xe4\x94\x8b\xfd\xb6?' -p4737 -tp4738 -Rp4739 -ag6 -(g10 -S'\xdf\xb1\x85\x83L\xa4\xb3?' -p4740 -tp4741 -Rp4742 -asg264 -(lp4743 -g6 -(g10 -S'\xdbx\x93\xfej\xc3\xe3?' -p4744 -tp4745 -Rp4746 -ag6 -(g10 -S'\xc4p\xe9#-\x1c\xec?' -p4747 -tp4748 -Rp4749 -ag6 -(g10 -S'G\xc3\x03l\xe0\xcf\xe6?' -p4750 -tp4751 -Rp4752 -ag6 -(g10 -S'\xdb\x06KF"\xfa\xec?' -p4753 -tp4754 -Rp4755 -ag6 -(g10 -S'\x8aA\xe4\xb5Q\xed\xe8?' -p4756 -tp4757 -Rp4758 -ag6 -(g10 -S'@\xd7/a\x98\x94\xe6?' -p4759 -tp4760 -Rp4761 -ag6 -(g10 -S'\x82e\xb4\x928\xf8\xf1?' -p4762 -tp4763 -Rp4764 -ag6 -(g10 -S'np\xab\xfaW\x87\xf0?' -p4765 -tp4766 -Rp4767 -ag6 -(g10 -S'\xed\xb4\xcb\ta\xef\xe7?' -p4768 -tp4769 -Rp4770 -ag6 -(g10 -S'\xb1O\xa4c\xd1q\xe6?' -p4771 -tp4772 -Rp4773 -ag6 -(g10 -S'J\x14\xb1\xf5\x06b\xf1?' -p4774 -tp4775 -Rp4776 -ag6 -(g10 -S'y\x13\x08p\xd7\x1e\xee?' -p4777 -tp4778 -Rp4779 -ag6 -(g10 -S'U\xff\xfd\xf9\xed\xc9\xed?' -p4780 -tp4781 -Rp4782 -ag6 -(g10 -S'\xe1Si7\xbaC\xf2?' -p4783 -tp4784 -Rp4785 -ag6 -(g10 -S'\xfb\x83\\\xba\xca\xae\xf0?' -p4786 -tp4787 -Rp4788 -ag6 -(g10 -S'f\xaf+\n\xfd#\xe6?' -p4789 -tp4790 -Rp4791 -ag6 -(g10 -S'y\xf85i)\xc6\xe8?' -p4792 -tp4793 -Rp4794 -ag6 -(g10 -S'>>\xb3\x0eR}\xe5?' -p4795 -tp4796 -Rp4797 -ag6 -(g10 -S'\xbbC\xb3gJ\xe4\xea?' -p4798 -tp4799 -Rp4800 -ag6 -(g10 -S'\xb0\xa6\xb8\xc6\x1d\xec\xf1?' -p4801 -tp4802 -Rp4803 -asS"L-BFGS \nw f'" -p4804 -(lp4805 -g6 -(g10 -S'O\xcc\x02#\xd4\xe8\xa2?' -p4806 -tp4807 -Rp4808 -ag6 -(g10 -S'\x96xw(\xf8\xda\xad?' -p4809 -tp4810 -Rp4811 -ag6 -(g10 -S'\xba\x8ej\x89J\x01\xa6?' -p4812 -tp4813 -Rp4814 -ag6 -(g10 -S'@\xd5\x9c-\x80\xfe\xa7?' 
-p4815 -tp4816 -Rp4817 -ag6 -(g10 -S'\xc3\x0e\x0b\xd0\xf12\xa9?' -p4818 -tp4819 -Rp4820 -ag6 -(g10 -S'\x84\x07\x1d\xc9\x9bO\xa6?' -p4821 -tp4822 -Rp4823 -ag6 -(g10 -S'\x83\xdeE\x90[\x96\xad?' -p4824 -tp4825 -Rp4826 -ag6 -(g10 -S'8L\xa5\xbe\x8f\x86\xaf?' -p4827 -tp4828 -Rp4829 -ag6 -(g10 -S'\x948\x0e\x99\xa4\xee\xa6?' -p4830 -tp4831 -Rp4832 -ag6 -(g10 -S'${\xc7\x07\xc3\xeb\xa5?' -p4833 -tp4834 -Rp4835 -ag6 -(g10 -S'\x83M\x11\x80+\xc3\xb1?' -p4836 -tp4837 -Rp4838 -ag6 -(g10 -S'%\xd6\x06\x1b\xb3\xcd\xaa?' -p4839 -tp4840 -Rp4841 -ag6 -(g10 -S'{\xbe\x9d\xe9\x14x\xac?' -p4842 -tp4843 -Rp4844 -ag6 -(g10 -S'2\n\xe2r\xbeN\xb1?' -p4845 -tp4846 -Rp4847 -ag6 -(g10 -S'\xa4\n\xb0\xca\x18Z\xb2?' -p4848 -tp4849 -Rp4850 -ag6 -(g10 -S'\xb3\xc6\x01\x84\xb7\xab\xa5?' -p4851 -tp4852 -Rp4853 -ag6 -(g10 -S'\xe4\xadptH_\xa7?' -p4854 -tp4855 -Rp4856 -ag6 -(g10 -S'\xe9\xf2\xad$@+\xa4?' -p4857 -tp4858 -Rp4859 -ag6 -(g10 -S'l\x1e\xef\xb3\xf8:\xa7?' -p4860 -tp4861 -Rp4862 -ag6 -(g10 -S'\xad\xc6\xefLL\xf2\xad?' -p4863 -tp4864 -Rp4865 -asS"Conjugate gradient\nw f'" -p4866 -(lp4867 -g6 -(g10 -S')Ian\x9f)\xcc?' -p4868 -tp4869 -Rp4870 -ag6 -(g10 -S'\xc4x\x96\x7f.\xb6\xe6?' -p4871 -tp4872 -Rp4873 -ag6 -(g10 -S'\x95M\xfe\x83T\xb6\xd5?' -p4874 -tp4875 -Rp4876 -ag6 -(g10 -S'\x065\xdf9\x15\x1c\xd5?' -p4877 -tp4878 -Rp4879 -ag6 -(g10 -S'\xdc4a\x80\r&\xdb?' -p4880 -tp4881 -Rp4882 -ag6 -(g10 -S'\xae\xe6.\xd2\xd2$\xcc?' -p4883 -tp4884 -Rp4885 -ag6 -(g10 -S'\x00\x95,)\x02\x0b\xe0?' -p4886 -tp4887 -Rp4888 -ag6 -(g10 -S'c\xa1Yb\xd9\xf0\xd6?' -p4889 -tp4890 -Rp4891 -ag6 -(g10 -S'\xb1,\xcd\xb2#d\xd7?' -p4892 -tp4893 -Rp4894 -ag6 -(g10 -S'\xe2\x0f\xbdR\x02R\xdb?' -p4895 -tp4896 -Rp4897 -ag6 -(g10 -S'\x88\x058Ez\x82\xdb?' -p4898 -tp4899 -Rp4900 -ag6 -(g10 -S"$\x97=\xdd\xf0'\xe0?" -p4901 -tp4902 -Rp4903 -ag6 -(g10 -S'\xcf\xc0%\x17\xe1g\xd8?' -p4904 -tp4905 -Rp4906 -ag6 -(g10 -S'\x11\x935ZFI\xd9?' -p4907 -tp4908 -Rp4909 -ag6 -(g10 -S'\x9b\x7f\xc9\xcf7;\xde?' 
-p4910 -tp4911 -Rp4912 -ag6 -(g10 -S'N\xc5\x87\xe1B\xcf\xd6?' -p4913 -tp4914 -Rp4915 -ag6 -(g10 -S'W\x1a\x7f=\xa2\x8b\xd9?' -p4916 -tp4917 -Rp4918 -ag6 -(g10 -S'\x97\xd6\xf8R\xca\x8b\xd4?' -p4919 -tp4920 -Rp4921 -ag6 -(g10 -S'\xe1\x90;\x06\x9c\xbf\xe0?' -p4922 -tp4923 -Rp4924 -ag6 -(g10 -S'eO\x14\x12\xec\xf4\xdc?' -p4925 -tp4926 -Rp4927 -asS"BFGS\nw f'" -p4928 -(lp4929 -g6 -(g10 -S'\xbd\xdc\x08\xde\xbe\xa0\x85?' -p4930 -tp4931 -Rp4932 -ag6 -(g10 -S'\xaa\xca\x19\t\x97\xd9\x8f?' -p4933 -tp4934 -Rp4935 -ag6 -(g10 -S')\x92T!\x17\xb6\x90?' -p4936 -tp4937 -Rp4938 -ag6 -(g10 -S'>o\xa9/!\xbd\x8f?' -p4939 -tp4940 -Rp4941 -ag6 -(g10 -S'\x0e\xb3\xae\x06\x95X\x8a?' -p4942 -tp4943 -Rp4944 -ag6 -(g10 -S'h\x84\x96\xe28v\x89?' -p4945 -tp4946 -Rp4947 -ag6 -(g10 -S'\xf0\x88\xa6\x19V\x8b\x93?' -p4948 -tp4949 -Rp4950 -ag6 -(g10 -S'\x15!\x82\xe4p\xdb\x95?' -p4951 -tp4952 -Rp4953 -ag6 -(g10 -S'\x9f\xbe\xad\x02O|\x8c?' -p4954 -tp4955 -Rp4956 -ag6 -(g10 -S'\x07\xdc\x86\x99\xac\x94\x84?' -p4957 -tp4958 -Rp4959 -ag6 -(g10 -S')a4\x92#\x92\x96?' -p4960 -tp4961 -Rp4962 -ag6 -(g10 -S's\x84\xc9@$&\x94?' -p4963 -tp4964 -Rp4965 -ag6 -(g10 -S'\xfe~h\x8bZ.\x90?' -p4966 -tp4967 -Rp4968 -ag6 -(g10 -S'\x08\xac\xe5*\x9c\x05\xa3?' -p4969 -tp4970 -Rp4971 -ag6 -(g10 -S'\xf6qrz\xc9\x84\x94?' -p4972 -tp4973 -Rp4974 -ag6 -(g10 -S'4\xce\xb8\x14\x04]\x93?' -p4975 -tp4976 -Rp4977 -ag6 -(g10 -S'H3j\xaaB\xbd\x90?' -p4978 -tp4979 -Rp4980 -ag6 -(g10 -S'\n\xe1\x0f\x90\xf5\xb8\x88?' -p4981 -tp4982 -Rp4983 -ag6 -(g10 -S'\xb5\xe46\\b\x1a\x90?' -p4984 -tp4985 -Rp4986 -ag6 -(g10 -S'\xd5\xfaeK\x11\x18\x90?' -p4987 -tp4988 -Rp4989 -asssI2 -(dp4990 -g2 -(dp4991 -g4 -(lp4992 -g6 -(g10 -S'\xba3\x07\xa3\x81v\xed?' -p4993 -tp4994 -Rp4995 -ag6 -(g10 -S'p\x81\x0b\\\xe0\x02\xe7?' -p4996 -tp4997 -Rp4998 -ag6 -(g10 -S"\x88\xae\x00\xe2'%\xed?" -p4999 -tp5000 -Rp5001 -ag6 -(g10 -S'7\x01\xa5\xa8\x97\x91\xe8?' -p5002 -tp5003 -Rp5004 -ag6 -(g10 -S'p\xd1T\r\x87y\xb7?' -p5005 -tp5006 -Rp5007 -ag6 -(g10 -S'9J\x06zrF\xf0?' 
-p5008 -tp5009 -Rp5010 -ag6 -(g10 -S'\x81\x1e\xac\xa6u\xbc\xe9?' -p5011 -tp5012 -Rp5013 -ag6 -(g10 -S'K\xd4\xaeD\xedJ\xf4?' -p5014 -tp5015 -Rp5016 -ag6 -(g10 -S'\xeb\x83\x88]\xc2\x8b\xe9?' -p5017 -tp5018 -Rp5019 -ag6 -(g10 -S'\xe6\xe9\xa3\xd5$D\xf1?' -p5020 -tp5021 -Rp5022 -ag6 -(g10 -S'\x8f^\x19\xdb\xef\xe8\xf5?' -p5023 -tp5024 -Rp5025 -ag6 -(g10 -S'\xd2\xe6}\x8aK\x86\xf0?' -p5026 -tp5027 -Rp5028 -ag6 -(g10 -S'w\x8b\xfc\xe4\x89\x07\xe8?' -p5029 -tp5030 -Rp5031 -ag6 -(g10 -S'\x07\xbc\xb0g\xf2\xbc\xe4?' -p5032 -tp5033 -Rp5034 -ag6 -(g10 -S'\xfeT\x94\xaaI\xd8\xa2?' -p5035 -tp5036 -Rp5037 -ag6 -(g10 -S'\xa5+\x8c\xa9\x16\xf5\xec?' -p5038 -tp5039 -Rp5040 -ag6 -(g10 -S'\xae &W\x10\x93\xeb?' -p5041 -tp5042 -Rp5043 -ag6 -(g10 -S'\xcb=\x8d\xb0\xdc\xd3\xe8?' -p5044 -tp5045 -Rp5046 -ag6 -(g10 -S'\x80\xe0]\x10\xa7\x9f\xe7?' -p5047 -tp5048 -Rp5049 -ag6 -(g10 -S'\x06\xd43\x95\xeb\x8e\xe7?' -p5050 -tp5051 -Rp5052 -asg73 -(lp5053 -g6 -(g10 -S'T\x80\xe4\x05\x11j\xf2?' -p5054 -tp5055 -Rp5056 -ag6 -(g10 -S'7\xb5\xa9Mmj\xf3?' -p5057 -tp5058 -Rp5059 -ag6 -(g10 -S'\xf7\x94e\x8a6\xa0\xf6?' -p5060 -tp5061 -Rp5062 -ag6 -(g10 -S'\x9a\xdb\xa9<:\xf0\xf2?' -p5063 -tp5064 -Rp5065 -ag6 -(g10 -S"C\xfe\xcc':\xff\xb3?" -p5066 -tp5067 -Rp5068 -ag6 -(g10 -S'\xeb-\xe1v5\r\xf5?' -p5069 -tp5070 -Rp5071 -ag6 -(g10 -S'\xb3\x14\x87\x8c\xbdv\xf1?' -p5072 -tp5073 -Rp5074 -ag6 -(g10 -S'\xdb\x95\xa8]\x89\xda\xf5?' -p5075 -tp5076 -Rp5077 -ag6 -(g10 -S'\x99\x18G\xa3\xccI\xf5?' -p5078 -tp5079 -Rp5080 -ag6 -(g10 -S'\xb4\x9a\x84(\xfe"\xf7?' -p5081 -tp5082 -Rp5083 -ag6 -(g10 -S'\xf7;zel\xbf\xf3?' -p5084 -tp5085 -Rp5086 -ag6 -(g10 -S'\xc1:\xda\xbcOq\xf5?' -p5087 -tp5088 -Rp5089 -ag6 -(g10 -S'A\xd0\xe7B\xc54\xf8?' -p5090 -tp5091 -Rp5092 -ag6 -(g10 -S'\xecg\x8b\x95\xe1\x1b\xf1?' -p5093 -tp5094 -Rp5095 -ag6 -(g10 -S'\xfc\xf6\xec\xdf\x9b\x0f\xb0?' -p5096 -tp5097 -Rp5098 -ag6 -(g10 -S'Fl\xaf_\xee$\xf4?' -p5099 -tp5100 -Rp5101 -ag6 -(g10 -S'\x051\xb9\x82\x98\\\xf7?' 
-p5102 -tp5103 -Rp5104 -ag6 -(g10 -S'\xb9\xa7\x11\x96{\x9a\xf9?' -p5105 -tp5106 -Rp5107 -ag6 -(g10 -S'\xd4\xa5\xf3G\xe3\xa0\xf3?' -p5108 -tp5109 -Rp5110 -ag6 -(g10 -S'\xa5}\x90\x0c\xa8g\xf2?' -p5111 -tp5112 -Rp5113 -asS'Newton\nw Hessian ' -p5114 -(lp5115 -g6 -(g10 -S'\x9e\xa9\\w\xbc\xd8\xa2?' -p5116 -tp5117 -Rp5118 -asg140 -(lp5119 -g6 -(g10 -S'}.\xbfDq\xd5\xf6?' -p5120 -tp5121 -Rp5122 -ag6 -(g10 -S'\x90~\xf4\xa3\x1f\xfd\xf8?' -p5123 -tp5124 -Rp5125 -ag6 -(g10 -S'\xdd\xf9\r\x99\xb1y\xf5?' -p5126 -tp5127 -Rp5128 -ag6 -(g10 -S'\xfck]\xa1\xb9\x9d\xfa?' -p5129 -tp5130 -Rp5131 -ag6 -(g10 -S"j'\x84$/\xbc\xba?" -p5132 -tp5133 -Rp5134 -ag6 -(g10 -S'\xc3\xc6\xdc\x87\x18\x0f\xf8?' -p5135 -tp5136 -Rp5137 -ag6 -(g10 -S'I\xf7\x17>\x95\xa5\xfc?' -p5138 -tp5139 -Rp5140 -ag6 -(g10 -S'\xc8\xe0|\x0c\xce\xc7\xf8?' -p5141 -tp5142 -Rp5143 -ag6 -(g10 -S'ns\xd7\x11JZ\xf6?' -p5144 -tp5145 -Rp5146 -ag6 -(g10 -S'\x0b.\x95\xed]\x07\xf4?' -p5147 -tp5148 -Rp5149 -ag6 -(g10 -S"'\x81\xb8Ps\x12\xf8?" -p5150 -tp5151 -Rp5152 -ag6 -(g10 -S'\xf8).\x19\x82u\xf4?' -p5153 -tp5154 -Rp5155 -ag6 -(g10 -S'\xdb\xcb98C\xf6\xf0?' -p5156 -tp5157 -Rp5158 -ag6 -(g10 -S'\xb5[\xde\xbdx,\x01@' -p5159 -tp5160 -Rp5161 -ag6 -(g10 -S'\xd98=\xb5\tX\xc4?' -p5162 -tp5163 -Rp5164 -ag6 -(g10 -S'\x0bc\xaaE=g\xf5?' -p5165 -tp5166 -Rp5167 -ag6 -(g10 -S'\x83\x98\\AL\xae\xf4?' -p5168 -tp5169 -Rp5170 -ag6 -(g10 -S'\xd4\x08\xcb=\x8d0\xf6?' -p5171 -tp5172 -Rp5173 -ag6 -(g10 -S'\xe1\x1e\xcc\xc7\xed\xd6\xf8?' -p5174 -tp5175 -Rp5176 -ag6 -(g10 -S'\xe8\xa4}\x90\x0c\xa8\xff?' 
-p5177 -tp5178 -Rp5179 -asg202 -(lp5180 -g6 -(g10 -S'sg\x0eF\x03\xed\x06@' -p5181 -tp5182 -Rp5183 -ag6 -(g10 -S'_\xf7\xba\xd7\xbd\xee\x05@' -p5184 -tp5185 -Rp5186 -ag6 -(g10 -S'\xbf!36\xaf\xe2\t@' -p5187 -tp5188 -Rp5189 -ag6 -(g10 -S'\x8a\xd5p\xf1C\x18\x06@' -p5190 -tp5191 -Rp5192 -ag6 -(g10 -S'\x7fA\x81\xcf\xc6\x07!@' -p5193 -tp5194 -Rp5195 -ag6 -(g10 -S'4\x82w\x0e\x7f:\x05@' -p5196 -tp5197 -Rp5198 -ag6 -(g10 -S'\xaa,\xc5!c\xaf\x03@' -p5199 -tp5200 -Rp5201 -ag6 -(g10 -S'E\xedJ\xd4\xaeD\x01@' -p5202 -tp5203 -Rp5204 -ag6 -(g10 -S'tf\xe7\xb8\\3\x06@' -p5205 -tp5206 -Rp5207 -ag6 -(g10 -S'\xea\xa3\xd5$D\xf1\x03@' -p5208 -tp5209 -Rp5210 -ag6 -(g10 -S"s\x12\x88\x0b5'\x01@" -p5211 -tp5212 -Rp5213 -ag6 -(g10 -S'\xa8\xb8d\x08\xd6\xd1\x06@' -p5214 -tp5215 -Rp5216 -ag6 -(g10 -S'+9\xd6\x8a\x9a,\x06@' -p5217 -tp5218 -Rp5219 -ag6 -(g10 -S'\x86\xa4\xba\x1aT%\x02@' -p5220 -tp5221 -Rp5222 -ag6 -(g10 -S'6\x97+hj8!@' -p5223 -tp5224 -Rp5225 -ag6 -(g10 -S'Oq\xc9\x10\xac\xa3\x05@' -p5226 -tp5227 -Rp5228 -ag6 -(g10 -S'\\AL\xae \xa6\x07@' -p5229 -tp5230 -Rp5231 -ag6 -(g10 -S'\x11\x96{\x1aa\xb9\x05@' -p5232 -tp5233 -Rp5234 -ag6 -(g10 -S'c\xc0\xe5\xf8\xe2\xb5\x07@' -p5235 -tp5236 -Rp5237 -ag6 -(g10 -S'\xd2>H\x06\xd43\x05@' -p5238 -tp5239 -Rp5240 -asg264 -(lp5241 -g6 -(g10 -S'\xba3\x07\xa3\x81v\xed?' -p5242 -tp5243 -Rp5244 -ag6 -(g10 -S'\xfa\xd1\x8f~\xf4\xa3\xef?' -p5245 -tp5246 -Rp5247 -ag6 -(g10 -S'\xbb\xd5CW\x00\xf1\xe3?' -p5248 -tp5249 -Rp5250 -ag6 -(g10 -S'=:\xf0\x9eoL\xeb?' -p5251 -tp5252 -Rp5253 -ag6 -(g10 -S'\xe1\x1f\xa7{\x80c\xb1?' -p5254 -tp5255 -Rp5256 -ag6 -(g10 -S'\xc3\xc6\xdc\x87\x18\x0f\xe8?' -p5257 -tp5258 -Rp5259 -ag6 -(g10 -S'\xfb\xceF}g\xa3\xee?' -p5260 -tp5261 -Rp5262 -ag6 -(g10 -S'jW\xa2v%j\xe7?' -p5263 -tp5264 -Rp5265 -ag6 -(g10 -S'\xf2\x8a\nC\xd8\xa0\xef?' -p5266 -tp5267 -Rp5268 -ag6 -(g10 -S'\xa7\x8fV\x93\x10\xc5\xef?' -p5269 -tp5270 -Rp5271 -ag6 -(g10 -S'C\xcdI .\xd4\xec?' -p5272 -tp5273 -Rp5274 -ag6 -(g10 -S'\xce\xfb\x14\x97\x0c\xc1\xea?' 
-p5275 -tp5276 -Rp5277 -ag6 -(g10 -S'\xa9+\x9b\x8e\xe6~\xf4?' -p5278 -tp5279 -Rp5280 -ag6 -(g10 -S'\xd6\xa0*\x91\x86\x08\xed?' -p5281 -tp5282 -Rp5283 -ag6 -(g10 -S'\x15\xf0:UnE\xa6?' -p5284 -tp5285 -Rp5286 -ag6 -(g10 -S'i"\x87\x8fe7\xee?' -p5287 -tp5288 -Rp5289 -ag6 -(g10 -S'\x88\xc9\x15\xc4\xe4\n\xea?' -p5290 -tp5291 -Rp5292 -ag6 -(g10 -S'\x8d\xb0\xdc\xd3\x08\xcb\xed?' -p5293 -tp5294 -Rp5295 -ag6 -(g10 -S'\xcc\xdcY!$g\xea?' -p5296 -tp5297 -Rp5298 -ag6 -(g10 -S'\xd43\x95\xeb\x8e\x17\xeb?' -p5299 -tp5300 -Rp5301 -asS"L-BFGS \nw f'" -p5302 -(lp5303 -g6 -(g10 -S'k\xfb\x80\xad\x113\xde?' -p5304 -tp5305 -Rp5306 -ag6 -(g10 -S'\x03\x17\xb8\xc0\x05.\xe0?' -p5307 -tp5308 -Rp5309 -ag6 -(g10 -S'\xcc\xe7(\xf8X\xb5\xd4?' -p5310 -tp5311 -Rp5312 -ag6 -(g10 -S'\x7f\x08\x83\x9c%\xfb\xdb?' -p5313 -tp5314 -Rp5315 -ag6 -(g10 -S'F\x1aX\x18\xca\xd2\xa1?' -p5316 -tp5317 -Rp5318 -ag6 -(g10 -S'\xe7\x176\xe6>\xc4\xd8?' -p5319 -tp5320 -Rp5321 -ag6 -(g10 -S'\n%\x1a\xb8E@\xdf?' -p5322 -tp5323 -Rp5324 -ag6 -(g10 -S'28\x1f\x83\xf31\xd8?' -p5325 -tp5326 -Rp5327 -ag6 -(g10 -S'm_e\xd3F\x1e\xe0?' -p5328 -tp5329 -Rp5330 -ag6 -(g10 -S'Xp\xa9l\xef:\xe0?' -p5331 -tp5332 -Rp5333 -ag6 -(g10 -S'}\xd6\r\xa6\xc8g\xdd?' -p5334 -tp5335 -Rp5336 -ag6 -(g10 -S'\t\xd6\xd1\xe6}\x8a\xdb?' -p5337 -tp5338 -Rp5339 -ag6 -(g10 -S'>\xb5qJ]\xd9\xe4?' -p5340 -tp5341 -Rp5342 -ag6 -(g10 -S'#?\xc2\xd3?\x8d\xdd?' -p5343 -tp5344 -Rp5345 -ag6 -(g10 -S'\xdb\x96\xe4\x7f\xb7 \x97?' -p5346 -tp5347 -Rp5348 -ag6 -(g10 -S'\xcc\x9d\x84\x02\x8d\xd8\xde?' -p5349 -tp5350 -Rp5351 -ag6 -(g10 -S'\x1b\xf5\x9d\x8d\xfa\xce\xda?' -p5352 -tp5353 -Rp5354 -ag6 -(g10 -S'\xe5\x9eFX\xeei\xde?' -p5355 -tp5356 -Rp5357 -ag6 -(g10 -S'\xde\xdb\x98e\x03\x19\xdb?' -p5358 -tp5359 -Rp5360 -ag6 -(g10 -S'!\x19P\xcfT\xae\xdb?' -p5361 -tp5362 -Rp5363 -asS"Conjugate gradient\nw f'" -p5364 -(lp5365 -g6 -(g10 -S'U\x12\xfcI\xb93\xe7?' -p5366 -tp5367 -Rp5368 -ag6 -(g10 -S'\xb5\xa9MmjS\xeb?' -p5369 -tp5370 -Rp5371 -ag6 -(g10 -S'\xc4^\xb6\xa7,S\xe4?' 
-p5372 -tp5373 -Rp5374 -ag6 -(g10 -S'\xc0\xd6\x15\x9a\xdb\xa9\xec?' -p5375 -tp5376 -Rp5377 -ag6 -(g10 -S'\xfe\x82\x02\x9f\x8d\x8f\xad?' -p5378 -tp5379 -Rp5380 -ag6 -(g10 -S'\x9d\x11\xbcs\xf8\xd3\xe9?' -p5381 -tp5382 -Rp5383 -ag6 -(g10 -S'\xfb\xceF}g\xa3\xee?' -p5384 -tp5385 -Rp5386 -ag6 -(g10 -S'Q\xbb\x12\xb5+Q\xeb?' -p5387 -tp5388 -Rp5389 -ag6 -(g10 -S'\x1d\x1c\x08\x96WT\xe8?' -p5390 -tp5391 -Rp5392 -ag6 -(g10 -S'\x8fV\x93\x10\xc5_\xe4?' -p5393 -tp5394 -Rp5395 -ag6 -(g10 -S'\xb9Ps\x12\x88\x0b\xe5?' -p5396 -tp5397 -Rp5398 -ag6 -(g10 -S'\x15\x97\x0c\xc1:\xda\xe4?' -p5399 -tp5400 -Rp5401 -ag6 -(g10 -S'pU\x10\xf4\xb9P\xe1?' -p5402 -tp5403 -Rp5404 -ag6 -(g10 -S'\x86\xa4\xba\x1aT%\xf2?' -p5405 -tp5406 -Rp5407 -ag6 -(g10 -S'\xb3\x1c\xe6\xbf\xc9\xd7\xa5?' -p5408 -tp5409 -Rp5410 -ag6 -(g10 -S'\xe14\x91\xc3\xc7\xb2\xeb?' -p5411 -tp5412 -Rp5413 -ag6 -(g10 -S'r\x051\xb9\x82\x98\xe6?' -p5414 -tp5415 -Rp5416 -ag6 -(g10 -S'\x11\x96{\x1aa\xb9\xe5?' -p5417 -tp5418 -Rp5419 -ag6 -(g10 -S'\xde\xdb\x98e\x03\x19\xeb?' -p5420 -tp5421 -Rp5422 -ag6 -(g10 -S'\xed\x83d@=S\xe9?' -p5423 -tp5424 -Rp5425 -asS"BFGS\nw f'" -p5426 -(lp5427 -g6 -(g10 -S'k\xfb\x80\xad\x113\xde?' -p5428 -tp5429 -Rp5430 -ag6 -(g10 -S'|\xdd\xeb^\xf7\xba\xd7?' -p5431 -tp5432 -Rp5433 -ag6 -(g10 -S'\x99\xc0\xe5\x82\x80\xe9\xdd?' -p5434 -tp5435 -Rp5436 -ag6 -(g10 -S'y\xcf7\xa6M@\xd9?' -p5437 -tp5438 -Rp5439 -ag6 -(g10 -S'\xd5\xcb\x05\xaa\xd0\xe8\xa7?' -p5440 -tp5441 -Rp5442 -ag6 -(g10 -S'\xcb\xf22\xa9\x05\xa1\xe0?' -p5443 -tp5444 -Rp5445 -ag6 -(g10 -S'\x90t\x7f\xe1SY\xda?' -p5446 -tp5447 -Rp5448 -ag6 -(g10 -S'\xafD\xedJ\xd4\xae\xe4?' -p5449 -tp5450 -Rp5451 -ag6 -(g10 -S"\xd2\xb7H\xc1w'\xda?" -p5452 -tp5453 -Rp5454 -ag6 -(g10 -S'k\x12\xa2\xf8\x8b\x9c\xe1?' -p5455 -tp5456 -Rp5457 -ag6 -(g10 -S',c\xfb\x1d\xbd2\xe6?' -p5458 -tp5459 -Rp5460 -ag6 -(g10 -S'\xefS\\2\x04\xeb\xe0?' -p5461 -tp5462 -Rp5463 -ag6 -(g10 -S'\xa0\x9e\xa9\\w\xbc\xd8?' -p5464 -tp5465 -Rp5466 -ag6 -(g10 -S'SZH\xaa\xabA\xd5?' 
-p5467 -tp5468 -Rp5469 -ag6 -(g10 -S'\xc4\xfb=\xd5\x92\xb3\x93?' -p5470 -tp5471 -Rp5472 -ag6 -(g10 -S'\x07\xa7\x89\x1c>\x96\xdd?' -p5473 -tp5474 -Rp5475 -ag6 -(g10 -S'AL\xae &W\xdc?' -p5476 -tp5477 -Rp5478 -ag6 -(g10 -S'#,\xf74\xc2r\xd9?' -p5479 -tp5480 -Rp5481 -ag6 -(g10 -S'\x93\xdf\x9cT\x86Q\xd8?' -p5482 -tp5483 -Rp5484 -ag6 -(g10 -S'S\xb9\xeex\xb1%\xd8?' -p5485 -tp5486 -Rp5487 -assg512 -(dp5488 -g4 -(lp5489 -g6 -(g10 -S'\x11u3h\xd9\xf1\xec?' -p5490 -tp5491 -Rp5492 -ag6 -(g10 -S'\xffh\x7f\xb4?\xda\xef?' -p5493 -tp5494 -Rp5495 -ag6 -(g10 -S'\x0bY\xc8B\x16\xb2\xf0?' -p5496 -tp5497 -Rp5498 -ag6 -(g10 -S'\xd9\x89\x9d\xd8\x89\x9d\xe8?' -p5499 -tp5500 -Rp5501 -ag6 -(g10 -S'n\xdb\xb6m\xdb\xb6\xe9?' -p5502 -tp5503 -Rp5504 -ag6 -(g10 -S'_Cy\r\xe55\xe4?' -p5505 -tp5506 -Rp5507 -ag6 -(g10 -S'=:\xf0\x9eoL\xeb?' -p5508 -tp5509 -Rp5510 -ag6 -(g10 -S')\xf2Y7\x98"\xef?' -p5511 -tp5512 -Rp5513 -ag6 -(g10 -S'5\xb0wL\r\xec\xed?' -p5514 -tp5515 -Rp5516 -ag6 -(g10 -S'\x9a\xee`\xbf\xd5\xc6\xf0?' -p5517 -tp5518 -Rp5519 -ag6 -(g10 -S'[X\xe9\xa9\x85\x95\xee?' -p5520 -tp5521 -Rp5522 -ag6 -(g10 -S'n\xdb\xb6m\xdb\xb6\xe9?' -p5523 -tp5524 -Rp5525 -ag6 -(g10 -S'\xe09\x02E[\r\xee?' -p5526 -tp5527 -Rp5528 -ag6 -(g10 -S'\x00\x00\x00\x00\x00\x00\xf0?' -p5529 -tp5530 -Rp5531 -ag6 -(g10 -S'\xc9\x16\xd1\x9c5(\xee?' -p5532 -tp5533 -Rp5534 -ag6 -(g10 -S'\x0bY\xc8B\x16\xb2\xf0?' -p5535 -tp5536 -Rp5537 -ag6 -(g10 -S'h\xac\x0f\x8d\xf5\xa1\xf1?' -p5538 -tp5539 -Rp5540 -ag6 -(g10 -S'\xa3\xce4n`\xd4\xe9?' -p5541 -tp5542 -Rp5543 -ag6 -(g10 -S'\x0bY\xc8B\x16\xb2\xf0?' -p5544 -tp5545 -Rp5546 -ag6 -(g10 -S'\xb8\x1e\x85\xebQ\xb8\xee?' 
-p5547 -tp5548 -Rp5549 -asg73 -(lp5550 -g6 -(g10 -S'\x11u3h\xd9\xf1\x0c@' -p5551 -tp5552 -Rp5553 -ag6 -(g10 -S'\xd4,j\x165\x8b\n@' -p5554 -tp5555 -Rp5556 -ag6 -(g10 -S'\xa77\xbd\xe9Mo\n@' -p5557 -tp5558 -Rp5559 -ag6 -(g10 -S"vb'vb'\n@" -p5560 -tp5561 -Rp5562 -ag6 -(g10 -S'$I\x92$I\x92\r@' -p5563 -tp5564 -Rp5565 -ag6 -(g10 -S'\xcak(\xaf\xa1\xbc\x0c@' -p5566 -tp5567 -Rp5568 -ag6 -(g10 -S'\x02\xa5\xa8\x97\x91X\r@' -p5569 -tp5570 -Rp5571 -ag6 -(g10 -S'7\x98"\x9fu\x83\r@' -p5572 -tp5573 -Rp5574 -ag6 -(g10 -S'\xde15\xb0wL\r@' -p5575 -tp5576 -Rp5577 -ag6 -(g10 -S'\xa4\x92\xf3\xb2\x88O\x0c@' -p5578 -tp5579 -Rp5580 -ag6 -(g10 -S'\xfe\x90\xc0\xdb\x0f\t\x0c@' -p5581 -tp5582 -Rp5583 -ag6 -(g10 -S'\x92$I\x92$\t\n@' -p5584 -tp5585 -Rp5586 -ag6 -(g10 -S'\x11(\xdaj\xf0\x1c\r@' -p5587 -tp5588 -Rp5589 -ag6 -(g10 -S'\x00\x00\x00\x00\x00\x00\x0c@' -p5590 -tp5591 -Rp5592 -ag6 -(g10 -S'\xb5\xc7U@0$\x0b@' -p5593 -tp5594 -Rp5595 -ag6 -(g10 -S'\xc8B\x16\xb2\x90\x85\x0c@' -p5596 -tp5597 -Rp5598 -ag6 -(g10 -S'\x05/\xa7\xe0\xe5\x14\n@' -p5599 -tp5600 -Rp5601 -ag6 -(g10 -S'\xa2\xed\xef\xb1;\xb4\r@' -p5602 -tp5603 -Rp5604 -ag6 -(g10 -S'\xa77\xbd\xe9Mo\n@' -p5605 -tp5606 -Rp5607 -ag6 -(g10 -S'\xd7\xa3p=\n\xd7\x0b@' -p5608 -tp5609 -Rp5610 -asS'Newton\nw Hessian ' -p5611 -(lp5612 -g6 -(g10 -S'{\x14\xaeG\xe1z\xc4?' -p5613 -tp5614 -Rp5615 -asg140 -(lp5616 -g6 -(g10 -S"t*) \xe1'\xe7?" -p5617 -tp5618 -Rp5619 -ag6 -(g10 -S'\xffh\x7f\xb4?\xda\xef?' -p5620 -tp5621 -Rp5622 -ag6 -(g10 -S'd!\x0bY\xc8B\xe6?' -p5623 -tp5624 -Rp5625 -ag6 -(g10 -S'\x14;\xb1\x13;\xb1\xf3?' -p5626 -tp5627 -Rp5628 -ag6 -(g10 -S'\xb7m\xdb\xb6m\xdb\xee?' -p5629 -tp5630 -Rp5631 -ag6 -(g10 -S'_Cy\r\xe55\xf4?' -p5632 -tp5633 -Rp5634 -ag6 -(g10 -S'1\xc8Y\xb2\xbf\xd6\xe5?' -p5635 -tp5636 -Rp5637 -ag6 -(g10 -S'\x1bL\x91\xcf\xba\xc1\xe4?' -p5638 -tp5639 -Rp5640 -ag6 -(g10 -S'\x81\xbdcj`\xef\xe8?' -p5641 -tp5642 -Rp5643 -ag6 -(g10 -S'#>\x81Tr^\xe6?' -p5644 -tp5645 -Rp5646 -ag6 -(g10 -S'\xa1\xc9\x97\r\x9a|\xe9?' 
-p5647 -tp5648 -Rp5649 -ag6 -(g10 -S'$I\x92$I\x92\xf4?' -p5650 -tp5651 -Rp5652 -ag6 -(g10 -S'\xe09\x02E[\r\xee?' -p5653 -tp5654 -Rp5655 -ag6 -(g10 -S'UUUUUU\xe5?' -p5656 -tp5657 -Rp5658 -ag6 -(g10 -S'\xa1x\xda\xe3* \xe8?' -p5659 -tp5660 -Rp5661 -ag6 -(g10 -S'd!\x0bY\xc8B\xe6?' -p5662 -tp5663 -Rp5664 -ag6 -(g10 -S'\xe0\xe5\x14\xbc\x9c\x82\xe7?' -p5665 -tp5666 -Rp5667 -ag6 -(g10 -S'\xf7\xf7\xd8\x1d\xda\xfe\xee?' -p5668 -tp5669 -Rp5670 -ag6 -(g10 -S'd!\x0bY\xc8B\xe6?' -p5671 -tp5672 -Rp5673 -ag6 -(g10 -S'\xb8\x1e\x85\xebQ\xb8\xee?' -p5674 -tp5675 -Rp5676 -asg202 -(lp5677 -g6 -(g10 -S'\x1b\xbd+2_\x9a\xf8?' -p5678 -tp5679 -Rp5680 -ag6 -(g10 -S'\xb5?\xda\x1f\xed\x8f\xf6?' -p5681 -tp5682 -Rp5683 -ag6 -(g10 -S'z\xd3\x9b\xde\xf4\xa6\xf7?' -p5684 -tp5685 -Rp5686 -ag6 -(g10 -S'\xc5N\xec\xc4N\xec\xf4?' -p5687 -tp5688 -Rp5689 -ag6 -(g10 -S'\x00\x00\x00\x00\x00\x80\xf6?' -p5690 -tp5691 -Rp5692 -ag6 -(g10 -S'\x94\xd7P^Cy\xf5?' -p5693 -tp5694 -Rp5695 -ag6 -(g10 -S'\xb4d\x7f\xad+4\xf7?' -p5696 -tp5697 -Rp5698 -ag6 -(g10 -S'\xdd`\x8a|\xd6\r\xf6?' -p5699 -tp5700 -Rp5701 -ag6 -(g10 -S'{\xc7\xd4\xc0\xde1\xf5?' -p5702 -tp5703 -Rp5704 -ag6 -(g10 -S'\x05R\xc9yY\xc4\xf7?' -p5705 -tp5706 -Rp5707 -ag6 -(g10 -S'm\xd0\xe4\xcb\x06M\xf6?' -p5708 -tp5709 -Rp5710 -ag6 -(g10 -S'\xb7m\xdb\xb6m\xdb\xf5?' -p5711 -tp5712 -Rp5713 -ag6 -(g10 -S'T>\x8c\xfbuI\xf5?' -p5714 -tp5715 -Rp5716 -ag6 -(g10 -S'UUUUUU\xf7?' -p5717 -tp5718 -Rp5719 -ag6 -(g10 -S'+ \x18\x92-\xa2\xf9?' -p5720 -tp5721 -Rp5722 -ag6 -(g10 -S'z\xd3\x9b\xde\xf4\xa6\xf7?' -p5723 -tp5724 -Rp5725 -ag6 -(g10 -S'>4\xd6\x87\xc6\xfa\xf8?' -p5726 -tp5727 -Rp5728 -ag6 -(g10 -S'\xa4\xafy*\x85\xf4\xf5?' -p5729 -tp5730 -Rp5731 -ag6 -(g10 -S'z\xd3\x9b\xde\xf4\xa6\xf7?' -p5732 -tp5733 -Rp5734 -ag6 -(g10 -S'ffffff\xf6?' -p5735 -tp5736 -Rp5737 -asg264 -(lp5738 -g6 -(g10 -S"t*) \xe1'\xe7?" -p5739 -tp5740 -Rp5741 -ag6 -(g10 -S'\xaa\xf0Tx*<\xe5?' -p5742 -tp5743 -Rp5744 -ag6 -(g10 -S'\xbd\xe9Moz\xd3\xeb?' 
-p5745 -tp5746 -Rp5747 -ag6 -(g10 -S'\xd9\x89\x9d\xd8\x89\x9d\xe8?' -p5748 -tp5749 -Rp5750 -ag6 -(g10 -S'$I\x92$I\x92\xe4?' -p5751 -tp5752 -Rp5753 -ag6 -(g10 -S'_Cy\r\xe55\xe4?' -p5754 -tp5755 -Rp5756 -ag6 -(g10 -S'=:\xf0\x9eoL\xeb?' -p5757 -tp5758 -Rp5759 -ag6 -(g10 -S'"\x9fu\x83)\xf2\xe9?' -p5760 -tp5761 -Rp5762 -ag6 -(g10 -S'\x81\xbdcj`\xef\xe8?' -p5763 -tp5764 -Rp5765 -ag6 -(g10 -S'#>\x81Tr^\xe6?' -p5766 -tp5767 -Rp5768 -ag6 -(g10 -S'\xa1\xc9\x97\r\x9a|\xe9?' -p5769 -tp5770 -Rp5771 -ag6 -(g10 -S'$I\x92$I\x92\xe4?' -p5772 -tp5773 -Rp5774 -ag6 -(g10 -S'@\xd1V\x83\xe7\x08\xe4?' -p5775 -tp5776 -Rp5777 -ag6 -(g10 -S'\xab\xaa\xaa\xaa\xaa\xaa\xea?' -p5778 -tp5779 -Rp5780 -ag6 -(g10 -S'\xa1x\xda\xe3* \xe8?' -p5781 -tp5782 -Rp5783 -ag6 -(g10 -S'd!\x0bY\xc8B\xe6?' -p5784 -tp5785 -Rp5786 -ag6 -(g10 -S'\xe0\xe5\x14\xbc\x9c\x82\xe7?' -p5787 -tp5788 -Rp5789 -ag6 -(g10 -S'O\xa5\x90\xbe\xe6\xa9\xe4?' -p5790 -tp5791 -Rp5792 -ag6 -(g10 -S'\xbd\xe9Moz\xd3\xeb?' -p5793 -tp5794 -Rp5795 -ag6 -(g10 -S'{\x14\xaeG\xe1z\xe4?' -p5796 -tp5797 -Rp5798 -asS"L-BFGS \nw f'" -p5799 -(lp5800 -g6 -(g10 -S'\xc2O.D\xdd\x0c\xda?' -p5801 -tp5802 -Rp5803 -ag6 -(g10 -S'\xbf\x8e_\xc7\xaf\xe3\xd7?' -p5804 -tp5805 -Rp5806 -ag6 -(g10 -S'\xeaMoz\xd3\x9b\xde?' -p5807 -tp5808 -Rp5809 -ag6 -(g10 -S';\xb1\x13;\xb1\x13\xdb?' -p5810 -tp5811 -Rp5812 -ag6 -(g10 -S'I\x92$I\x92$\xd7?' -p5813 -tp5814 -Rp5815 -ag6 -(g10 -S'\xcak(\xaf\xa1\xbc\xd6?' -p5816 -tp5817 -Rp5818 -ag6 -(g10 -S'Cs;\x95G\x07\xde?' -p5819 -tp5820 -Rp5821 -ag6 -(g10 -S'\xa6\xc8g\xdd`\x8a\xdc?' -p5822 -tp5823 -Rp5824 -ag6 -(g10 -S'\xdb\xb6m\xdb\xb6m\xdb?' -p5825 -tp5826 -Rp5827 -ag6 -(g10 -S'\xe7e\x11\x9f@*\xd9?' -p5828 -tp5829 -Rp5830 -ag6 -(g10 -S'\xfe\x90\xc0\xdb\x0f\t\xdc?' -p5831 -tp5832 -Rp5833 -ag6 -(g10 -S'I\x92$I\x92$\xd7?' -p5834 -tp5835 -Rp5836 -ag6 -(g10 -S'h\xab\xc1s\x04\x8a\xd6?' -p5837 -tp5838 -Rp5839 -ag6 -(g10 -S'UUUUUU\xdd?' -p5840 -tp5841 -Rp5842 -ag6 -(g10 -S'\xb5\xc7U@0$\xdb?' 
-p5843 -tp5844 -Rp5845 -ag6 -(g10 -S'\x91\x85,d!\x0b\xd9?' -p5846 -tp5847 -Rp5848 -ag6 -(g10 -S'\x9c\x82\x97S\xf0r\xda?' -p5849 -tp5850 -Rp5851 -ag6 -(g10 -S'\xf9\xb9b\x96#?\xd7?' -p5852 -tp5853 -Rp5854 -ag6 -(g10 -S'\xeaMoz\xd3\x9b\xde?' -p5855 -tp5856 -Rp5857 -ag6 -(g10 -S'\n\xd7\xa3p=\n\xd7?' -p5858 -tp5859 -Rp5860 -asS"Conjugate gradient\nw f'" -p5861 -(lp5862 -g6 -(g10 -S'\xc2O.D\xdd\x0c\xda?' -p5863 -tp5864 -Rp5865 -ag6 -(g10 -S'\x8a\x03\xc5\x81\xe2@\xe1?' -p5866 -tp5867 -Rp5868 -ag6 -(g10 -S'\x91\x85,d!\x0b\xd9?' -p5869 -tp5870 -Rp5871 -ag6 -(g10 -S'\xc5N\xec\xc4N\xec\xe4?' -p5872 -tp5873 -Rp5874 -ag6 -(g10 -S'n\xdb\xb6m\xdb\xb6\xe0?' -p5875 -tp5876 -Rp5877 -ag6 -(g10 -S'\x94\xd7P^Cy\xe5?' -p5878 -tp5879 -Rp5880 -ag6 -(g10 -S'7\x01\xa5\xa8\x97\x91\xd8?' -p5881 -tp5882 -Rp5883 -ag6 -(g10 -S'\x9fu\x83)\xf2Y\xd7?' -p5884 -tp5885 -Rp5886 -ag6 -(g10 -S'\xdb\xb6m\xdb\xb6m\xdb?' -p5887 -tp5888 -Rp5889 -ag6 -(g10 -S'\xe7e\x11\x9f@*\xd9?' -p5890 -tp5891 -Rp5892 -ag6 -(g10 -S'\xfe\x90\xc0\xdb\x0f\t\xdc?' -p5893 -tp5894 -Rp5895 -ag6 -(g10 -S'\xb7m\xdb\xb6m\xdb\xe5?' -p5896 -tp5897 -Rp5898 -ag6 -(g10 -S'\x04\x8a\xb6\x1a\x06\xe7c\xe0?' -p6245 -tp6246 -Rp6247 -ag6 -(g10 -S'\xdc\xb6m\xdb\xb6m\xeb?' -p6248 -tp6249 -Rp6250 -ag6 -(g10 -S'v,X\xa6E\xac\xe5?' -p6251 -tp6252 -Rp6253 -ag6 -(g10 -S'\x88>B\xdev\x80\xe3?' -p6254 -tp6255 -Rp6256 -ag6 -(g10 -S':Z\xd2\x14\xce\x04\xe2?' -p6257 -tp6258 -Rp6259 -ag6 -(g10 -S'N\x14o#\ru\xee?' -p6260 -tp6261 -Rp6262 -ag6 -(g10 -S'\xa7\xae\xe5\xe0f\xbf\xe0?' -p6263 -tp6264 -Rp6265 -ag6 -(g10 -S'47\x9d\x013\xb2\xe8?' -p6266 -tp6267 -Rp6268 -ag6 -(g10 -S')\xae\xe9\xf9\x89\xc8\xd3?' -p6269 -tp6270 -Rp6271 -ag6 -(g10 -S')\xaf\xa1\xbc\x86\xf2\xe2?' -p6272 -tp6273 -Rp6274 -ag6 -(g10 -S'#s\x02\x9eO\x8f\xeb?' -p6275 -tp6276 -Rp6277 -ag6 -(g10 -S'\n\xd7\xa3p=\n\xe7?' -p6278 -tp6279 -Rp6280 -ag6 -(g10 -S'\xc1\xf0Z\xb5A\x05\xda?' -p6281 -tp6282 -Rp6283 -ag6 -(g10 -S'\x04\xda4\xa0M\x03\xda?' 
-p6284 -tp6285 -Rp6286 -ag6 -(g10 -S'\x93\xba/\x8f\xad\x08\xea?' -p6287 -tp6288 -Rp6289 -ag6 -(g10 -S'4l\x9cu$\xef\xe6?' -p6290 -tp6291 -Rp6292 -ag6 -(g10 -S'"v7\x7f\x8a&\xe5?' -p6293 -tp6294 -Rp6295 -asS"L-BFGS \nw f'" -p6296 -(lp6297 -g6 -(g10 -S'\xae\x1d\x98k\x07\xe6\xda?' -p6298 -tp6299 -Rp6300 -ag6 -(g10 -S'\tO\n\x92?\xaf\xd2?' -p6301 -tp6302 -Rp6303 -ag6 -(g10 -S' {\xd5/\x8a\xe4\xce?' -p6304 -tp6305 -Rp6306 -ag6 -(g10 -S'\xfa\x18\x9c\x8f\xc1\xf9\xd0?' -p6307 -tp6308 -Rp6309 -ag6 -(g10 -S'=<<<<<\xdc?' -p6310 -tp6311 -Rp6312 -ag6 -(g10 -S'f\xdfG\xca\xaa\x81\xd6?' -p6313 -tp6314 -Rp6315 -ag6 -(g10 -S'\xe8\t\xa0A\xc42\xd4?' -p6316 -tp6317 -Rp6318 -ag6 -(g10 -S'\x80\xa1\x1d\xaf\x90\x9e\xd2?' -p6319 -tp6320 -Rp6321 -ag6 -(g10 -S'\xe7\x94.Y`Z\xdf?' -p6322 -tp6323 -Rp6324 -ag6 -(g10 -S'-\x077\xfb\x85X\xd1?' -p6325 -tp6326 -Rp6327 -ag6 -(g10 -S'jO\x9ar%l\xd9?' -p6328 -tp6329 -Rp6330 -ag6 -(g10 -S'\xf0o\xc14T\x8b\xc4?' -p6331 -tp6332 -Rp6333 -ag6 -(g10 -S'Dy\r\xe55\x94\xd3?' -p6334 -tp6335 -Rp6336 -ag6 -(g10 -S'\x1c\x99\x13\xf0|z\xdc?' -p6337 -tp6338 -Rp6339 -ag6 -(g10 -S'\x00\x00\x00\x00\x00\x00\xd8?' -p6340 -tp6341 -Rp6342 -ag6 -(g10 -S'\xe1\xb5j\x83\n4\xcb?' -p6343 -tp6344 -Rp6345 -ag6 -(g10 -S'\xf1*\x12\xaf"\xf1\xca?' -p6346 -tp6347 -Rp6348 -ag6 -(g10 -S'h8\xa9\xfb\xf2\xd8\xda?' -p6349 -tp6350 -Rp6351 -ag6 -(g10 -S'\x84\xe6\x84\xa1\xf4\xd0\xd7?' -p6352 -tp6353 -Rp6354 -ag6 -(g10 -S'4\x1c\x86\x94\x06\xdb\xd5?' -p6355 -tp6356 -Rp6357 -asS"Conjugate gradient\nw f'" -p6358 -(lp6359 -g6 -(g10 -S'Y\xc8B\x16\xb2\x90\xf5?' -p6360 -tp6361 -Rp6362 -ag6 -(g10 -S'#<)H\xfe\xbc\xfa?' -p6363 -tp6364 -Rp6365 -ag6 -(g10 -S"\xaf\xfaE\x91\xdc'\xfb?" -p6366 -tp6367 -Rp6368 -ag6 -(g10 -S'\x96\xa8]\x89\xda\x95\x05@' -p6369 -tp6370 -Rp6371 -ag6 -(g10 -S'\xd5\x8b\xf9\xd4\x8b\xf9\xf4?' -p6372 -tp6373 -Rp6374 -ag6 -(g10 -S'\x03\xa3\x98\x0c\xaaw\xff?' -p6375 -tp6376 -Rp6377 -ag6 -(g10 -S'\xe3F\xa1uL\x8b\x02@' -p6378 -tp6379 -Rp6380 -ag6 -(g10 -S'\xb7\xef\x02K\xd9\xae\xff?' 
-p6381 -tp6382 -Rp6383 -ag6 -(g10 -S'\xcc+\x86e+\xb1\xf1?' -p6384 -tp6385 -Rp6386 -ag6 -(g10 -S'\xda/\xc4\x8a\xd2\xf8\x00@' -p6387 -tp6388 -Rp6389 -ag6 -(g10 -S'\xc6\xd3\xf0\x86\x0e\xcb\xf3?' -p6390 -tp6391 -Rp6392 -ag6 -(g10 -S'\xd5"\x95\xcbN[\xf2?' -p6393 -tp6394 -Rp6395 -ag6 -(g10 -S'\xbd\x86\xf2\x1a\xcak\xf9?' -p6396 -tp6397 -Rp6398 -ag6 -(g10 -S'\xeb@\xdb\xbdU\x9a\xf2?' -p6399 -tp6400 -Rp6401 -ag6 -(g10 -S'{\x14\xaeG\xe1z\xf2?' -p6402 -tp6403 -Rp6404 -ag6 -(g10 -S'\xf5\xcb$6\x7f\xc2\xfa?' -p6405 -tp6406 -Rp6407 -ag6 -(g10 -S'[k\xad\xb5\xd6Z\xef?' -p6408 -tp6409 -Rp6410 -ag6 -(g10 -S'\x114\x9c\xd4}y\xf0?' -p6411 -tp6412 -Rp6413 -ag6 -(g10 -S'j:\xd7\x07\x9c\xba\xf4?' -p6414 -tp6415 -Rp6416 -ag6 -(g10 -S'\x04c\x04\x92\x8e\x89\x01@' -p6417 -tp6418 -Rp6419 -asS"BFGS\nw f'" -p6420 -(lp6421 -g6 -(g10 -S'\x04s\xed\xc0\\;\xe0?' -p6422 -tp6423 -Rp6424 -ag6 -(g10 -S'H\xfe\xbc\xca\xe2\x8c\xd6?' -p6425 -tp6426 -Rp6427 -ag6 -(g10 -S'\xbb\xac\x9d\x8e\x7fp\xd1?' -p6428 -tp6429 -Rp6430 -ag6 -(g10 -S'Q\xbb\x12\xb5+Q\xd3?' -p6431 -tp6432 -Rp6433 -ag6 -(g10 -S'\xfeF\xd9\xfdF\xd9\xdd?' -p6434 -tp6435 -Rp6436 -ag6 -(g10 -S'\xedv\xc5\xe9\xd3,\xdd?' -p6437 -tp6438 -Rp6439 -ag6 -(g10 -S'i7\x17\xcf\xf9\xfb\xd6?' -p6440 -tp6441 -Rp6442 -ag6 -(g10 -S'W\x87;\x1f5\x07\xdb?' -p6443 -tp6444 -Rp6445 -ag6 -(g10 -S'\xd7L\x95\x03}B\xe3?' -p6446 -tp6447 -Rp6448 -ag6 -(g10 -S'\x81\xde\xa9k9\xb8\xd9?' -p6449 -tp6450 -Rp6451 -ag6 -(g10 -S'C\xb0\x8e6\xefS\xdc?' -p6452 -tp6453 -Rp6454 -ag6 -(g10 -S'>\x85\xde\xf6\xce\xac\xcd?' -p6455 -tp6456 -Rp6457 -ag6 -(g10 -S'\x87\xf2\x1a\xcak(\xdb?' -p6458 -tp6459 -Rp6460 -ag6 -(g10 -S'i\x8a_\x12!\xd5\xe2?' -p6461 -tp6462 -Rp6463 -ag6 -(g10 -S'\xc2\xf5(\\\x8f\xc2\xe1?' -p6464 -tp6465 -Rp6466 -ag6 -(g10 -S'1\x0eSN\xddm\xd9?' -p6467 -tp6468 -Rp6469 -ag6 -(g10 -S'\xf1*\x12\xaf"\xf1\xca?' -p6470 -tp6471 -Rp6472 -ag6 -(g10 -S'[\x114\x9c\xd4}\xe1?' -p6473 -tp6474 -Rp6475 -ag6 -(g10 -S"'Q5X[3\xe1?" -p6476 -tp6477 -Rp6478 -ag6 -(g10 -S'\xea\x98\x98i\xdf\xe7\xdc?' 
-p6479 -tp6480 -Rp6481 -assg1508 -(dp6482 -g4 -(lp6483 -g6 -(g10 -S'\x1b\x97\xda\xce\x1e\xce\xd3?' -p6484 -tp6485 -Rp6486 -ag6 -(g10 -S']\xf3\xc6\x050?\xdd?' -p6487 -tp6488 -Rp6489 -ag6 -(g10 -S'\xbdH\x1d\x0f\x10!\xd3?' -p6490 -tp6491 -Rp6492 -ag6 -(g10 -S'\x02\xe9X\xca$\xd8\xf5?' -p6493 -tp6494 -Rp6495 -ag6 -(g10 -S'\x8d\xc0\xe1\xcaW\xeb\xe3?' -p6496 -tp6497 -Rp6498 -ag6 -(g10 -S'\xa1\xe8?%\xa2\x94\xed?' -p6499 -tp6500 -Rp6501 -ag6 -(g10 -S"\x88\xae\x00\xe2'%\xed?" -p6502 -tp6503 -Rp6504 -ag6 -(g10 -S"\x1cg87\xd9'\xd3?" -p6505 -tp6506 -Rp6507 -ag6 -(g10 -S'\xa3\x8b.\xba\xe8\xa2\xf3?' -p6508 -tp6509 -Rp6510 -ag6 -(g10 -S'n\xed\x8d\xd5\x1f\xe8\xd1?' -p6511 -tp6512 -Rp6513 -ag6 -(g10 -S'K\x96\xb9\x16\xc4\xe3\xdb?' -p6514 -tp6515 -Rp6516 -ag6 -(g10 -S'W\xa4\x13\x8ea\xfd\xeb?' -p6517 -tp6518 -Rp6519 -ag6 -(g10 -S'\t6c\x90\xbd\xea\xd7?' -p6520 -tp6521 -Rp6522 -ag6 -(g10 -S'L&\xa5\x8fM\x92\xd3?' -p6523 -tp6524 -Rp6525 -ag6 -(g10 -S'T\xdfc\xd8\xd4\xf7\xd8?' -p6526 -tp6527 -Rp6528 -ag6 -(g10 -S'\xd4+\xd4+\xd4+\xd4?' -p6529 -tp6530 -Rp6531 -ag6 -(g10 -S'0\xc9\x9c\x8e\xc7Q\xd4?' -p6532 -tp6533 -Rp6534 -ag6 -(g10 -S'\xd4+\xd4+\xd4+\xf4?' -p6535 -tp6536 -Rp6537 -ag6 -(g10 -S'(\xfdT)\xa0\xec\xd4?' -p6538 -tp6539 -Rp6540 -ag6 -(g10 -S'\xb5\xaa9!a\x9f\xd4?' -p6541 -tp6542 -Rp6543 -asg73 -(lp6544 -g6 -(g10 -S'\xc9\xc6\xa5\xb6\xb3\x87\xd3?' -p6545 -tp6546 -Rp6547 -ag6 -(g10 -S'\x85j\x05=k\xeb\xe4?' -p6548 -tp6549 -Rp6550 -ag6 -(g10 -S'^\x93\\\x82*\xfd\xd4?' -p6551 -tp6552 -Rp6553 -ag6 -(g10 -S'\x02\xe9X\xca$\xd8\xf5?' -p6554 -tp6555 -Rp6556 -ag6 -(g10 -S'{[\xcc\xb1]S\xed?' -p6557 -tp6558 -Rp6559 -ag6 -(g10 -S'\xef\xef\x9by/V\xf4?' -p6560 -tp6561 -Rp6562 -ag6 -(g10 -S'\x99\xc0\xe5\x82\x80\xe9\xed?' -p6563 -tp6564 -Rp6565 -ag6 -(g10 -S'\xaa\x9a\xd4\xd2\xc5\xbb\xdc?' -p6566 -tp6567 -Rp6568 -ag6 -(g10 -S']t\xd1E\x17]\xff?' -p6569 -tp6570 -Rp6571 -ag6 -(g10 -S'\x18n\xed\x8d\xd5\x1f\xd8?' -p6572 -tp6573 -Rp6574 -ag6 -(g10 -S'\xba\xb8\x01*4_\xe9?' 
-p6575 -tp6576 -Rp6577 -ag6 -(g10 -S'~\xc0\xd6\x88\x19\x9f\xeb?' -p6578 -tp6579 -Rp6580 -ag6 -(g10 -S'R$\xf7\xc9\x9co\xe2?' -p6581 -tp6582 -Rp6583 -ag6 -(g10 -S'3\xe8\x92\xc0\n\xa1\xdd?' -p6584 -tp6585 -Rp6586 -ag6 -(g10 -S'\xf01l\xea{\x0c\xdb?' -p6587 -tp6588 -Rp6589 -ag6 -(g10 -S'$\xdb$\xdb$\xdb\xdc?' -p6590 -tp6591 -Rp6592 -ag6 -(g10 -S'\xb0\xc8\xc0\xb7?$\xd7?' -p6593 -tp6594 -Rp6595 -ag6 -(g10 -S'\x06\xfa\x05\xfa\x05\xfa\xfd?' -p6596 -tp6597 -Rp6598 -ag6 -(g10 -S"<\xaa3\xc8'Z\xdd?" -p6599 -tp6600 -Rp6601 -ag6 -(g10 -S'\xb3\xda\xfe\xea~\xc1\xe0?' -p6602 -tp6603 -Rp6604 -asS'Newton\nw Hessian ' -p6605 -(lp6606 -g6 -(g10 -S'\xbe\x97\x88\x1d\xc8T\x92?' -p6607 -tp6608 -Rp6609 -asg140 -(lp6610 -g6 -(g10 -S">\x9c'R\xd04\x08@" -p6611 -tp6612 -Rp6613 -ag6 -(g10 -S'\xe9L/Yu\x7f\x03@' -p6614 -tp6615 -Rp6616 -ag6 -(g10 -S'\x90\x18\xba\x15\x87\x7f\x0b@' -p6617 -tp6618 -Rp6619 -ag6 -(g10 -S'\xe0\xe2\xb4f\xfbD\x05@' -p6620 -tp6621 -Rp6622 -ag6 -(g10 -S'YE\x86\xfe\xa5\x8d\x08@' -p6623 -tp6624 -Rp6625 -ag6 -(g10 -S'\xc0\x04o\x10\xcf\xfd\r@' -p6626 -tp6627 -Rp6628 -ag6 -(g10 -S'w\x9c\x1bA\xcf`\x0c@' -p6629 -tp6630 -Rp6631 -ag6 -(g10 -S'\x00t\x1f^\xd4\x8c\x05@' -p6632 -tp6633 -Rp6634 -ag6 -(g10 -S'\xa3\x8b.\xba\xe8\xa2\x03@' -p6635 -tp6636 -Rp6637 -ag6 -(g10 -S'\xcd\xe6\xd2\xa1\x06\xbb\x0b@' -p6638 -tp6639 -Rp6640 -ag6 -(g10 -S'\xa2\x0ee;\xa9\x87\x04@' -p6641 -tp6642 -Rp6643 -ag6 -(g10 -S'k\xfb\x80\xad\x113\x0e@' -p6644 -tp6645 -Rp6646 -ag6 -(g10 -S'\x8b\x03|\xf4l\xe5\xfd?' -p6647 -tp6648 -Rp6649 -ag6 -(g10 -S'\xdfo\x8e\xf3\xe0v\x08@' -p6650 -tp6651 -Rp6652 -ag6 -(g10 -S'\xf6\x14\xd8~=\x05\x06@' -p6653 -tp6654 -Rp6655 -ag6 -(g10 -S'6\xca5\xca5\xca\x01@' -p6656 -tp6657 -Rp6658 -ag6 -(g10 -S'\xdc\xfa\xb0\xa5O\xed\x0c@' -p6659 -tp6660 -Rp6661 -ag6 -(g10 -S'c\x9cc\x9cc\x9c\x03@' -p6662 -tp6663 -Rp6664 -ag6 -(g10 -S'WPW\x12g\xaf\x07@' -p6665 -tp6666 -Rp6667 -ag6 -(g10 -S'\n\x10&\x0fkU\x03@' -p6668 -tp6669 -Rp6670 -asg202 -(lp6671 -g6 -(g10 -S'\xd3U\x07nr\xb4\xc2?' 
-p6672 -tp6673 -Rp6674 -ag6 -(g10 -S'\xf5\xac\xad\x93;\x9f\xcb?' -p6675 -tp6676 -Rp6677 -ag6 -(g10 -S'\xcf\x8b\xd4\xf1\x00\x11\xc2?' -p6678 -tp6679 -Rp6680 -ag6 -(g10 -S'{\xd0\xc8;\x7f\x8b\xe3?' -p6681 -tp6682 -Rp6683 -ag6 -(g10 -S'4{[\xcc\xb1]\xd3?' -p6684 -tp6685 -Rp6686 -ag6 -(g10 -S'&x\x83x\xee\xef\xdb?' -p6687 -tp6688 -Rp6689 -ag6 -(g10 -S'Dfl^\xc5\x13\xda?' -p6690 -tp6691 -Rp6692 -ag6 -(g10 -S'\xf0D\xb5\x97i\x17\xc2?' -p6693 -tp6694 -Rp6695 -ag6 -(g10 -S'\x8c.\xba\xe8\xa2\x8b\xe2?' -p6696 -tp6697 -Rp6698 -ag6 -(g10 -S'\xaf|"fs\xe9\xc0?' -p6699 -tp6700 -Rp6701 -ag6 -(g10 -S'\x1eRmkp\x1d\xcb?' -p6702 -tp6703 -Rp6704 -ag6 -(g10 -S'\x91\x85,d!\x0b\xd9?' -p6705 -tp6706 -Rp6707 -ag6 -(g10 -S'\x97\x96\x96\x96\x96\x96\xc6?' -p6708 -tp6709 -Rp6710 -ag6 -(g10 -S'\xca\xc8n\xbd \x07\xc3?' -p6711 -tp6712 -Rp6713 -ag6 -(g10 -S'\xecR^\xcc\xba\x94\xc7?' -p6714 -tp6715 -Rp6716 -ag6 -(g10 -S'\xf3\x0c\xf3\x0c\xf3\x0c\xc3?' -p6717 -tp6718 -Rp6719 -ag6 -(g10 -S'\xca/[\xb1\xca0\xc3?' -p6720 -tp6721 -Rp6722 -ag6 -(g10 -S'\xf3\x0c\xf3\x0c\xf3\x0c\xe3?' -p6723 -tp6724 -Rp6725 -ag6 -(g10 -S'\xdf`\x97\n\t\xc3\xc3?' -p6726 -tp6727 -Rp6728 -ag6 -(g10 -S'\xf7eM\xe0\xba\x0c\xc4?' -p6729 -tp6730 -Rp6731 -asg264 -(lp6732 -g6 -(g10 -S'\x8a\x144\r\xc6\x9a\xc1?' -p6733 -tp6734 -Rp6735 -ag6 -(g10 -S'\x8cf\x94!G\xff\xc9?' -p6736 -tp6737 -Rp6738 -ag6 -(g10 -S'\xe1\xce\x8b\xd4\xf1\x00\xc1?' -p6739 -tp6740 -Rp6741 -ag6 -(g10 -S'7\xc4\x80t,e\xe2?' -p6742 -tp6743 -Rp6744 -ag6 -(g10 -S'(\xab\xc8\xd0\xbf\xb4\xd1?' -p6745 -tp6746 -Rp6747 -ag6 -(g10 -S'\xac\x07\xc7\xcb:K\xda?' -p6748 -tp6749 -Rp6750 -ag6 -(g10 -S'"B\xa2\x1c\x14\x8b\xd8?' -p6751 -tp6752 -Rp6753 -ag6 -(g10 -S'\xc4"2\xf8\xf9\x06\xc1?' -p6754 -tp6755 -Rp6756 -ag6 -(g10 -S'u\xd1E\x17]t\xe1?' -p6757 -tp6758 -Rp6759 -ag6 -(g10 -S'n\xed\x8d\xd5\x1f\xe8\xc1?' -p6760 -tp6761 -Rp6762 -ag6 -(g10 -S'\x98\x85\x88iu\xca\xc8?' -p6763 -tp6764 -Rp6765 -ag6 -(g10 -S'.\xf68O\x01\x92\xd7?' -p6766 -tp6767 -Rp6768 -ag6 -(g10 -S'%\xf7\xc9\x9coB\xc5?' 
-p6769 -tp6770 -Rp6771 -ag6 -(g10 -S'C\xb0\xcbF\x9ae\xc1?' -p6772 -tp6773 -Rp6774 -ag6 -(g10 -S'\x83\xc6X\xc0\xa01\xc6?' -p6775 -tp6776 -Rp6777 -ag6 -(g10 -S'\xd4+\xd4+\xd4+\xc4?' -p6778 -tp6779 -Rp6780 -ag6 -(g10 -S'd\x96\x19\xd4\xcd\x0f\xc2?' -p6781 -tp6782 -Rp6783 -ag6 -(g10 -S'\x12\xee\x11\xee\x11\xee\xe1?' -p6784 -tp6785 -Rp6786 -ag6 -(g10 -S'\x96\xc4\xd9\xebq\x99\xc2?' -p6787 -tp6788 -Rp6789 -ag6 -(g10 -S'\xbe\x97\x88\x1d\xc8T\xc2?' -p6790 -tp6791 -Rp6792 -asS"L-BFGS \nw f'" -p6793 -(lp6794 -g6 -(g10 -S'\xd3U\x07nr\xb4\xb2?' -p6795 -tp6796 -Rp6797 -ag6 -(g10 -S'\xf5\xac\xad\x93;\x9f\xbb?' -p6798 -tp6799 -Rp6800 -ag6 -(g10 -S'\xcf\x8b\xd4\xf1\x00\x11\xb2?' -p6801 -tp6802 -Rp6803 -ag6 -(g10 -S'{\xd0\xc8;\x7f\x8b\xd3?' -p6804 -tp6805 -Rp6806 -ag6 -(g10 -S'\xdb5\xd5\xcd\x0b\xd0\xc2?' -p6807 -tp6808 -Rp6809 -ag6 -(g10 -S'&x\x83x\xee\xef\xcb?' -p6810 -tp6811 -Rp6812 -ag6 -(g10 -S'Dfl^\xc5\x13\xca?' -p6813 -tp6814 -Rp6815 -ag6 -(g10 -S'\xf0D\xb5\x97i\x17\xb2?' -p6816 -tp6817 -Rp6818 -ag6 -(g10 -S'\x8c.\xba\xe8\xa2\x8b\xd2?' -p6819 -tp6820 -Rp6821 -ag6 -(g10 -S'-^\xf9D\xcc\xe6\xb2?' -p6822 -tp6823 -Rp6824 -ag6 -(g10 -S'\xf2\r!\xc0\x1cW\xba?' -p6825 -tp6826 -Rp6827 -ag6 -(g10 -S'\x91\x85,d!\x0b\xc9?' -p6828 -tp6829 -Rp6830 -ag6 -(g10 -S'\x97\x96\x96\x96\x96\x96\xb6?' -p6831 -tp6832 -Rp6833 -ag6 -(g10 -S'Hk8\xeb\xf3{\xb2?' -p6834 -tp6835 -Rp6836 -ag6 -(g10 -S'\xecR^\xcc\xba\x94\xb7?' -p6837 -tp6838 -Rp6839 -ag6 -(g10 -S'\xb5J\xb5J\xb5J\xb5?' -p6840 -tp6841 -Rp6842 -ag6 -(g10 -S'\xca/[\xb1\xca0\xb3?' -p6843 -tp6844 -Rp6845 -ag6 -(g10 -S'\xf3\x0c\xf3\x0c\xf3\x0c\xd3?' -p6846 -tp6847 -Rp6848 -ag6 -(g10 -S'\xdf`\x97\n\t\xc3\xb3?' -p6849 -tp6850 -Rp6851 -ag6 -(g10 -S'9!a\x9f\x14z\xb3?' -p6852 -tp6853 -Rp6854 -asS"Conjugate gradient\nw f'" -p6855 -(lp6856 -g6 -(g10 -S'\x86\xdd\xfa\xb2|N\x13@' -p6857 -tp6858 -Rp6859 -ag6 -(g10 -S'\x9bDZN\xfb\xa8\x12@' -p6860 -tp6861 -Rp6862 -ag6 -(g10 -S'\xe9gN\x83;\xaf\x11@' -p6863 -tp6864 -Rp6865 -ag6 -(g10 -S'\xf1\xe5\x86\x18\x90\x8e\xf5?' 
-p6866 -tp6867 -Rp6868 -ag6 -(g10 -S'\x118\\\xf9j}\n@' -p6869 -tp6870 -Rp6871 -ag6 -(g10 -S'\xd5B}]\x00k\xf7?' -p6872 -tp6873 -Rp6874 -ag6 -(g10 -S'5\xeb\x92\xbf\x12\xc7\x00@' -p6875 -tp6876 -Rp6877 -ag6 -(g10 -S'\xaecA\x9b\x83+\x14@' -p6878 -tp6879 -Rp6880 -ag6 -(g10 -S'\xe9\xa2\x8b.\xba\xe8\xf3?' -p6881 -tp6882 -Rp6883 -ag6 -(g10 -S'\xde\x99\x8c\x16\xaf|\x11@' -p6884 -tp6885 -Rp6886 -ag6 -(g10 -S'\xe4\xbd\x7f\xc6Q\xcb\x11@' -p6887 -tp6888 -Rp6889 -ag6 -(g10 -S'6b\xc6\xe7\xf2K\x00@' -p6890 -tp6891 -Rp6892 -ag6 -(g10 -S'Er\x9f\xcc\xf9&\x16@' -p6893 -tp6894 -Rp6895 -ag6 -(g10 -S'\xe4\xe9\x10\xec\xb2\x91\x12@' -p6896 -tp6897 -Rp6898 -ag6 -(g10 -S'g4\x1c\xbd\x19\r\x13@' -p6899 -tp6900 -Rp6901 -ag6 -(g10 -S'\xbcC\xbcC\xbc\xc3\x15@' -p6902 -tp6903 -Rp6904 -ag6 -(g10 -S'>\x90\x12\xcc\r\xa2\x10@' -p6905 -tp6906 -Rp6907 -ag6 -(g10 -S'\x1c\xe4\x1b\xe4\x1b\xe4\xf3?' -p6908 -tp6909 -Rp6910 -ag6 -(g10 -S'\x90n,,K\xc3\x12@' -p6911 -tp6912 -Rp6913 -ag6 -(g10 -S's\x95^\x1bK\xb6\x14@' -p6914 -tp6915 -Rp6916 -asS"BFGS\nw f'" -p6917 -(lp6918 -g6 -(g10 -S'\xc07D\xff\xf4Z\xc4?' -p6919 -tp6920 -Rp6921 -ag6 -(g10 -S'\x92\x96\xd3>*\x0f\xce?' -p6922 -tp6923 -Rp6924 -ag6 -(g10 -S'4\xa7\xc1\x9d\x17\xa9\xc3?' -p6925 -tp6926 -Rp6927 -ag6 -(g10 -S'$\xef\xfc-Nk\xe6?' -p6928 -tp6929 -Rp6930 -ag6 -(g10 -S'\xe7\x05h\xc9\xfdx\xd4?' -p6931 -tp6932 -Rp6933 -ag6 -(g10 -S'\xde \x9e\xfb\xfbf\xde?' -p6934 -tp6935 -Rp6936 -ag6 -(g10 -S'\x99\xc0\xe5\x82\x80\xe9\xdd?' -p6937 -tp6938 -Rp6939 -ag6 -(g10 -S'2\xf8\xf9\x06\x11\xb0\xc3?' -p6940 -tp6941 -Rp6942 -ag6 -(g10 -S'/\xba\xe8\xa2\x8b.\xe4?' -p6943 -tp6944 -Rp6945 -ag6 -(g10 -S'\xce\xa5C\rvg\xc2?' -p6946 -tp6947 -Rp6948 -ag6 -(g10 -S'x\xda\x05\xc2\x17\xaa\xcc?' -p6949 -tp6950 -Rp6951 -ag6 -(g10 -S'\x08l\x8d\x98\xf1\xb9\xdc?' -p6952 -tp6953 -Rp6954 -ag6 -(g10 -S'\xc2\x85I\r\xd1\x94\xc8?' -p6955 -tp6956 -Rp6957 -ag6 -(g10 -S'\xce\x83\xdbaz\x1d\xc4?' -p6958 -tp6959 -Rp6960 -ag6 -(g10 -S'\x88\xa5f\xdea\xa9\xc9?' 
-p6961 -tp6962 -Rp6963 -ag6 -(g10 -S'D\xbbD\xbbD\xbb\xc4?' -p6964 -tp6965 -Rp6966 -ag6 -(g10 -S'\xe3\x95=\xfdE\xe2\xc4?' -p6967 -tp6968 -Rp6969 -ag6 -(g10 -S'D\xbbD\xbbD\xbb\xe4?' -p6970 -tp6971 -Rp6972 -ag6 -(g10 -S'M\xcb\xb3\xb8k\x81\xc5?' -p6973 -tp6974 -Rp6975 -ag6 -(g10 -S's\xef%b\x072\xc5?' -p6976 -tp6977 -Rp6978 -assg2006 -(dp6979 -g4 -(lp6980 -g6 -(g10 -S'\xc6\x18c\x8c1\xc6\xe8?' -p6981 -tp6982 -Rp6983 -ag6 -(g10 -S'\xb3\xa6\xac)k\xca\xea?' -p6984 -tp6985 -Rp6986 -ag6 -(g10 -S'&\xf0[\x843\xd5\xe1?' -p6987 -tp6988 -Rp6989 -ag6 -(g10 -S'\xdcC.+\x06J\xe8?' -p6990 -tp6991 -Rp6992 -ag6 -(g10 -S'\x1b\x97\xda\xce\x1e\xce\xe3?' -p6993 -tp6994 -Rp6995 -ag6 -(g10 -S'\x0ex\xfc\xe1\x80\xc7\xef?' -p6996 -tp6997 -Rp6998 -ag6 -(g10 -S'\xa0\xbbJ1Aw\xe5?' -p6999 -tp7000 -Rp7001 -ag6 -(g10 -S'v\n\x9f\xa4,@\xec?' -p7002 -tp7003 -Rp7004 -ag6 -(g10 -S'\x1cK\x99\x04\xbb\n\xef?' -p7005 -tp7006 -Rp7007 -ag6 -(g10 -S'\xfc\x85XQ\x1a\x1f\xe9?' -p7008 -tp7009 -Rp7010 -ag6 -(g10 -S'\x10\xa8\x8e\xbd\xb5a\xea?' -p7011 -tp7012 -Rp7013 -ag6 -(g10 -S'\xe5\xb3n0E>\xeb?' -p7014 -tp7015 -Rp7016 -ag6 -(g10 -S'\xe2\xe0}kdu\xe9?' -p7017 -tp7018 -Rp7019 -ag6 -(g10 -S'\xbf\x9e\xabX6\xbe\xe9?' -p7020 -tp7021 -Rp7022 -ag6 -(g10 -S'\xd1\n\x9b\x03\x89V\xe8?' -p7023 -tp7024 -Rp7025 -ag6 -(g10 -S'vI\xe5\xc3\xb8_\xe7?' -p7026 -tp7027 -Rp7028 -ag6 -(g10 -S'<\x815\xb9Y\x85\xe2?' -p7029 -tp7030 -Rp7031 -ag6 -(g10 -S'\x1d>\x96\xddxp\xea?' -p7032 -tp7033 -Rp7034 -ag6 -(g10 -S'\xa1\xf3\x00;J\xfa\xed?' -p7035 -tp7036 -Rp7037 -ag6 -(g10 -S'Y\x87S<\xd6\xe1\xe4?' 
-p7038 -tp7039 -Rp7040 -asg73 -(lp7041 -g6 -(g10 -S'\xa5\x94RJ)\xa5\x04@' -p7042 -tp7043 -Rp7044 -ag6 -(g10 -S'\xc9\xe4\x9f\xd4\xde"\x03@' -p7045 -tp7046 -Rp7047 -ag6 -(g10 -S'\xf1\xf0\xf0\xf0\xf0\xf0\x00@' -p7048 -tp7049 -Rp7050 -ag6 -(g10 -S'\xc9e\xc5@\to\x02@' -p7051 -tp7052 -Rp7053 -ag6 -(g10 -S'%&<\x86\xdd\xfa\x02@' -p7054 -tp7055 -Rp7056 -ag6 -(g10 -S'Z}\xa9\xa0\xd5\x97\x06@' -p7057 -tp7058 -Rp7059 -ag6 -(g10 -S',d!\x0bY\xc8\x02@' -p7060 -tp7061 -Rp7062 -ag6 -(g10 -S'\xf2+\xcf\x19U\xda\x01@' -p7063 -tp7064 -Rp7065 -ag6 -(g10 -S'\xf2\xce\xdf\xe2\xb4f\x03@' -p7066 -tp7067 -Rp7068 -ag6 -(g10 -S'\xe6\xe0f\xbf\x10+\x02@' -p7069 -tp7070 -Rp7071 -ag6 -(g10 -S'CJ\x9eeD\x1f\x00@' -p7072 -tp7073 -Rp7074 -ag6 -(g10 -S'?\xeb\x06S\xe4\xb3\x03@' -p7075 -tp7076 -Rp7077 -ag6 -(g10 -S'\xe1}kdu\x19\x02@' -p7078 -tp7079 -Rp7080 -ag6 -(g10 -S'.\xa00\xaa\xd3\xe4\x00@' -p7081 -tp7082 -Rp7083 -ag6 -(g10 -S'\x1dH\xb4\xc2\xe6@\x02@' -p7084 -tp7085 -Rp7086 -ag6 -(g10 -S'\x12(\xdaj\xf0\x1c\x01@' -p7087 -tp7088 -Rp7089 -ag6 -(g10 -S'L|_\xd4\xf2o\x00@' -p7090 -tp7091 -Rp7092 -ag6 -(g10 -S'\xbd )\xff\xd0\xb7\x05@' -p7093 -tp7094 -Rp7095 -ag6 -(g10 -S'\x08\x13\x9c\xcc\x8dW\x04@' -p7096 -tp7097 -Rp7098 -ag6 -(g10 -S'\xa8\xb2\xab&\xaa\xec\x02@' -p7099 -tp7100 -Rp7101 -asS'Newton\nw Hessian ' -p7102 -(lp7103 -g6 -(g10 -S'!_oP\xc8\xd7\xbb?' -p7104 -tp7105 -Rp7106 -asg140 -(lp7107 -g6 -(g10 -S'\xb6\xd6Zk\xad\xb5\xf6?' -p7108 -tp7109 -Rp7110 -ag6 -(g10 -S'\xa8\x073T1\x9e\xee?' -p7111 -tp7112 -Rp7113 -ag6 -(g10 -S'\xa4\xe6_mR\x88\xec?' -p7114 -tp7115 -Rp7116 -ag6 -(g10 -S'T\xe7\xd7\x1erY\xf1?' -p7117 -tp7118 -Rp7119 -ag6 -(g10 -S'\xcf\x1e\xce\x13)h\xea?' -p7120 -tp7121 -Rp7122 -ag6 -(g10 -S'\r\xe9\xbc\xc5\x90\xce\xeb?' -p7123 -tp7124 -Rp7125 -ag6 -(g10 -S'\xb0\xf1h\xfe`\xe3\xf1?' -p7126 -tp7127 -Rp7128 -ag6 -(g10 -S'\xfa\x06j\x18s\xd5\xf2?' -p7129 -tp7130 -Rp7131 -ag6 -(g10 -S'\xbe\xdc\x10\x03\xd2\xb1\xf4?' -p7132 -tp7133 -Rp7134 -ag6 -(g10 -S'\x97\x83\x9b\xfdB\xac\xf8?' 
-p7135 -tp7136 -Rp7137 -ag6 -(g10 -S'`\xc5\t)y\x96\xf1?' -p7138 -tp7139 -Rp7140 -ag6 -(g10 -S'\xa0u\x83)\xf2Y\xf7?' -p7141 -tp7142 -Rp7143 -ag6 -(g10 -S'\x9c\x8a\xe6\t\xb5\x80\xf1?' -p7144 -tp7145 -Rp7146 -ag6 -(g10 -S'$\xfd\xf5\\\xc5\xb2\xf1?' -p7147 -tp7148 -Rp7149 -ag6 -(g10 -S'6\x07\x12\xad\xb09\xf0?' -p7150 -tp7151 -Rp7152 -ag6 -(g10 -S'&\x95\x0f\xe3~]\xf2?' -p7153 -tp7154 -Rp7155 -ag6 -(g10 -S'\x93\x9bU()\xa2\xed?' -p7156 -tp7157 -Rp7158 -ag6 -(g10 -S'\x83u\xb4y\x9f\xe2\xf2?' -p7159 -tp7160 -Rp7161 -ag6 -(g10 -S'\x80\x1d%\xfdN!\xf1?' -p7162 -tp7163 -Rp7164 -ag6 -(g10 -S'u\x9bE2\xddf\xf1?' -p7165 -tp7166 -Rp7167 -asg202 -(lp7168 -g6 -(g10 -S'\x08!\x84\x10B\x08\x01@' -p7169 -tp7170 -Rp7171 -ag6 -(g10 -S'\x9fy\xd59,|\x06@' -p7172 -tp7173 -Rp7174 -ag6 -(g10 -S'\xd9\xe5\xca\x00\x95l\r@' -p7175 -tp7176 -Rp7177 -ag6 -(g10 -S'Kx\xa3\xa9\xf3k\x07@' -p7178 -tp7179 -Rp7180 -ag6 -(g10 -S'\xd8\xad/\xcb\xe7\x94\t@' -p7181 -tp7182 -Rp7183 -ag6 -(g10 -S'\x88\x03\x1e\x7f8\xe0\x01@' -p7184 -tp7185 -Rp7186 -ag6 -(g10 -S'}\x11\xd5:\xfb"\x06@' -p7187 -tp7188 -Rp7189 -ag6 -(g10 -S'\x92A~\xe59\xa3\x02@' -p7190 -tp7191 -Rp7192 -ag6 -(g10 -S'\xb9!\x06\xa4c)\xfb?' -p7193 -tp7194 -Rp7195 -ag6 -(g10 -S'M\xa0w\xeaZ\x0e\xfe?' -p7196 -tp7197 -Rp7198 -ag6 -(g10 -S'G\xf4\x01\xd5\xb1\xb7\x06@' -p7199 -tp7200 -Rp7201 -ag6 -(g10 -S'*\xf2Y7\x98"\xff?' -p7202 -tp7203 -Rp7204 -ag6 -(g10 -S'J\x1c\xbco\x8d\xac\x06@' -p7205 -tp7206 -Rp7207 -ag6 -(g10 -S'\xc8\xbc\x94\x08\x1e\xe9\x03@' -p7208 -tp7209 -Rp7210 -ag6 -(g10 -S'\xeeRO\xc6o\x97\x08@' -p7211 -tp7212 -Rp7213 -ag6 -(g10 -S'\x9e#P\xb4\xd5\xe0\t@' -p7214 -tp7215 -Rp7216 -ag6 -(g10 -S'\xee\x99c1\\\xf0\x0c@' -p7217 -tp7218 -Rp7219 -ag6 -(g10 -S'W\xe9\nc\xaaE\xfd?' -p7220 -tp7221 -Rp7222 -ag6 -(g10 -S'lF\x0euY\xaa\x01@' -p7223 -tp7224 -Rp7225 -ag6 -(g10 -S'\x94#6\xee\xe4\x88\x05@' -p7226 -tp7227 -Rp7228 -asg264 -(lp7229 -g6 -(g10 -S'\x84\x10B\x08!\x84\xe0?' -p7230 -tp7231 -Rp7232 -ag6 -(g10 -S'\xc9\xe4\x9f\xd4\xde"\xe3?' 
-p7233 -tp7234 -Rp7235 -ag6 -(g10 -S'&\xf0[\x843\xd5\xe1?' -p7236 -tp7237 -Rp7238 -ag6 -(g10 -S'T\xe7\xd7\x1erY\xe1?' -p7239 -tp7240 -Rp7241 -ag6 -(g10 -S'\xf5ZT\xf1#\x1b\xe7?' -p7242 -tp7243 -Rp7244 -ag6 -(g10 -S'\t\xcb=\x8d\xb0\xdc\xe3?' -p7245 -tp7246 -Rp7247 -ag6 -(g10 -S'\xa0\xbbJ1Aw\xe5?' -p7248 -tp7249 -Rp7250 -ag6 -(g10 -S'\xf7\xb3\xe2u\x99\x1c\xe9?' -p7251 -tp7252 -Rp7253 -ag6 -(g10 -S'R&\xc1\xae\xc2\x97\xeb?' -p7254 -tp7255 -Rp7256 -ag6 -(g10 -S'\xd8rp\xb3_\x88\xe5?' -p7257 -tp7258 -Rp7259 -ag6 -(g10 -S'\xd5\xb1\xb76Ls\xe7?' -p7260 -tp7261 -Rp7262 -ag6 -(g10 -S'Z7\x98"\x9fu\xe3?' -p7263 -tp7264 -Rp7265 -ag6 -(g10 -S'\xaah\x9eP\x0b\x18\xe3?' -p7266 -tp7267 -Rp7268 -ag6 -(g10 -S'\x97\x12\xc1#\xfd\xf5\xec?' -p7269 -tp7270 -Rp7271 -ag6 -(g10 -S'6\x07\x12\xad\xb09\xe0?' -p7272 -tp7273 -Rp7274 -ag6 -(g10 -S'\xab\xc1s\x04\x8a\xb6\xda?' -p7275 -tp7276 -Rp7277 -ag6 -(g10 -S'<\x815\xb9Y\x85\xe2?' -p7278 -tp7279 -Rp7280 -ag6 -(g10 -S'\x1d>\x96\xddxp\xea?' -p7281 -tp7282 -Rp7283 -ag6 -(g10 -S'\xe1dn\xbc\xa2i\xe5?' -p7284 -tp7285 -Rp7286 -ag6 -(g10 -S'=saF\xcf\\\xe8?' -p7287 -tp7288 -Rp7289 -asS"L-BFGS \nw f'" -p7290 -(lp7291 -g6 -(g10 -S'\x95RJ)\xa5\x94\xd2?' -p7292 -tp7293 -Rp7294 -ag6 -(g10 -S'C\x15\xe3\xe9\xc1\x0c\xd5?' -p7295 -tp7296 -Rp7297 -ag6 -(g10 -S'\x91\xee1\xab\xb8\x9d\xd3?' -p7298 -tp7299 -Rp7300 -ag6 -(g10 -S'v~\xed!\x97\x15\xd3?' -p7301 -tp7302 -Rp7303 -ag6 -(g10 -S'\xe2<\x91\x82\xa6\xc1\xd8?' -p7304 -tp7305 -Rp7306 -ag6 -(g10 -S'\x8a\x92]\x9b(\xd9\xd5?' -p7307 -tp7308 -Rp7309 -ag6 -(g10 -S'\x99\xa0\xbbJ1A\xd7?' -p7310 -tp7311 -Rp7312 -ag6 -(g10 -S'7\xdf@\rc\xae\xda?' -p7313 -tp7314 -Rp7315 -ag6 -(g10 -S'\xb78\xad\xd9>Q\xdd?' -p7316 -tp7317 -Rp7318 -ag6 -(g10 -S'j|d\x02\xbdS\xd7?' -p7319 -tp7320 -Rp7321 -ag6 -(g10 -S'\xf3,#\xfa\x80\xea\xd8?' -p7322 -tp7323 -Rp7324 -ag6 -(g10 -S'}\xd6\r\xa6\xc8g\xd5?' -p7325 -tp7326 -Rp7327 -ag6 -(g10 -S'\xb8FV\x97a\xaf\xd4?' -p7328 -tp7329 -Rp7330 -ag6 -(g10 -S'\x83\xccK\x89\xe0\x91\xde?' 
-p7331 -tp7332 -Rp7333 -ag6 -(g10 -S'\x1dH\xb4\xc2\xe6@\xd2?' -p7334 -tp7335 -Rp7336 -ag6 -(g10 -S'\xe09\x02E[\r\xce?' -p7337 -tp7338 -Rp7339 -ag6 -(g10 -S'\xf5\xda\xbaK|_\xd4?' -p7340 -tp7341 -Rp7342 -ag6 -(g10 -S'D\xb0\x8e6\xefS\xdc?' -p7343 -tp7344 -Rp7345 -ag6 -(g10 -S'\x91\x08\x13\x9c\xcc\x8d\xd7?' -p7346 -tp7347 -Rp7348 -ag6 -(g10 -S'/ih\xcbK\x1a\xda?' -p7349 -tp7350 -Rp7351 -asS"Conjugate gradient\nw f'" -p7352 -(lp7353 -g6 -(g10 -S'\xbe\xf7\xde{\xef\xbd\xe7?' -p7354 -tp7355 -Rp7356 -ag6 -(g10 -S'\x11\x1c\xbb4\nD\xe0?' -p7357 -tp7358 -Rp7359 -ag6 -(g10 -S'\x0e\xe55\x94\xd7P\xde?' -p7360 -tp7361 -Rp7362 -ag6 -(g10 -S'\xe5\xb2b\xa0\x847\xe2?' -p7363 -tp7364 -Rp7365 -ag6 -(g10 -S'\xbc\x00\x0b\xa5\xab\x0e\xdc?' -p7366 -tp7367 -Rp7368 -ag6 -(g10 -S'\x8d\xb0\xdc\xd3\x08\xcb\xdd?' -p7369 -tp7370 -Rp7371 -ag6 -(g10 -S',d!\x0bY\xc8\xe2?' -p7372 -tp7373 -Rp7374 -ag6 -(g10 -S'\x99\x1c\x19\xe4W\x9e\xe3?' -p7375 -tp7376 -Rp7377 -ag6 -(g10 -S'\xf0\xe5\x86\x18\x90\x8e\xe5?' -p7378 -tp7379 -Rp7380 -ag6 -(g10 -S'\xb2\xa24>2\x81\xee?' -p7381 -tp7382 -Rp7383 -ag6 -(g10 -S'\xef\x82\xbf\x8a\x13R\xe2?' -p7384 -tp7385 -Rp7386 -ag6 -(g10 -S'1E>\xeb\x06S\xe8?' -p7387 -tp7388 -Rp7389 -ag6 -(g10 -S'\xa3yB-`L\xe2?' -p7390 -tp7391 -Rp7392 -ag6 -(g10 -S'\x1aZ\xbb\x0f\xb7\x80\xe2?' -p7393 -tp7394 -Rp7395 -ag6 -(g10 -S"\xa9'\xe3\xb7K=\xe1?" -p7396 -tp7397 -Rp7398 -ag6 -(g10 -S'333333\xe3?' -p7399 -tp7400 -Rp7401 -ag6 -(g10 -S'L\xf5\xda\xbaK|\xdf?' -p7402 -tp7403 -Rp7404 -ag6 -(g10 -S'\x96\xae0\xa6Z\xd4\xe3?' -p7405 -tp7406 -Rp7407 -ag6 -(g10 -S'Xo\xf7\xecc3\xe2?' -p7408 -tp7409 -Rp7410 -ag6 -(g10 -S'n\x16\xc9t\x9bE\xe2?' -p7411 -tp7412 -Rp7413 -asS"BFGS\nw f'" -p7414 -(lp7415 -g6 -(g10 -S'\xd7Zk\xad\xb5\xd6\xda?' -p7416 -tp7417 -Rp7418 -ag6 -(g10 -S'-\xd7\xef>N\xb4\xdc?' -p7419 -tp7420 -Rp7421 -ag6 -(g10 -S'\x91\xee1\xab\xb8\x9d\xd3?' -p7422 -tp7423 -Rp7424 -ag6 -(g10 -S'\xfe\xdaC.+\x06\xda?' -p7425 -tp7426 -Rp7427 -ag6 -(g10 -S'\x08y\x17`\xa1t\xd5?' 
-p7428 -tp7429 -Rp7430 -ag6 -(g10 -S'\xc8\x1f\x0ex\xfc\xe1\xe0?' -p7431 -tp7432 -Rp7433 -ag6 -(g10 -S'\x99\xa0\xbbJ1A\xd7?' -p7434 -tp7435 -Rp7436 -ag6 -(g10 -S'\xb65\xfd;\xf6\xd1\xdd?' -p7437 -tp7438 -Rp7439 -ag6 -(g10 -S'\xc1\xae\xc2\x97\x1bb\xe0?' -p7440 -tp7441 -Rp7442 -ag6 -(g10 -S'\x8e\x8fL\xa0w\xea\xda?' -p7443 -tp7444 -Rp7445 -ag6 -(g10 -S'-#\xfa\x80\xea\xd8\xdb?' -p7446 -tp7447 -Rp7448 -ag6 -(g10 -S'\x08S\xe4\xb3n0\xdd?' -p7449 -tp7450 -Rp7451 -ag6 -(g10 -S'\xf1\xbe5\xb2\xba\x0c\xdb?' -p7452 -tp7453 -Rp7454 -ag6 -(g10 -S'\xabX6\xbe\x19Z\xdb?' -p7455 -tp7456 -Rp7457 -ag6 -(g10 -S'\xb8K=\x19\xbf]\xda?' -p7458 -tp7459 -Rp7460 -ag6 -(g10 -S'\x90\x85,d!\x0b\xd9?' -p7461 -tp7462 -Rp7463 -ag6 -(g10 -S'\xf5\xda\xbaK|_\xd4?' -p7464 -tp7465 -Rp7466 -ag6 -(g10 -S'D\xb0\x8e6\xefS\xdc?' -p7467 -tp7468 -Rp7469 -ag6 -(g10 -S'\xa8\xcbR\r:\x0f\xe0?' -p7470 -tp7471 -Rp7472 -ag6 -(g10 -S'K}Z\xc1R\x9f\xd6?' -p7473 -tp7474 -Rp7475 -asssI128 -(dp7476 -g2 -(dp7477 -g4 -(lp7478 -g6 -(g10 -S'1\xdc\xf4W\x8d\xf8\x00@' -p7479 -tp7480 -Rp7481 -ag6 -(g10 -S'm9\x1e\xa4\xcf\xbf\xfc?' -p7482 -tp7483 -Rp7484 -ag6 -(g10 -S'pb\x9aF\xd9\x00\xfd?' -p7485 -tp7486 -Rp7487 -ag6 -(g10 -S'DdF\xaa\xdb\xc1\x00@' -p7488 -tp7489 -Rp7490 -ag6 -(g10 -S'\xa11Q\xf2\xc2\xa7\xf9?' -p7491 -tp7492 -Rp7493 -ag6 -(g10 -S'@ \x10\x08\x04\x02\x01@' -p7494 -tp7495 -Rp7496 -ag6 -(g10 -S'\\D\x11PF\xb8\xfb?' -p7497 -tp7498 -Rp7499 -ag6 -(g10 -S'\xd5?\xef\x88\x12h\x00@' -p7500 -tp7501 -Rp7502 -ag6 -(g10 -S'\x83\xaf\x9d%\xab\x8d\x00@' -p7503 -tp7504 -Rp7505 -ag6 -(g10 -S'\x8b\x18\x7f(\xbc\xca\xfe?' -p7506 -tp7507 -Rp7508 -asg73 -(lp7509 -g6 -(g10 -S'\x12\rw\x1f\x17\xa2\xe0?' -p7510 -tp7511 -Rp7512 -ag6 -(g10 -S'\xf1~\xe44_\xab\xdb?' -p7513 -tp7514 -Rp7515 -ag6 -(g10 -S'4J\x11\xd5)\xaa\xdb?' -p7516 -tp7517 -Rp7518 -ag6 -(g10 -S'&\x18aQ\xc2\x9b\xe1?' -p7519 -tp7520 -Rp7521 -ag6 -(g10 -S'\xe6\xf0Z.i\xc5?' -p7611 -tp7612 -Rp7613 -ag6 -(g10 -S'C\x12\xfd\x9c\x80\xee\xca?' 
-p7614 -tp7615 -Rp7616 -ag6 -(g10 -S'~\xe7\x0b\x93`\x02\xcd?' -p7617 -tp7618 -Rp7619 -ag6 -(g10 -S'\xc4\xbb\x1a\xf0\xdf\xcd\xcd?' -p7620 -tp7621 -Rp7622 -ag6 -(g10 -S'\x9d\xd7\xa7\xd1y}\xca?' -p7623 -tp7624 -Rp7625 -ag6 -(g10 -S'\x17\x04\xf8n\x17\x01\xca?' -p7626 -tp7627 -Rp7628 -ag6 -(g10 -S'j\xf4>\xd4\xd6\xb1\xd2?' -p7629 -tp7630 -Rp7631 -ag6 -(g10 -S'\xf6mV\x17\xa9\xeb\xcf?' -p7632 -tp7633 -Rp7634 -ag6 -(g10 -S'd\xd4\xe7\xa5\x8a\n\xca?' -p7635 -tp7636 -Rp7637 -asS"L-BFGS \nw f'" -p7638 -(lp7639 -g6 -(g10 -S'\xaa^\x82E\xc9ck?' -p7640 -tp7641 -Rp7642 -ag6 -(g10 -S'&\xde\x8f\x9c\xe6ke?' -p7643 -tp7644 -Rp7645 -ag6 -(g10 -S'\xce\xc9\xca\xddv\xdbj?' -p7646 -tp7647 -Rp7648 -ag6 -(g10 -S'yD\x95A\xea\xfel?' -p7649 -tp7650 -Rp7651 -ag6 -(g10 -S'9\xab\xf4}\xdc\xadm?' -p7652 -tp7653 -Rp7654 -ag6 -(g10 -S'}\x07\xcfwPzj?' -p7655 -tp7656 -Rp7657 -ag6 -(g10 -S'\\r\xa4>\xb6\xf5i?' -p7658 -tp7659 -Rp7660 -ag6 -(g10 -S'n7\x99\xe1K\x99r?' -p7661 -tp7662 -Rp7663 -ag6 -(g10 -S'V\x0c\xd7\xee\xab\xdao?' -p7664 -tp7665 -Rp7666 -ag6 -(g10 -S'\xf6\r\x14\x8d\xae\xfci?' -p7667 -tp7668 -Rp7669 -asS"Conjugate gradient\nw f'" -p7670 -(lp7671 -g6 -(g10 -S';VN\x94\xa1\xa8\x80?' -p7672 -tp7673 -Rp7674 -ag6 -(g10 -S'*\xfb\xef:\xb3\xa1w?' -p7675 -tp7676 -Rp7677 -ag6 -(g10 -S'|B\xa6\xea\xda\xaay?' -p7678 -tp7679 -Rp7680 -ag6 -(g10 -S'\xec\xf6P\x14x\xf1~?' -p7681 -tp7682 -Rp7683 -ag6 -(g10 -S'\x0eO9N)-w?' -p7684 -tp7685 -Rp7686 -ag6 -(g10 -S'd\rvP\x11\x12{?' -p7687 -tp7688 -Rp7689 -ag6 -(g10 -S"\x99\x9d\xb9\xfa'my?" -p7690 -tp7691 -Rp7692 -ag6 -(g10 -S'B\xd0\x034Z\xd8\x83?' -p7693 -tp7694 -Rp7695 -ag6 -(g10 -S'H\x8a\x08G\x8f\x8bz?' -p7696 -tp7697 -Rp7698 -ag6 -(g10 -S'\xd4tP\x1ap\xdaz?' -p7699 -tp7700 -Rp7701 -asS"BFGS\nw f'" -p7702 -(lp7703 -g6 -(g10 -S'q1\x0e\x8f\xae`\xa0?' -p7704 -tp7705 -Rp7706 -ag6 -(g10 -S'\xf2\x13\xe2\xac\xfc\x84\x9c?' -p7707 -tp7708 -Rp7709 -ag6 -(g10 -S'\xce\x80t\xc5\x0b\x1c\x9d?' -p7710 -tp7711 -Rp7712 -ag6 -(g10 -S'!\xe3\x929\x07\xcc\xa0?' 
-p7713 -tp7714 -Rp7715 -ag6 -(g10 -S'}\x91\x05]\x05\xf8\x98?' -p7716 -tp7717 -Rp7718 -ag6 -(g10 -S'kE\xf6ej\x9f\xa0?' -p7719 -tp7720 -Rp7721 -ag6 -(g10 -S'\xac3\xfc\x1d\x8e\x0c\x9c?' -p7722 -tp7723 -Rp7724 -ag6 -(g10 -S'wJ#\x9eE1\x9d?' -p7725 -tp7726 -Rp7727 -ag6 -(g10 -S':\x8c\x973h\xb2\xa0?' -p7728 -tp7729 -Rp7730 -ag6 -(g10 -S'\x93\xf6\xf2\xeb\x05\x1a\x9e?' -p7731 -tp7732 -Rp7733 -assg512 -(dp7734 -g4 -(lp7735 -g6 -(g10 -S'\xd1\xb7FQq+\xe9?' -p7736 -tp7737 -Rp7738 -ag6 -(g10 -S'\x7f\x12\xdc@s\x0c\xe9?' -p7739 -tp7740 -Rp7741 -ag6 -(g10 -S'#\xc04$\xe9\t\xe6?' -p7742 -tp7743 -Rp7744 -ag6 -(g10 -S'r\xcb\xf9:A\xa3\xe5?' -p7745 -tp7746 -Rp7747 -ag6 -(g10 -S"\xac\xae'_L8\xed?" -p7748 -tp7749 -Rp7750 -ag6 -(g10 -S'\x04v\xa4z\xf1{\xeb?' -p7751 -tp7752 -Rp7753 -ag6 -(g10 -S'P:\xfd\x84\xfb\x8b\xed?' -p7754 -tp7755 -Rp7756 -ag6 -(g10 -S'\xefc\xa9\xe4J\xf2\xec?' -p7757 -tp7758 -Rp7759 -ag6 -(g10 -S'M=\xdc\xd4\xc3m\xea?' -p7760 -tp7761 -Rp7762 -ag6 -(g10 -S'4\x14z\x1d9\x1b\xeb?' -p7763 -tp7764 -Rp7765 -asg73 -(lp7766 -g6 -(g10 -S'|\x1c\xd1\x884\xe1\xd7?' -p7767 -tp7768 -Rp7769 -ag6 -(g10 -S'\x98!\xbf\x97!\xbf\xd7?' -p7770 -tp7771 -Rp7772 -ag6 -(g10 -S'\x1a<\xb0\xa13~\xd3?' -p7773 -tp7774 -Rp7775 -ag6 -(g10 -S'\xf6\x90\xda\x1b\xbaV\xd2?' -p7776 -tp7777 -Rp7778 -ag6 -(g10 -S'/{\x1e\xe5\xc3\xfd\xde?' -p7779 -tp7780 -Rp7781 -ag6 -(g10 -S'\xeaM\x87\x13\x19^\xda?' -p7782 -tp7783 -Rp7784 -ag6 -(g10 -S'>\x15]\x1d/\x04\xdc?' -p7785 -tp7786 -Rp7787 -ag6 -(g10 -S'\r\xca\x1f\x17l\xa4\xdb?' -p7788 -tp7789 -Rp7790 -ag6 -(g10 -S'xy\x8e\x97\xe7\xf8\xd9?' -p7791 -tp7792 -Rp7793 -ag6 -(g10 -S'\x15Mq\xe9`\xf2\xdc?' -p7794 -tp7795 -Rp7796 -asS'Newton\nw Hessian ' -p7797 -(lp7798 -g6 -(g10 -S'2\x81U\xef^\xc6&?' 
-p7799 -tp7800 -Rp7801 -asg140 -(lp7802 -g6 -(g10 -S'w\xe8mf:\x81\x19@' -p7803 -tp7804 -Rp7805 -ag6 -(g10 -S'@\xeb\xc0+\xe1;\x19@' -p7806 -tp7807 -Rp7808 -ag6 -(g10 -S'\n\xab\x8c`\x18E\x1a@' -p7809 -tp7810 -Rp7811 -ag6 -(g10 -S'\x00\x17\xcc\xc4\x18\xeb\x1a@' -p7812 -tp7813 -Rp7814 -ag6 -(g10 -S'N5W&\xb9|\x17@' -p7815 -tp7816 -Rp7817 -ag6 -(g10 -S'\x1a|t\x81p\xf9\x18@' -p7818 -tp7819 -Rp7820 -ag6 -(g10 -S'x\x9c\xe8N\xb8@\x17@' -p7821 -tp7822 -Rp7823 -ag6 -(g10 -S'\x91\xc61*\x97\x16\x17@' -p7824 -tp7825 -Rp7826 -ag6 -(g10 -S'\xf8\xd9\x80\x9f\r\xec\x19@' -p7827 -tp7828 -Rp7829 -ag6 -(g10 -S'\x1a`7\x13dR\x19@' -p7830 -tp7831 -Rp7832 -asg202 -(lp7833 -g6 -(g10 -S'\xa7\xe4\xc5\xc6\x9e\xc3\xa8?' -p7834 -tp7835 -Rp7836 -ag6 -(g10 -S"\x9f'X\xb5\xb2\x1d\xb8?" -p7837 -tp7838 -Rp7839 -ag6 -(g10 -S'\xf8H\xa5\xa4\xfc\r\xb3?' -p7840 -tp7841 -Rp7842 -ag6 -(g10 -S'\x16\x9c\xe6\x86\xbe\xfa\xb1?' -p7843 -tp7844 -Rp7845 -ag6 -(g10 -S'\xd2\x90\xec\x9f;?\xbe?' -p7846 -tp7847 -Rp7848 -ag6 -(g10 -S'\xce\x83Tp\xb3U\xbb?' -p7849 -tp7850 -Rp7851 -ag6 -(g10 -S'&\xf6\xbb\xf0\xf5t\xbb?' -p7852 -tp7853 -Rp7854 -ag6 -(g10 -S'\x9f\xa4\xe5[#\xcd\xbb?' -p7855 -tp7856 -Rp7857 -ag6 -(g10 -S'R\xe5$UNR\xaa?' -p7858 -tp7859 -Rp7860 -ag6 -(g10 -S'\xb7\x0f\x11\xdd\x8aP\xae?' -p7861 -tp7862 -Rp7863 -asg264 -(lp7864 -g6 -(g10 -S'\x012\xf4\xadQT\xf4?' -p7865 -tp7866 -Rp7867 -ag6 -(g10 -S'?\xd4\x1e\xc4\x16\xc0\xf4?' -p7868 -tp7869 -Rp7870 -ag6 -(g10 -S'\xa0\x0eC\x13\x85U\xf4?' -p7871 -tp7872 -Rp7873 -ag6 -(g10 -S'7\xc7H\xe1m\xcd\xf1?' -p7874 -tp7875 -Rp7876 -ag6 -(g10 -S'\xfa\xceu5t\xb4\xf7?' -p7877 -tp7878 -Rp7879 -ag6 -(g10 -S'I\xf5\xe2\xf7\xf6\xfe\xf3?' -p7880 -tp7881 -Rp7882 -ag6 -(g10 -S'\x82JY\xe9\xab\x16\xf9?' -p7883 -tp7884 -Rp7885 -ag6 -(g10 -S'\xb7\x0b)\x98\xf3=\xf9?' -p7886 -tp7887 -Rp7888 -ag6 -(g10 -S'_\xc8\xf5\x85\\\xff\xf1?' -p7889 -tp7890 -Rp7891 -ag6 -(g10 -S'.))\xe6\xee\x9c\xf2?' -p7892 -tp7893 -Rp7894 -asS"L-BFGS \nw f'" -p7895 -(lp7896 -g6 -(g10 -S'\xff\xdf\xce\xe3v\x83\x92?' 
-p7897 -tp7898 -Rp7899 -ag6 -(g10 -S'\x90\xba\x1e=\x11\xca\x92?' -p7900 -tp7901 -Rp7902 -ag6 -(g10 -S'\x01\xfa\x98\x82\x8a\xe9\x92?' -p7903 -tp7904 -Rp7905 -ag6 -(g10 -S'\xaa!\x97xc\xda\x92?' -p7906 -tp7907 -Rp7908 -ag6 -(g10 -S'\xea=\x1aA\xcb\x87\x96?' -p7909 -tp7910 -Rp7911 -ag6 -(g10 -S'<\xdb\xefF{\xb3\x94?' -p7912 -tp7913 -Rp7914 -ag6 -(g10 -S'tk!D [\x94?' -p7915 -tp7916 -Rp7917 -ag6 -(g10 -S'\x012\x8c\x87\xceH\x97?' -p7918 -tp7919 -Rp7920 -ag6 -(g10 -S'3w:s\xa73\x91?' -p7921 -tp7922 -Rp7923 -ag6 -(g10 -S'\xd1\xd1\x9f\xbc\x0fT\x94?' -p7924 -tp7925 -Rp7926 -asS"Conjugate gradient\nw f'" -p7927 -(lp7928 -g6 -(g10 -S'\xa6\x9c\xb9\x7f|d\xbd?' -p7929 -tp7930 -Rp7931 -ag6 -(g10 -S'\xf9\xfa\xac\x19\x0b\xb5\xbd?' -p7932 -tp7933 -Rp7934 -ag6 -(g10 -S'%\xe5o\xd8$\x91\xb0?' -p7935 -tp7936 -Rp7937 -ag6 -(g10 -S'\x18\xea2S\xe1\x89\xb8?' -p7938 -tp7939 -Rp7940 -ag6 -(g10 -S'\xd5\xd7\xc2\n\xedG\xb8?' -p7941 -tp7942 -Rp7943 -ag6 -(g10 -S'H \x03\xc1\x00k\xb8?' -p7944 -tp7945 -Rp7946 -ag6 -(g10 -S'T4\xac\xef\x1d\xba\xbd?' -p7947 -tp7948 -Rp7949 -ag6 -(g10 -S'a0\xb9\xe6\x00\x8d\xc5?' -p7950 -tp7951 -Rp7952 -ag6 -(g10 -S'qh\x18\x87\x86\xf1\xb4?' -p7953 -tp7954 -Rp7955 -ag6 -(g10 -S'zx\xea\xfc+S\xbd?' -p7956 -tp7957 -Rp7958 -asS"BFGS\nw f'" -p7959 -(lp7960 -g6 -(g10 -S'\xf3\xce\xf59\x15\xdb\x88?' -p7961 -tp7962 -Rp7963 -ag6 -(g10 -S'=\x8a\xc4\x1cz\xbc\x88?' -p7964 -tp7965 -Rp7966 -ag6 -(g10 -S'S%\x8f+u\xc2\x85?' -p7967 -tp7968 -Rp7969 -ag6 -(g10 -S'\x9do\xa2\xd3v\\\x85?' -p7970 -tp7971 -Rp7972 -ag6 -(g10 -S'\x92R\xf6\xdd\x9a\xdd\x8c?' -p7973 -tp7974 -Rp7975 -ag6 -(g10 -S'Z\xec\x18\x8ap$\x8b?' -p7976 -tp7977 -Rp7978 -ag6 -(g10 -S'\xd4U\x84\xdc\xa5-\x8d?' -p7979 -tp7980 -Rp7981 -ag6 -(g10 -S'_RO\xd1\x00\x96\x8c?' -p7982 -tp7983 -Rp7984 -ag6 -(g10 -S'\x1a\xc6\xa1a\x1c\x1a\x8a?' -p7985 -tp7986 -Rp7987 -ag6 -(g10 -S'\xe8\x88g\x9f=\xc7\x8a?' -p7988 -tp7989 -Rp7990 -assg1010 -(dp7991 -g4 -(lp7992 -g6 -(g10 -S'\xa68\n\xed\x82\xd2\xe6?' 
-p7993 -tp7994 -Rp7995 -ag6 -(g10 -S'\x1dr_/\x8d\x8b\xda?' -p7996 -tp7997 -Rp7998 -ag6 -(g10 -S'\xef\xec\x95\x13CY\xe9?' -p7999 -tp8000 -Rp8001 -ag6 -(g10 -S'^\xe8~\xc67I\xd4?' -p8002 -tp8003 -Rp8004 -ag6 -(g10 -S'\xcb5\xc8\x1e\x14\xca\xeb?' -p8005 -tp8006 -Rp8007 -ag6 -(g10 -S')\xb1`\xbc\x8e\xb4\xe2?' -p8008 -tp8009 -Rp8010 -ag6 -(g10 -S'1\x10\xb0:\xbe\x94\xd5?' -p8011 -tp8012 -Rp8013 -ag6 -(g10 -S'V\xe2\nH\xb8\x00\xe4?' -p8014 -tp8015 -Rp8016 -ag6 -(g10 -S'm\xb4\xc1\x95%x\xed?' -p8017 -tp8018 -Rp8019 -ag6 -(g10 -S'\x15\xd7\x95?\xe3\x8b\xed?' -p8020 -tp8021 -Rp8022 -asg73 -(lp8023 -g6 -(g10 -S'tR\xbd\x91{\xde\x10@' -p8024 -tp8025 -Rp8026 -ag6 -(g10 -S'\x16\x8bL\xf8\x18\xed\x04@' -p8027 -tp8028 -Rp8029 -ag6 -(g10 -S'\x05&H\x05#_\r@' -p8030 -tp8031 -Rp8032 -ag6 -(g10 -S'\x8b\xd0[b\xa5\x03\x00@' -p8033 -tp8034 -Rp8035 -ag6 -(g10 -S'\xedT\t\xe4\xfak\x11@' -p8036 -tp8037 -Rp8038 -ag6 -(g10 -S'$(\x11/\xd0\x9a\r@' -p8039 -tp8040 -Rp8041 -ag6 -(g10 -S'\x82\x80\xd5\xf1\xa5\x8c\x01@' -p8042 -tp8043 -Rp8044 -ag6 -(g10 -S'g\xe8\xe9\xb6\xba\x8a\x08@' -p8045 -tp8046 -Rp8047 -ag6 -(g10 -S'\xbf\x80<\xc5\xce%\x11@' -p8048 -tp8049 -Rp8050 -ag6 -(g10 -S'v`\x0c\x1fb\x7f\x11@' -p8051 -tp8052 -Rp8053 -asS'Newton\nw Hessian ' -p8054 -(lp8055 -g6 -(g10 -S'M\xa0\xed\x8f\x98\x7fX?' -p8056 -tp8057 -Rp8058 -asg140 -(lp8059 -g6 -(g10 -S'\xc9\xfdp\xd16\x86\x01@' -p8060 -tp8061 -Rp8062 -ag6 -(g10 -S'\x0c^\x05\xa1\xde\x16\x13@' -p8063 -tp8064 -Rp8065 -ag6 -(g10 -S'\x03\xef\xf9\xc6\xb4\t\x00@' -p8066 -tp8067 -Rp8068 -ag6 -(g10 -S'\xa6\xd0\xc2\xca\xb6\x01\x16@' -p8069 -tp8070 -Rp8071 -ag6 -(g10 -S'F{\x06.h\xa9\x00@' -p8072 -tp8073 -Rp8074 -ag6 -(g10 -S'\x929\x83\xb6\xc1b\t@' -p8075 -tp8076 -Rp8077 -ag6 -(g10 -S'%\x0c\x04\xac\x8e_\x13@' -p8078 -tp8079 -Rp8080 -ag6 -(g10 -S'F\xe9\xbeu\x91\xf3\x0c@' -p8081 -tp8082 -Rp8083 -ag6 -(g10 -S'J\xbb\x8c>"I\x00@' -p8084 -tp8085 -Rp8086 -ag6 -(g10 -S'\x82\xac\xb8\xae\xfc\x19\xff?' -p8087 -tp8088 -Rp8089 -asg202 -(lp8090 -g6 -(g10 -S'\xe7\xb2"\xe1\xb6_\xf3?' 
-p8091 -tp8092 -Rp8093 -ag6 -(g10 -S'iN0\xa9\xd3\xcd\xe6?' -p8094 -tp8095 -Rp8096 -ag6 -(g10 -S'\x13RE\x0f\xcaF\xf0?' -p8097 -tp8098 -Rp8099 -ag6 -(g10 -S'\x94\x8d\x81\xb8\xdf\x03\xea?' -p8100 -tp8101 -Rp8102 -ag6 -(g10 -S'/\xce\xdc\xb1\x1dY\xf2?' -p8103 -tp8104 -Rp8105 -ag6 -(g10 -S'P\x17\xcc:j>\xf0?' -p8106 -tp8107 -Rp8108 -ag6 -(g10 -S'x\xd2\xf0\xfa\xa8\xcd\xf2?' -p8109 -tp8110 -Rp8111 -ag6 -(g10 -S'\xd5\x15\xb54\xb2\xe4\xf2?' -p8112 -tp8113 -Rp8114 -ag6 -(g10 -S'\x05x\x81\xf0\xfe\xab\xf2?' -p8115 -tp8116 -Rp8117 -ag6 -(g10 -S'\x11\xf9`\x08\xe9\xb3\xf2?' -p8118 -tp8119 -Rp8120 -asg264 -(lp8121 -g6 -(g10 -S'E\x9e\xbf\xef\xdd\x8f\xe3?' -p8122 -tp8123 -Rp8124 -ag6 -(g10 -S"\x1b3\xfd\xab'\xa6\xd8?" -p8125 -tp8126 -Rp8127 -ag6 -(g10 -S'\x99\x9d\xfc\xac\xf3X\xf7?' -p8128 -tp8129 -Rp8130 -ag6 -(g10 -S'P\x10\xff`Tc\xd1?' -p8131 -tp8132 -Rp8133 -ag6 -(g10 -S'\xfe\xc93"\x88\xe0\xde?' -p8134 -tp8135 -Rp8136 -ag6 -(g10 -S'\xd2\xf1\x97L\xe0d\xdd?' -p8137 -tp8138 -Rp8139 -ag6 -(g10 -S'\xc7\xecN\x1a^\x1f\xd7?' -p8140 -tp8141 -Rp8142 -ag6 -(g10 -S'\xef\xc9D\x04M_\xdb?' -p8143 -tp8144 -Rp8145 -ag6 -(g10 -S'\xd3\xabb\xf8\xa9\x0f\xe1?' -p8146 -tp8147 -Rp8148 -ag6 -(g10 -S'\xae+\x7f\xc6\x17\x1b\xe1?' -p8149 -tp8150 -Rp8151 -asS"L-BFGS \nw f'" -p8152 -(lp8153 -g6 -(g10 -S'@g\xb7\x16F\x10\x84?' -p8154 -tp8155 -Rp8156 -ag6 -(g10 -S'\x10\xe3\x9bD\n4y?' -p8157 -tp8158 -Rp8159 -ag6 -(g10 -S'\xfb\xe0\xb7\xdd\x916~?' -p8160 -tp8161 -Rp8162 -ag6 -(g10 -S'\xf0X\xeb\\w\xd5q?' -p8163 -tp8164 -Rp8165 -ag6 -(g10 -S'C\xb5w\xdc\x11\xec\x7f?' -p8166 -tp8167 -Rp8168 -ag6 -(g10 -S'\xb8\x8b\xd3\xf2\xe2A~?' -p8169 -tp8170 -Rp8171 -ag6 -(g10 -S'\xbf\x941\xbb\x93\x86w?' -p8172 -tp8173 -Rp8174 -ag6 -(g10 -S'\x9fbt\x88\xdc\xfc{?' -p8175 -tp8176 -Rp8177 -ag6 -(g10 -S'\xd1b\x8e\xea\xf1\x8f\x81?' -p8178 -tp8179 -Rp8180 -ag6 -(g10 -S'7\xcbr\xa7\xb5\x9b\x81?' -p8181 -tp8182 -Rp8183 -asS"Conjugate gradient\nw f'" -p8184 -(lp8185 -g6 -(g10 -S'\xc0\xeb\xae\xe5\x8et\xa1?' 
-p8186 -tp8187 -Rp8188 -ag6 -(g10 -S'\x1cOE\x1f\xaa\xef\xb5?' -p8189 -tp8190 -Rp8191 -ag6 -(g10 -S'\xd4~\xaf\xe6\xc8g\xa1?' -p8192 -tp8193 -Rp8194 -ag6 -(g10 -S'\xc3\xd0?\x86t\xb6\xb5?' -p8195 -tp8196 -Rp8197 -ag6 -(g10 -S'\xc3\x17\xcf:g\x17\xa5?' -p8198 -tp8199 -Rp8200 -ag6 -(g10 -S'\xca\xf2\xc8~\xdf(\xa9?' -p8201 -tp8202 -Rp8203 -ag6 -(g10 -S'\xbe\xe9Moz\xd3\xb3?' -p8204 -tp8205 -Rp8206 -ag6 -(g10 -S'X\xf9\x96y\xcf\xbe\xaf?' -p8207 -tp8208 -Rp8209 -ag6 -(g10 -S'+\xd0\x19\xba\xdc9\xa0?' -p8210 -tp8211 -Rp8212 -ag6 -(g10 -S'\xe2\xbe(\x16}\x01\x9f?' -p8213 -tp8214 -Rp8215 -asS"BFGS\nw f'" -p8216 -(lp8217 -g6 -(g10 -S'U:\xe9\xdc\x13F\x87?' -p8218 -tp8219 -Rp8220 -ag6 -(g10 -S'.\\\xcd\x10\xf8\x11{?' -p8221 -tp8222 -Rp8223 -ag6 -(g10 -S'P\xc4\x10\xee\x91\x9d\x89?' -p8224 -tp8225 -Rp8226 -ag6 -(g10 -S'\xba)m\xdc\xf0\xaft?' -p8227 -tp8228 -Rp8229 -ag6 -(g10 -S'\x04uiN4\x1f\x8c?' -p8230 -tp8231 -Rp8232 -ag6 -(g10 -S'B\x07@' -p8281 -tp8282 -Rp8283 -ag6 -(g10 -S'\x9a\x02\x9cl\xfaT\x06@' -p8284 -tp8285 -Rp8286 -ag6 -(g10 -S'\x9f,\x83(\xd5a\x12@' -p8287 -tp8288 -Rp8289 -ag6 -(g10 -S'\t[\x8b\xfbFk\x02@' -p8290 -tp8291 -Rp8292 -ag6 -(g10 -S'\x0bW\xf1\x0f\xe5\xe6\x12@' -p8293 -tp8294 -Rp8295 -ag6 -(g10 -S'\xc0\xb4\xa0&\x9cs\x02@' -p8296 -tp8297 -Rp8298 -ag6 -(g10 -S'\x18R\xe7\xa5V\xc3\x12@' -p8299 -tp8300 -Rp8301 -ag6 -(g10 -S'\xf4)\x9d+\xe7\xb2\t@' -p8302 -tp8303 -Rp8304 -ag6 -(g10 -S'\x86,\x9e\xfa@F\x03@' -p8305 -tp8306 -Rp8307 -ag6 -(g10 -S'\xaf-\xcdS\x0e\x89\x12@' -p8308 -tp8309 -Rp8310 -asS'Newton\nw Hessian ' -p8311 -(lp8312 -g6 -(g10 -S'\x85\xa6\x08\xa0\x90\xf3Y?' 
-p8313 -tp8314 -Rp8315 -asg140 -(lp8316 -g6 -(g10 -S'\t\xb8%\x88\xab\xab\x12@' -p8317 -tp8318 -Rp8319 -ag6 -(g10 -S'E\xb0\xe4\x99\x8b\xba\x13@' -p8320 -tp8321 -Rp8322 -ag6 -(g10 -S'*\xa7\xa4\x8fk\xc9\x02@' -p8323 -tp8324 -Rp8325 -ag6 -(g10 -S'\x1c<\xa5\\\xbe\x7f\x16@' -p8326 -tp8327 -Rp8328 -ag6 -(g10 -S'zw$5\xa6\x01\x02@' -p8329 -tp8330 -Rp8331 -ag6 -(g10 -S'\x83\xf0\xb8}\xe6{\x16@' -p8332 -tp8333 -Rp8334 -ag6 -(g10 -S';\xcd\x0c\xd0\x8a\x8a\x02@' -p8335 -tp8336 -Rp8337 -ag6 -(g10 -S'\xeciV,w\xa3\x11@' -p8338 -tp8339 -Rp8340 -ag6 -(g10 -S'\xccV\xf2k\xb7\xe6\x15@' -p8341 -tp8342 -Rp8343 -ag6 -(g10 -S'\x8eP\xd22\xac\xf1\x02@' -p8344 -tp8345 -Rp8346 -asg202 -(lp8347 -g6 -(g10 -S'\xf4+z\x11f\x12\xda?' -p8348 -tp8349 -Rp8350 -ag6 -(g10 -S'\x18\x0c\xba?\xeed\xd7?' -p8351 -tp8352 -Rp8353 -ag6 -(g10 -S'+f\x07\x06\xcc\x89\xe3?' -p8354 -tp8355 -Rp8356 -ag6 -(g10 -S'l.\x96\xc5\x85e\xd3?' -p8357 -tp8358 -Rp8359 -ag6 -(g10 -S'\xd9\xe0\xc1\xa6j\x92\xe3?' -p8360 -tp8361 -Rp8362 -ag6 -(g10 -S'\xe7Rm\xc8\xc4h\xd3?' -p8363 -tp8364 -Rp8365 -ag6 -(g10 -S'\xa38\xb6\x8e(E\xe3?' -p8366 -tp8367 -Rp8368 -ag6 -(g10 -S'od\x12L"\xa9\xd9?' -p8369 -tp8370 -Rp8371 -ag6 -(g10 -S'\x8ex\x12\xf3\xbeM\xd4?' -p8372 -tp8373 -Rp8374 -ag6 -(g10 -S'/fb\x1c\x8d2\xe3?' -p8375 -tp8376 -Rp8377 -asg264 -(lp8378 -g6 -(g10 -S'\xc79&\xb3\xfb\xf7\xd1?' -p8379 -tp8380 -Rp8381 -ag6 -(g10 -S'\xf7\xe5\xd5\x84\t0\xd0?' -p8382 -tp8383 -Rp8384 -ag6 -(g10 -S'\x032Eq\x10\xb8\xda?' -p8385 -tp8386 -Rp8387 -ag6 -(g10 -S'+m\x94\xdf\\\xaa\xca?' -p8388 -tp8389 -Rp8390 -ag6 -(g10 -S"'\x88\x95\x90\xdf\xcc\xda?" -p8391 -tp8392 -Rp8393 -ag6 -(g10 -S'F\xab\xe4\xcd\xce\xa5\xca?' -p8394 -tp8395 -Rp8396 -ag6 -(g10 -S'=\xd4\x06J\xa3^\xda?' -p8397 -tp8398 -Rp8399 -ag6 -(g10 -S'\xeciV,w\xa3\xd1?' -p8400 -tp8401 -Rp8402 -ag6 -(g10 -S'm\xc6c\x84\xe4\xe4\xcb?' -p8403 -tp8404 -Rp8405 -ag6 -(g10 -S'\x1f\xc9\x88\xe2^[\xda?' -p8406 -tp8407 -Rp8408 -asS"L-BFGS \nw f'" -p8409 -(lp8410 -g6 -(g10 -S"'\x8fCEJ\xccr?" 
-p8411 -tp8412 -Rp8413 -ag6 -(g10 -S'\x1aO}\xe9L\xefp?' -p8414 -tp8415 -Rp8416 -ag6 -(g10 -S'\xff\x08\x9fn\xc2\xf3{?' -p8417 -tp8418 -Rp8419 -ag6 -(g10 -S'\x9f\x17\xa7\xf9l\xe5k?' -p8420 -tp8421 -Rp8422 -ag6 -(g10 -S'\x19$\xefk\x87\t|?' -p8423 -tp8424 -Rp8425 -ag6 -(g10 -S'\xb8Pi\x16\xa9\xe0k?' -p8426 -tp8427 -Rp8428 -ag6 -(g10 -S'\xf1c\xf7\xab4\x96{?' -p8429 -tp8430 -Rp8431 -ag6 -(g10 -S'a\x8a\x91"\xdfsr?' -p8432 -tp8433 -Rp8434 -ag6 -(g10 -S"'\xeb\x05\xe9x.m?" -p8435 -tp8436 -Rp8437 -ag6 -(g10 -S'\xed0\t\xaa\xc9\x92{?' -p8438 -tp8439 -Rp8440 -asS"Conjugate gradient\nw f'" -p8441 -(lp8442 -g6 -(g10 -S'\xd3\xab\xbc\x9f\xabm\xb7?' -p8443 -tp8444 -Rp8445 -ag6 -(g10 -S'\xc0A\x10\xcf&\x1d\xb3?' -p8446 -tp8447 -Rp8448 -ag6 -(g10 -S'7\xa8Y\xb11\xc0\xb0?' -p8449 -tp8450 -Rp8451 -ag6 -(g10 -S'i\xfc\xfa\xb7\xaf\xc4\xb4?' -p8452 -tp8453 -Rp8454 -ag6 -(g10 -S'\xbf\x02\r\xb5\x14s\x9c?' -p8455 -tp8456 -Rp8457 -ag6 -(g10 -S'\xdbg\xbe\x87#\xc1\xb4?' -p8458 -tp8459 -Rp8460 -ag6 -(g10 -S'-\xe9\x9c\xcc\x0f\xfe\x9b?' -p8461 -tp8462 -Rp8463 -ag6 -(g10 -S'\x80\xe1J5m\xe4\xb0?' -p8464 -tp8465 -Rp8466 -ag6 -(g10 -S'\x14\xcd\x9f\x93\xc0\xe6\xb3?' -p8467 -tp8468 -Rp8469 -ag6 -(g10 -S'\xfdHF\x14\xf7\xda\xa2?' -p8470 -tp8471 -Rp8472 -asS"BFGS\nw f'" -p8473 -(lp8474 -g6 -(g10 -S'\xe69~i\xe7t\x84?' -p8475 -tp8476 -Rp8477 -ag6 -(g10 -S'`!\xcc\xb2\xd3m\x82?' -p8478 -tp8479 -Rp8480 -ag6 -(g10 -S'\xf7\xb6Ri&k\x8e?' -p8481 -tp8482 -Rp8483 -ag6 -(g10 -S'\x87l\xcc-\x8d[~?' -p8484 -tp8485 -Rp8486 -ag6 -(g10 -S'\xfd[\xa2"\xd7\x82\x8e?' -p8487 -tp8488 -Rp8489 -ag6 -(g10 -S'\x9b\x9br\xa7]V~?' -p8490 -tp8491 -Rp8492 -ag6 -(g10 -S'Z\x83\xd8oW\x05\x8e?' -p8493 -tp8494 -Rp8495 -ag6 -(g10 -S'K\xcb\x07\x0f\xaf\x14\x84?' -p8496 -tp8497 -Rp8498 -ag6 -(g10 -S'\x9c4J\xb2\xa1\xc1\x7f?' -p8499 -tp8500 -Rp8501 -ag6 -(g10 -S'\x8a\x00\n9\x9f\x01\x8e?' -p8502 -tp8503 -Rp8504 -assg2006 -(dp8505 -g4 -(lp8506 -g6 -(g10 -S'\x8e\xda\xc4\x93\xfa9\xe6?' -p8507 -tp8508 -Rp8509 -ag6 -(g10 -S'\x97XFE\xc2G\xe6?' 
-p8510 -tp8511 -Rp8512 -ag6 -(g10 -S'\xd63\x01\x0b\xd6\xa6\xe3?' -p8513 -tp8514 -Rp8515 -ag6 -(g10 -S'2+\x12F\xdbr\xe4?' -p8516 -tp8517 -Rp8518 -ag6 -(g10 -S'\x92\xe0\x11\xc4\x14?\xea?' -p8519 -tp8520 -Rp8521 -ag6 -(g10 -S'\x9b=a\xcc\xc7\x01\xea?' -p8522 -tp8523 -Rp8524 -ag6 -(g10 -S'\xf5\x08;\x06\xd0\xd7\xeb?' -p8525 -tp8526 -Rp8527 -ag6 -(g10 -S'\rS\x81\xa4\x1f\x81\xeb?' -p8528 -tp8529 -Rp8530 -ag6 -(g10 -S'\xa9;\xb7\xb7"t\xee?' -p8531 -tp8532 -Rp8533 -ag6 -(g10 -S'\xf3\x0c\x80\xa5\xe0O\xec?' -p8534 -tp8535 -Rp8536 -asg73 -(lp8537 -g6 -(g10 -S'\x00\xab\xecx\xa6?\xd4?' -p8538 -tp8539 -Rp8540 -ag6 -(g10 -S'Mhr\x9f\x92\xbc\xd6?' -p8541 -tp8542 -Rp8543 -ag6 -(g10 -S'\xd2\x90d\xb2\xec\t\xd4?' -p8544 -tp8545 -Rp8546 -ag6 -(g10 -S'\xf5\x9dq\x1e\xebf\xd2?' -p8547 -tp8548 -Rp8549 -ag6 -(g10 -S'\xc2{\xec\xd6\xfd\xf1\xda?' -p8550 -tp8551 -Rp8552 -ag6 -(g10 -S'\x02\n\xff\xa2U\xe1\xd9?' -p8553 -tp8554 -Rp8555 -ag6 -(g10 -S'\x8f\xe2\x15q\xccY\xd9?' -p8556 -tp8557 -Rp8558 -ag6 -(g10 -S'\xdf\x13\xd6_\xfe\xed\xd8?' -p8559 -tp8560 -Rp8561 -ag6 -(g10 -S'0c4(:\xad\xdf?' -p8562 -tp8563 -Rp8564 -ag6 -(g10 -S'\xbc\xb9S\x0fPB\xdd?' -p8565 -tp8566 -Rp8567 -asS'Newton\nw Hessian ' -p8568 -(lp8569 -g6 -(g10 -S"X\x9d\xaa\x05C\x05'?" -p8570 -tp8571 -Rp8572 -asg140 -(lp8573 -g6 -(g10 -S'\\\xcc\xfa\xf9B\x8d\x1a@' -p8574 -tp8575 -Rp8576 -ag6 -(g10 -S'^\x129\x04p\xa9\x1a@' -p8577 -tp8578 -Rp8579 -ag6 -(g10 -S'\x03,G\x9a|E\x1b@' -p8580 -tp8581 -Rp8582 -ag6 -(g10 -S'\xda\x18\x9du>\x15\x1b@' -p8583 -tp8584 -Rp8585 -ag6 -(g10 -S'c\x1f\xb9x\\\xd6\x18@' -p8586 -tp8587 -Rp8588 -ag6 -(g10 -S'\x1b\xdd\x95^\x1e\x0c\x1a@' -p8589 -tp8590 -Rp8591 -ag6 -(g10 -S"F\xb2\x89\xa8T'\x19@" -p8592 -tp8593 -Rp8594 -ag6 -(g10 -S'\xd69 \x92\xd5v\x17@' -p8595 -tp8596 -Rp8597 -ag6 -(g10 -S'3U\xfe@\xed\xa7\x17@' -p8598 -tp8599 -Rp8600 -ag6 -(g10 -S'\xb02X~\x13(\x18@' -p8601 -tp8602 -Rp8603 -asg202 -(lp8604 -g6 -(g10 -S'\xb5\xc2\\8\xda\xf1\xcf?' -p8605 -tp8606 -Rp8607 -ag6 -(g10 -S'\xb1\xff\xf3<\x1av\xc9?' 
-p8608 -tp8609 -Rp8610 -ag6 -(g10 -S'>\xd8S[L\x80\xc7?' -p8611 -tp8612 -Rp8613 -ag6 -(g10 -S'\xd0b\x82\xfam\xb6\xcb?' -p8614 -tp8615 -Rp8616 -ag6 -(g10 -S'\xf7\x16ATcS\xce?' -p8617 -tp8618 -Rp8619 -ag6 -(g10 -S'\xc1\xd6@\xd8\x18a\xcd?' -p8620 -tp8621 -Rp8622 -ag6 -(g10 -S'o\x93\xdd\xd5*\xb6\xcb?' -p8623 -tp8624 -Rp8625 -ag6 -(g10 -S'rr\xc0\xf1%\x80\xcd?' -p8626 -tp8627 -Rp8628 -ag6 -(g10 -S'\xdb\xf90\xd3\xc2\x8a\xd1?' -p8629 -tp8630 -Rp8631 -ag6 -(g10 -S'\xf2b\xf2c\xe3\xc6\xcf?' -p8632 -tp8633 -Rp8634 -asg264 -(lp8635 -g6 -(g10 -S'\xfe\xe4\xce4[\x92\xef?' -p8636 -tp8637 -Rp8638 -ag6 -(g10 -S'\xec<\xcc\x15\xe4\x15\xee?' -p8639 -tp8640 -Rp8641 -ag6 -(g10 -S'\xc7\xfa(_~*\xee?' -p8642 -tp8643 -Rp8644 -ag6 -(g10 -S'\xa9\x9b;c&z\xef?' -p8645 -tp8646 -Rp8647 -ag6 -(g10 -S'\x06\xf5k\x8fk\xc2\xf2?' -p8648 -tp8649 -Rp8650 -ag6 -(g10 -S'\x8f\x97\x004{\x13\xed?' -p8651 -tp8652 -Rp8653 -ag6 -(g10 -S'\xe9I\xadIb>\xf1?' -p8654 -tp8655 -Rp8656 -ag6 -(g10 -S'\xad"\x92:\x81\xb2\xf7?' -p8657 -tp8658 -Rp8659 -ag6 -(g10 -S'\xcd\xb9\xed\x9a\xac+\xf4?' -p8660 -tp8661 -Rp8662 -ag6 -(g10 -S'v\x0f\xb4\xee\x9c\\\xf3?' -p8663 -tp8664 -Rp8665 -asS"L-BFGS \nw f'" -p8666 -(lp8667 -g6 -(g10 -S'\xaf\x8c\xe1\xaf\xeb\xe4\x8f?' -p8668 -tp8669 -Rp8670 -ag6 -(g10 -S'\xa4\xec\x06\xf2\xe3\xb6\x8c?' -p8671 -tp8672 -Rp8673 -ag6 -(g10 -S'0\x9a\xa45sL\x8b?' -p8674 -tp8675 -Rp8676 -ag6 -(g10 -S'\x0b\xb7\xcf=\xbe\xd2\x8e?' -p8677 -tp8678 -Rp8679 -ag6 -(g10 -S'|\x8c~\xc4\xd9\xc5\x90?' -p8680 -tp8681 -Rp8682 -ag6 -(g10 -S'\xf2%he\x0e\xfb\x8d?' -p8683 -tp8684 -Rp8685 -ag6 -(g10 -S',F\xfd\x94\xd1\xcb\x91?' -p8686 -tp8687 -Rp8688 -ag6 -(g10 -S'\xde\x8ab\xa5k=\x93?' -p8689 -tp8690 -Rp8691 -ag6 -(g10 -S'J\xca\x89\x7f=\xf6\x91?' -p8692 -tp8693 -Rp8694 -ag6 -(g10 -S'os\xd8\xb9v\xb7\x95?' -p8695 -tp8696 -Rp8697 -asS"Conjugate gradient\nw f'" -p8698 -(lp8699 -g6 -(g10 -S'\xd7Q\xfdga\xab\xb6?' -p8700 -tp8701 -Rp8702 -ag6 -(g10 -S'\xc9\x06\x93\xb2Bx\xbe?' -p8703 -tp8704 -Rp8705 -ag6 -(g10 -S'T+*!R\x0f\xbb?' 
-p8706 -tp8707 -Rp8708 -ag6 -(g10 -S'\x8es\x9cK\x1e\xd8\xb3?' -p8709 -tp8710 -Rp8711 -ag6 -(g10 -S'\xba\xd4y\xdb\x16a\xbc?' -p8712 -tp8713 -Rp8714 -ag6 -(g10 -S'0\xa1x\xb0A\x07\xbb?' -p8715 -tp8716 -Rp8717 -ag6 -(g10 -S'\xcdlm5\xfe\xc0\xbe?' -p8718 -tp8719 -Rp8720 -ag6 -(g10 -S'\x15\xde\x98\xfa\x16\x0f\xc2?' -p8721 -tp8722 -Rp8723 -ag6 -(g10 -S'n\x96\x1d&3|\xb2?' -p8724 -tp8725 -Rp8726 -ag6 -(g10 -S'\x0c\x1cC\xe7\xba\x0b\xc0?' -p8727 -tp8728 -Rp8729 -asS"BFGS\nw f'" -p8730 -(lp8731 -g6 -(g10 -S']*\xebO\x86\xd2\x85?' -p8732 -tp8733 -Rp8734 -ag6 -(g10 -S'8)y\xe4\xe4\x01\x86?' -p8735 -tp8736 -Rp8737 -ag6 -(g10 -S'\xaa\x96\xbf\xc26i\x83?' -p8738 -tp8739 -Rp8740 -ag6 -(g10 -S'd\xaeWx\xcc0\x84?' -p8741 -tp8742 -Rp8743 -ag6 -(g10 -S')\xae\xe02\xea\xec\x89?' -p8744 -tp8745 -Rp8746 -ag6 -(g10 -S'6\xd9h\x18\xb6\xaf\x89?' -p8747 -tp8748 -Rp8749 -ag6 -(g10 -S'\xf5!\x94\xbe\xf7\xa5\x8b?' -p8750 -tp8751 -Rp8752 -ag6 -(g10 -S'\x04\xf8\x90\x18\x8dO\x8b?' -p8753 -tp8754 -Rp8755 -ag6 -(g10 -S'Bx\xc5&\x1e\x15\x8e?' -p8756 -tp8757 -Rp8758 -ag6 -(g10 -S'\x9c\xef\xf8wyS\x8c?' -p8759 -tp8760 -Rp8761 -asss. diff --git a/advanced/mathematical_optimization/examples/helper/compare_optimizers_py3.pkl b/advanced/mathematical_optimization/examples/helper/compare_optimizers_py3.pkl deleted file mode 100644 index bc1f84fbc..000000000 Binary files a/advanced/mathematical_optimization/examples/helper/compare_optimizers_py3.pkl and /dev/null differ diff --git a/advanced/mathematical_optimization/examples/plot_exercise_flat_minimum.py b/advanced/mathematical_optimization/examples/plot_exercise_flat_minimum.py deleted file mode 100644 index a8cc42199..000000000 --- a/advanced/mathematical_optimization/examples/plot_exercise_flat_minimum.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Finding a minimum in a flat neighborhood -========================================= - -An exercise of finding minimum. This exercise is hard because the -function is very flat around the minimum (all its derivatives are zero). 
-Thus gradient information is unreliable. - -The function admits a minimum in [0, 0]. The challenge is to get within -1e-7 of this minimum, starting at x0 = [1, 1]. - -The solution that we adopt here is to give up on using gradient or -information based on local differences, and to rely on the Powell -algorithm. With 162 function evaluations, we get to 1e-8 of the -solution. -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - - -def f(x): - return np.exp(-1 / (0.01 * x[0] ** 2 + x[1] ** 2)) - - -# A well-conditionned version of f: -def g(x): - return f([10 * x[0], x[1]]) - - -# The gradient of g. We won't use it here for the optimization. -def g_prime(x): - r = np.sqrt(x[0] ** 2 + x[1] ** 2) - return 2 / r**3 * g(x) * x / r - - -result = sp.optimize.minimize(g, [1, 1], method="Powell", tol=1e-10) -x_min = result.x - -############################################################################### -# Some pretty plotting - -plt.figure(0) -plt.clf() -t = np.linspace(-1.1, 1.1, 100) -plt.plot(t, f([0, t])) - -plt.figure(1) -plt.clf() -X, Y = np.mgrid[-1.5:1.5:100j, -1.1:1.1:100j] # type: ignore[misc] -plt.imshow(f([X, Y]).T, cmap="gray_r", extent=(-1.5, 1.5, -1.1, 1.1), origin="lower") -plt.contour(X, Y, f([X, Y]), cmap="gnuplot") - -# Plot the gradient -dX, dY = g_prime([0.1 * X[::5, ::5], Y[::5, ::5]]) -# Adjust for our preconditioning -dX *= 0.1 -plt.quiver(X[::5, ::5], Y[::5, ::5], dX, dY, color=".5") - -# Plot our solution -plt.plot(x_min[0], x_min[1], "r+", markersize=15) - -plt.show() diff --git a/advanced/mathematical_optimization/examples/plot_exercise_ill_conditioned.py b/advanced/mathematical_optimization/examples/plot_exercise_ill_conditioned.py deleted file mode 100644 index c2a557ddb..000000000 --- a/advanced/mathematical_optimization/examples/plot_exercise_ill_conditioned.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -Alternating optimization -========================= - -The challenge here is that Hessian of the problem is a very 
-ill-conditioned matrix. This can easily be seen, as the Hessian of the -first term in simply 2 * K.T @ K. Thus the conditioning of the -problem can be judged from looking at the conditioning of K. -""" - -import time - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) - -K = rng.normal(size=(100, 100)) - - -def f(x): - return np.sum((K @ (x - 1)) ** 2) + np.sum(x**2) ** 2 - - -def f_prime(x): - return 2 * K.T @ K @ (x - 1) + 4 * np.sum(x**2) * x - - -def hessian(x): - H = 2 * K.T @ K + 4 * 2 * x * x[:, np.newaxis] - return H + 4 * np.eye(H.shape[0]) * np.sum(x**2) - - -############################################################################### -# Some pretty plotting - -plt.figure(1) -plt.clf() -Z = X, Y = np.mgrid[-1.5:1.5:100j, -1.1:1.1:100j] # type: ignore[misc] -# Complete in the additional dimensions with zeros -Z = np.reshape(Z, (2, -1)).copy() -Z.resize((100, Z.shape[-1])) -Z = np.apply_along_axis(f, 0, Z) -Z = np.reshape(Z, X.shape) -plt.imshow(Z.T, cmap="gray_r", extent=(-1.5, 1.5, -1.1, 1.1), origin="lower") -plt.contour(X, Y, Z, cmap="gnuplot") - -# A reference but slow solution: -t0 = time.time() -x_ref = sp.optimize.minimize(f, K[0], method="Powell").x -print(f" Powell: time {time.time() - t0:.2f}s") -f_ref = f(x_ref) - -# Compare different approaches -t0 = time.time() -x_bfgs = sp.optimize.minimize(f, K[0], method="BFGS").x -print( - f" BFGS: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_bfgs - x_ref) ** 2)):.2f}, f error {f(x_bfgs) - f_ref:.2f}" -) - -t0 = time.time() -x_l_bfgs = sp.optimize.minimize(f, K[0], method="L-BFGS-B").x -print( - f" L-BFGS: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_l_bfgs - x_ref) ** 2)):.2f}, f error {f(x_l_bfgs) - f_ref:.2f}" -) - - -t0 = time.time() -x_bfgs = sp.optimize.minimize(f, K[0], jac=f_prime, method="BFGS").x -print( - f" BFGS w f': time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_bfgs - x_ref) ** 2)):.2f}, f 
error {f(x_bfgs) - f_ref:.2f}" -) - -t0 = time.time() -x_l_bfgs = sp.optimize.minimize(f, K[0], jac=f_prime, method="L-BFGS-B").x -print( - f"L-BFGS w f': time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_l_bfgs - x_ref) ** 2)):.2f}, f error {f(x_l_bfgs) - f_ref:.2f}" -) - -t0 = time.time() -x_newton = sp.optimize.minimize( - f, K[0], jac=f_prime, hess=hessian, method="Newton-CG" -).x -print( - f" Newton: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_newton - x_ref) ** 2)):.2f}, f error {f(x_newton) - f_ref:.2f}" -) - -plt.show() diff --git a/advanced/mathematical_optimization/examples/plot_gradient_descent.py b/advanced/mathematical_optimization/examples/plot_gradient_descent.py index edc08683c..03a02a96f 100644 --- a/advanced/mathematical_optimization/examples/plot_gradient_descent.py +++ b/advanced/mathematical_optimization/examples/plot_gradient_descent.py @@ -10,11 +10,10 @@ import matplotlib.pyplot as plt import scipy as sp -import collections import sys import os -sys.path.append(os.path.abspath("helper")) +sys.path.append(os.path.abspath("../helper")) from cost_functions import ( mk_quad, mk_gauss, diff --git a/advanced/mathematical_optimization/examples/helper/compare_optimizers.py b/advanced/mathematical_optimization/helper/compare_optimizers.py similarity index 53% rename from advanced/mathematical_optimization/examples/helper/compare_optimizers.py rename to advanced/mathematical_optimization/helper/compare_optimizers.py index 48140a4a6..4753a7ae6 100644 --- a/advanced/mathematical_optimization/examples/helper/compare_optimizers.py +++ b/advanced/mathematical_optimization/helper/compare_optimizers.py @@ -7,7 +7,6 @@ import functools import pickle -import sys import numpy as np import scipy as sp @@ -107,91 +106,85 @@ def mk_costs(ndim=2): # Compare methods without gradient mem = Memory(".", verbose=3) -if True: - gradient_less_benchs = {} - - for ndim in (2, 8, 32, 128): - this_dim_benchs = {} - costs, starting_points = mk_costs(ndim) - 
for cost_name, cost_function in costs.items(): - # We don't need the derivative or the hessian - cost_function = cost_function[0] - function_bench = {} - for x0 in starting_points: - all_bench = [] - # Bench gradient-less - for method_name, method in methods.items(): - if method_name in ("Newton", "L-BFGS w f'"): - continue - this_bench = function_bench.get(method_name, []) - this_costs = mem.cache(bencher)(cost_name, ndim, method_name, x0) - if np.all(this_costs > 0.25 * ndim**2 * 1e-9): - convergence = 2 * len(this_costs) - else: - convergence = ( - np.where(np.diff(this_costs > 0.25 * ndim**2 * 1e-9))[ - 0 - ].max() - + 1 - ) - this_bench.append(convergence) - all_bench.append(convergence) - function_bench[method_name] = this_bench - - # Bench with gradients - for method_name, method in methods.items(): - if method_name in ("Newton", "Powell", "Nelder-mead", "L-BFGS"): - continue - this_method_name = method_name - if method_name.endswith(" w f'"): - this_method_name = method_name[:-4] - this_method_name = this_method_name + "\nw f'" - this_bench = function_bench.get(this_method_name, []) - this_costs, this_counts = mem.cache(bencher_gradient)( - cost_name, ndim, method_name, x0 - ) - if np.all(this_costs > 0.25 * ndim**2 * 1e-9): - convergence = 2 * this_counts.max() - else: - convergence = ( - np.where(np.diff(this_costs > 0.25 * ndim**2 * 1e-9))[ - 0 - ].max() - + 1 - ) - convergence = this_counts[convergence] - this_bench.append(convergence) - all_bench.append(convergence) - function_bench[this_method_name] = this_bench - - # Bench Newton with Hessian - method_name = "Newton" +gradient_less_benchs = {} + +for ndim in (2, 8, 32, 128): + this_dim_benchs = {} + costs, starting_points = mk_costs(ndim) + for cost_name, cost_function in costs.items(): + # We don't need the derivative or the hessian + cost_function = cost_function[0] + function_bench = {} # type: ignore[var-annotated] + for x0 in starting_points: + all_bench = [] + # Bench gradient-less + for 
method_name, method in methods.items(): + if method_name in ("Newton", "L-BFGS w f'"): + continue this_bench = function_bench.get(method_name, []) - this_costs, this_counts = mem.cache(bencher_hessian)( + this_costs = mem.cache(bencher)(cost_name, ndim, method_name, x0) + if np.all(this_costs > 0.25 * ndim**2 * 1e-9): + convergence = 2 * len(this_costs) + else: + convergence = ( + np.where(np.diff(this_costs > 0.25 * ndim**2 * 1e-9))[0].max() + + 1 + ) + this_bench.append(convergence) + all_bench.append(convergence) + function_bench[method_name] = this_bench + + # Bench with gradients + for method_name, method in methods.items(): + if method_name in ("Newton", "Powell", "Nelder-mead", "L-BFGS"): + continue + this_method_name = method_name + if method_name.endswith(" w f'"): + this_method_name = method_name[:-4] + this_method_name = this_method_name + "\nw f'" + this_bench = function_bench.get(this_method_name, []) + this_costs, this_counts = mem.cache(bencher_gradient)( cost_name, ndim, method_name, x0 ) if np.all(this_costs > 0.25 * ndim**2 * 1e-9): - convergence = 2 * len(this_costs) + convergence = 2 * this_counts.max() else: convergence = ( np.where(np.diff(this_costs > 0.25 * ndim**2 * 1e-9))[0].max() + 1 ) + convergence = this_counts[convergence] this_bench.append(convergence) all_bench.append(convergence) - function_bench[method_name + "\nw Hessian "] = this_bench - - # Normalize across methods - x0_mean = np.mean(all_bench) - for _, values in function_bench.items(): - values[-1] /= x0_mean - this_dim_benchs[cost_name] = function_bench - gradient_less_benchs[ndim] = this_dim_benchs - print(80 * "_") - print(f"Done cost {cost_name}, ndim {ndim}") - print(80 * "_") - - pickle.dump( - gradient_less_benchs, - open(f"compare_optimizers_py{sys.version_info[0]}.pkl", "wb"), - ) + function_bench[this_method_name] = this_bench + + # Bench Newton with Hessian + method_name = "Newton" + this_bench = function_bench.get(method_name, []) + this_costs, this_counts = 
mem.cache(bencher_hessian)( + cost_name, ndim, method_name, x0 + ) + if np.all(this_costs > 0.25 * ndim**2 * 1e-9): + convergence = 2 * len(this_costs) + else: + convergence = ( + np.where(np.diff(this_costs > 0.25 * ndim**2 * 1e-9))[0].max() + 1 + ) + this_bench.append(convergence) + all_bench.append(convergence) + function_bench[method_name + "\nw Hessian "] = this_bench + + # Normalize across methods + x0_mean = np.mean(all_bench) + for _, values in function_bench.items(): + values[-1] /= x0_mean + this_dim_benchs[cost_name] = function_bench + gradient_less_benchs[ndim] = this_dim_benchs + print(80 * "_") + print(f"Done cost {cost_name}, ndim {ndim}") + print(80 * "_") + +pickle.dump( + gradient_less_benchs, + open("compare_optimizers_py3.pkl", "wb"), +) diff --git a/advanced/mathematical_optimization/helper/compare_optimizers_py3.pkl b/advanced/mathematical_optimization/helper/compare_optimizers_py3.pkl new file mode 100644 index 000000000..f566ef095 Binary files /dev/null and b/advanced/mathematical_optimization/helper/compare_optimizers_py3.pkl differ diff --git a/advanced/mathematical_optimization/examples/helper/cost_functions.py b/advanced/mathematical_optimization/helper/cost_functions.py similarity index 100% rename from advanced/mathematical_optimization/examples/helper/cost_functions.py rename to advanced/mathematical_optimization/helper/cost_functions.py diff --git a/advanced/mathematical_optimization/index.md b/advanced/mathematical_optimization/index.md new file mode 100644 index 000000000..6081cef86 --- /dev/null +++ b/advanced/mathematical_optimization/index.md @@ -0,0 +1,1253 @@ +--- +jupytext: + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(mathematical-optimization)= + ++++ + +# Mathematical optimization: finding 
minima of functions + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import matplotlib.pyplot as plt +import scipy as sp +``` + +**Authors**: _Gaël Varoquaux_ + +[Mathematical optimization](https://en.wikipedia.org/wiki/Mathematical_optimization) deals with the +problem of finding numerically minimums (or maximums or zeros) of +a function. In this context, the function is called _cost function_, or +_objective function_, or _energy_. + +Here, we are interested in using {mod}`scipy.optimize` for black-box +optimization: we do not rely on the mathematical expression of the +function that we are optimizing. Note that this expression can often be +used for more efficient, non black-box, optimization. + +:::{admonition} Prerequisites + +- {ref}`NumPy ` +- {ref}`SciPy ` +- {ref}`Matplotlib ` + +::: + +:::{admonition} See also + +**References** + +Mathematical optimization is very ... mathematical. If you want +performance, it really pays to read the books: + +- [Convex Optimization](https://web.stanford.edu/~boyd/cvxbook/) + by Boyd and Vandenberghe (pdf available free online). +- [Numerical + Optimization](https://users.eecs.northwestern.edu/~nocedal/book/num-opt.html) + by Nocedal and Wright. Detailed reference on gradient descent methods. +- [Practical Methods of + Optimization](https://www.amazon.com/gp/product/0471494631/ref=ox_sc_act_title_1?ie=UTF8&smid=ATVPDKIKX0DER) + by Fletcher. Good at hand-waving explanations. + +::: + + + ++++ + +## Knowing your problem + +Not all optimization problems are equal. Knowing your problem enables you +to choose the right tool. + +:::{admonition} Dimensionality of the problem +The scale of an optimization problem is pretty much set by the +_dimensionality of the problem_, i.e. the number of scalar variables +on which the search is performed. 
+ +::: + ++++ + +### Convex versus non-convex optimization + ++++ + +::: {list-table} + +- - ::: {glue} convex_func + :doc: optimization_examples.md + ::: + - ::: {glue} non_convex_func + :doc: optimization_examples.md + ::: +- - **A convex function**: + + - $f$ is above all its tangents. + - Equivalently, for two points $A, B$, $f(C)$ lies below the segment + $[f(A), f(B)]$, if $A < C < B$. + + - **A non-convex function** + +::: + +::: {admonition} Plot code +:class: dropdown + +See [convex, non-convex function plots](convex-function-eg). + +::: + +**Optimizing convex functions is easy. Optimizing non-convex functions can +be very hard.** + +:::{note} +It can be proven that for a convex function a local minimum is +also a global minimum. Then, in some sense, the minimum is unique. +::: + ++++ + +### Smooth and non-smooth problems + +::: {list-table} + +- - ::: {glue} smooth_func + :doc: optimization_examples.md + ::: + - ::: {glue} non_smooth_func + :doc: optimization_examples.md + ::: + +- - **A smooth function**: + + The gradient is defined everywhere, and is a continuous function + + - **A non-smooth function** + +::: + +::: {admonition} Plot code +:class: dropdown + +See [smooth, non-smooth function plots](smooth-function-eg). + +::: + ++++ + +**Optimizing smooth functions is easier** +(true in the context of _black-box_ optimization, otherwise +[Linear Programming](https://en.wikipedia.org/wiki/Linear_programming) +is an example of methods which deal very efficiently with +piece-wise linear functions). + ++++ + +### Noisy versus exact cost functions + +::: {list-table} + +- - Noisy (blue) and non-noisy (orange) functions + - ::: {glue} noisy_non_noisy + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [noisy, non-noisy function plots](noisy-non-noisy-eg). + +::: + +:::{admonition} Noisy gradients +Many optimization methods rely on gradients of the objective function. 
+If the gradient function is not given, they are computed numerically, +which induces errors. In such a situation, even if the objective +function is not noisy, a gradient-based optimization may be a noisy +optimization. +::: + ++++ + +### Constraints + +::: {list-table} + +- - Optimizations under constraints + + Here: + + $-1 < x_1 < 1$ + + $-1 < x_2 < 1$ + + - ::: {glue} constraints_no_path + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [constraint plots](constraints-eg). + +::: + ++++ + +## A review of the different optimizers + ++++ + +### Getting started: 1D optimization + +Let's get started by finding the minimum of the scalar function +$f(x)=-\exp[-(x-0.5)^2]$. {func}`scipy.optimize.minimize_scalar` uses +Brent's method to find the minimum of a function: + +```{code-cell} +def f(x): + return -np.exp(-(x - 0.5)**2) + +result = sp.optimize.minimize_scalar(f) +result.success # check if solver was successful +``` + +```{code-cell} +x_min = result.x +x_min +``` + +```{code-cell} +x_min - 0.5 +``` + +::: {list-table} **Brent's method on a quadratic function**: it converges in 3 iterations, as the quadratic approximation is then exact. + +- - ::: {glue} brent_epsilon_0_func + :doc: optimization_examples.md + ::: + - ::: {glue} brent_epsilon_0_err + :doc: optimization_examples.md + ::: + +::: + +::: {list-table} **Brent's method on a non-convex function**: note that the fact that the optimizer avoided the local minimum is a matter of luck. + +- - ::: {glue} brent_epsilon_1_func + :doc: optimization_examples.md + ::: + - ::: {glue} brent_epsilon_1_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [Brent's method figures](brents-method-eg). + +::: + +:::{note} + +You can use different solvers using the parameter `method`. 
+ +::: + +:::{note} + +{func}`scipy.optimize.minimize_scalar` can also be used for optimization +constrained to an interval using the parameter `bounds`. + +::: + ++++ + +### Gradient based methods + ++++ + +#### Some intuitions about gradient descent + +Here we focus on **intuitions**, not code. Code will follow. + +[Gradient descent](https://en.wikipedia.org/wiki/Gradient_descent) +basically consists in taking small steps in the direction of the +gradient, that is the direction of the _steepest descent_. + ++++ + +::: {list-table} **Fixed step gradient descent** + +- - **A well-conditioned quadratic function.** + + - ::: {glue} gradient_descent_q_07_gd_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_07_gd_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned quadratic function.** + + The core problem of gradient-methods on ill-conditioned problems is + that the gradient tends not to point in the direction of the + minimum. + + - ::: {glue} gradient_descent_q_002_gd_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_002_gd_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +We can see that very anisotropic ([ill-conditioned](https://en.wikipedia.org/wiki/Condition_number)) functions are harder +to optimize. + +:::{admonition} Take home message: conditioning number and preconditioning +If you know natural scaling for your variables, prescale them so that +they behave similarly. This is related to [preconditioning](https://en.wikipedia.org/wiki/Preconditioner). +::: + +Also, it clearly can be advantageous to take bigger steps. This +is done in gradient descent code using a +[line search](https://en.wikipedia.org/wiki/Line_search). + +::: {list-table} **Adaptive step gradient descent** + +- - A well-conditioned quadratic function. 
+ + - ::: {glue} gradient_descent_q_07_gda_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_07_gda_err + :doc: optimization_examples.md + ::: + +- - An ill-conditioned quadratic function. + + - ::: {glue} gradient_descent_q_002_gda_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_002_gda_err + :doc: optimization_examples.md + ::: + +- - An ill-conditioned non-quadratic function. + + - ::: {glue} gradient_descent_g_002_gda_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_g_002_gda_err + :doc: optimization_examples.md + ::: + +- - An ill-conditioned very non-quadratic function. + + - ::: {glue} gradient_descent_rb_gda_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_gda_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +The more a function looks like a quadratic function (elliptic +iso-curves), the easier it is to optimize. + ++++ + +#### Conjugate gradient descent + +The gradient descent algorithms above are toys not to be used on real +problems. + +As can be seen from the above experiments, one of the problems of the +simple gradient descent algorithms, is that it tends to oscillate across +a valley, each time following the direction of the gradient, that makes +it cross the valley. The conjugate gradient solves this problem by adding +a _friction_ term: each step depends on the two last values of the +gradient and sharp turns are reduced. + +::: {list-table} **Conjugate gradient descent** + +- - An ill-conditioned non-quadratic function. + + - ::: {glue} gradient_descent_g_002_cg_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_g_002_cg_err + :doc: optimization_examples.md + ::: + +- - An ill-conditioned very non-quadratic function. 
+ + - ::: {glue} gradient_descent_rb_cg_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_cg_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +SciPy provides {func}`scipy.optimize.minimize` to find the minimum of scalar +functions of one or more variables. The simple conjugate gradient method can +be used by setting the parameter `method` to CG + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +sp.optimize.minimize(f, [2, -1], method="CG") +``` + +Gradient methods need the Jacobian (gradient) of the function. They can +compute it numerically, but will perform better if you can pass them the +gradient: + +```{code-cell} +def jacobian(x): + return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) + +sp.optimize.minimize(f, [2, 1], method="CG", jac=jacobian) +``` + +Note that the function has only been evaluated 27 times, compared to 108 +without the gradient. + ++++ + +### Newton and quasi-newton methods + ++++ + +#### Newton methods: using the Hessian (2nd differential) + +[Newton methods](https://en.wikipedia.org/wiki/Newton%27s_method_in_optimization) use a +local quadratic approximation to compute the jump direction. For this +purpose, they rely on the 2 first derivative of the function: the +_gradient_ and the [Hessian](https://en.wikipedia.org/wiki/Hessian_matrix). + +::: {list-table} + +- - **An ill-conditioned quadratic function:** + + Note that, as the quadratic approximation is exact, the Newton + method is blazing fast + + - ::: {glue} gradient_descent_q_002_ncg_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_002_ncg_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned non-quadratic function:** + + Here we are optimizing a Gaussian, which is always below its + quadratic approximation. 
As a result, the Newton method overshoots + and leads to oscillations. + + - ::: {glue} gradient_descent_g_002_ncg_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_g_002_ncg_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned very non-quadratic function:** + + - ::: {glue} gradient_descent_rb_ncg_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_ncg_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +In SciPy, you can use the Newton method by setting `method` to Newton-CG in +{func}`scipy.optimize.minimize`. Here, CG refers to the fact that an internal +inversion of the Hessian is performed by conjugate gradient. + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +def jacobian(x): + return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) + +sp.optimize.minimize(f, [2,-1], method="Newton-CG", jac=jacobian) +``` + +Note that compared to a conjugate gradient (above), Newton's method has +required fewer function evaluations, but more gradient evaluations, as it +uses them to approximate the Hessian. Let's compute the Hessian and pass it +to the algorithm: + +```{code-cell} +def hessian(x): # Computed with sympy + return np.array(((1 - 4*x[1] + 12*x[0]**2, -4*x[0]), (-4*x[0], 2))) + +sp.optimize.minimize(f, [2,-1], method="Newton-CG", jac=jacobian, hess=hessian) +``` + +:::{note} + +In very high dimensions, the inversion of the Hessian can be costly +and unstable (large scale > 250). + +::: + +:::{note} +Newton optimizers should not be confused with Newton's root finding +method, based on the same principles, {func}`scipy.optimize.newton`. 
+::: + +(quasi-newton)= + ++++ + +#### Quasi-Newton methods: approximating the Hessian on the fly + +**BFGS**: BFGS (Broyden-Fletcher-Goldfarb-Shanno algorithm) refines at +each step an approximation of the Hessian. + +::: {list-table} + +- - **An ill-conditioned quadratic function:** + + On a exactly quadratic function, BFGS is not as fast as Newton's + method, but still very fast. + + - ::: {glue} gradient_descent_q_002_bgfs_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_002_bgfs_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned non-quadratic function:** + + Here BFGS does better than Newton, as its empirical estimate of the + curvature is better than that given by the Hessian. + + - ::: {glue} gradient_descent_g_002_bgfs_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_g_002_bgfs_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned very non-quadratic function:** + + - ::: {glue} gradient_descent_rb_bgfs_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_bgfs_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +def jacobian(x): + return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) + +sp.optimize.minimize(f, [2, -1], method="BFGS", jac=jacobian) +``` + +**L-BFGS:** Limited-memory BFGS sits between BFGS and conjugate gradient: in +very high dimensions (> 250) the Hessian matrix is too costly to compute and +invert. L-BFGS keeps a low-rank version. 
In addition, box bounds are also +supported by L-BFGS-B: + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +def jacobian(x): + return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) + +sp.optimize.minimize(f, [2, 2], method="L-BFGS-B", jac=jacobian) +``` + +### Gradient-less methods + ++++ + +#### A shooting method: the Powell algorithm + +Almost a gradient approach: + +::: {list-table} + +- - **An ill-conditioned quadratic function:** + + Powell's method isn't too sensitive to local ill-conditionning in + low dimensions + + - ::: {glue} gradient_descent_q_002_pow_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_q_002_pow_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned very non-quadratic function:** + + - ::: {glue} gradient_descent_rb_pow_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_pow_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + ++++ + +#### Simplex method: the Nelder-Mead + +The Nelder-Mead algorithms are a generalization of dichotomy approaches to +high-dimensional spaces. The algorithm works by refining +a [simplex](https://en.wikipedia.org/wiki/Simplex), the generalization of +intervals and triangles to high-dimensional spaces, to bracket the minimum. + +**Strong points**: it is robust to noise, as it does not rely on +computing gradients. Thus it can work on functions that are not locally +smooth such as experimental data points, as long as they display a +large-scale bell-shape behavior. However it is slower than gradient-based +methods on smooth, non-noisy functions. 
+ +::: {list-table} + +- - **An ill-conditioned non-quadratic function:** + + - ::: {glue} gradient_descent_g_002_nm_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_g_002_nm_err + :doc: optimization_examples.md + ::: + +- - **An ill-conditioned very non-quadratic function:** + + - ::: {glue} gradient_descent_rb_nm_func + :doc: optimization_examples.md + ::: + - ::: {glue} gradient_descent_rb_nm_err + :doc: optimization_examples.md + ::: + +::: + +::: {admonition} Plot code +:class: dropdown + +See [gradient descent plots](gradient-descent-eg). + +::: + +Using the Nelder-Mead solver in {func}`scipy.optimize.minimize`: + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +sp.optimize.minimize(f, [2, -1], method="Nelder-Mead") +``` + +### Global optimizers + +If your problem does not admit a unique local minimum (which can be hard +to test unless the function is convex), and you do not have prior +information to initialize the optimization close to the solution, you +may need a global optimizer. + ++++ + +#### Brute force: a grid search + +{func}`scipy.optimize.brute` evaluates the function on a given grid of +parameters and returns the parameters corresponding to the minimum +value. The parameters are specified with ranges given to +{obj}`numpy.mgrid`. By default, 20 steps are taken in each direction: + +```{code-cell} +def f(x): # The rosenbrock function + return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 + +sp.optimize.brute(f, ((-1, 2), (-1, 2))) +``` + +## Practical guide to optimization with SciPy + ++++ + +### Choosing a method + +All methods are exposed as the `method` argument of +{func}`scipy.optimize.minimize`. + +::: {glue} compare_optimizers +:doc: optimization_examples.md +::: + +::: {admonition} Code for plot above +:class: dropdown + +See [compare optimizers](compare-optimizers-eg). 
+ +::: + +::: {list-table} **Rules of thumb for choosing a method** + +- - Without knowledge of the gradient + + - - In general, prefer **BFGS** or **L-BFGS**, even if you have to + approximate gradients numerically. These are also the default if you + omit the parameter `method` - depending if the problem has constraints + or bounds. + - On well-conditioned problems, **Powell** and **Nelder-Mead**, both + gradient-free methods, work well in high dimension, but they collapse + for ill-conditioned problems. + +- - With knowledge of the gradient + + - - **BFGS** or **L-BFGS**. + - Computational overhead of BFGS is larger than that of L-BFGS, itself + larger than that of conjugate gradient. On the other hand, BFGS usually + needs fewer function evaluations than CG. Thus conjugate gradient method + is better than BFGS at optimizing computationally cheap functions. + +- - With the Hessian + + - - If you can compute the Hessian, prefer the Newton method (**Newton-CG** + or **TCG**). + +- - If you have noisy measurements + + - - Use **Nelder-Mead** or **Powell**. + +::: + ++++ + +### Making your optimizer faster + +- Choose the right method (see above), do compute analytically the + gradient and Hessian, if you can. +- Use [preconditioning](https://en.wikipedia.org/wiki/Preconditioner) + when possible. +- Choose your initialization points wisely. For instance, if you are + running many similar optimizations, warm-restart one with the results of + another. +- Relax the tolerance if you don't need precision using the parameter `tol`. + ++++ + +### Computing gradients + +Computing gradients, and even more Hessians, is very tedious but worth +the effort. Symbolic computation with {ref}`Sympy ` may come in +handy. + +**Warning** + +A _very_ common source of optimization not converging well is human +error in the computation of the gradient. You can use +{func}`scipy.optimize.check_grad` to check that your gradient is +correct. 
It returns the norm of the difference between the gradient +given, and a gradient computed numerically: + +```{code-cell} +sp.optimize.check_grad(f, jacobian, [2, -1]) +``` + +See also {func}`scipy.optimize.approx_fprime` to find your errors. + ++++ + +### Synthetic exercises + +**A simple (?) quadratic function** + +::: {exercise-start} +:label: mo-simple-quad-ex +:class: dropdown +::: + +Optimize the following function, using K[0] as a starting point: + +```{code-cell} +rng = np.random.default_rng(27446968) +K = rng.normal(size=(100, 100)) + +def f(x): + return np.sum((K @ (x - 1))**2) + np.sum(x**2)**2 +``` + +Time your approach. Find the fastest approach. Why is BFGS not +working well? + +::: {exercise-end} +::: + +::: {solution-start} mo-simple-quad-ex +:class: dropdown +::: + +**Alternating optimization** + +The challenge here is that the Hessian of the problem is a very ill-conditioned +matrix. This can easily be seen, as the Hessian of the first term is simply +`2 * K.T @ K`. Thus the conditioning of the problem can be judged from looking +at the conditioning of `K`. 
+ +```{code-cell} +import time + +rng = np.random.default_rng(27446968) + +K = rng.normal(size=(100, 100)) + + +def f(x): + return np.sum((K @ (x - 1)) ** 2) + np.sum(x**2) ** 2 + + +def f_prime(x): + return 2 * K.T @ K @ (x - 1) + 4 * np.sum(x**2) * x + + +def hessian(x): + H = 2 * K.T @ K + 4 * 2 * x * x[:, np.newaxis] + return H + 4 * np.eye(H.shape[0]) * np.sum(x**2) +``` + +Some pretty plotting + +```{code-cell} +plt.figure() +Z = X, Y = np.mgrid[-1.5:1.5:100j, -1.1:1.1:100j] # type: ignore[misc] +# Complete in the additional dimensions with zeros +Z = np.reshape(Z, (2, -1)).copy() +Z.resize((100, Z.shape[-1])) +Z = np.apply_along_axis(f, 0, Z) +Z = np.reshape(Z, X.shape) +plt.imshow(Z.T, cmap="gray_r", extent=(-1.5, 1.5, -1.1, 1.1), origin="lower") +plt.contour(X, Y, Z, cmap="gnuplot") +``` + +A reference but slow solution: + +```{code-cell} +t0 = time.time() +x_ref = sp.optimize.minimize(f, K[0], method="Powell").x +print(f" Powell: time {time.time() - t0:.2f}s") +f_ref = f(x_ref) +``` + +Compare different approaches + +```{code-cell} +t0 = time.time() +x_bfgs = sp.optimize.minimize(f, K[0], method="BFGS").x +print( + f" BFGS: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_bfgs - x_ref) ** 2)):.2f}, f error {f(x_bfgs) - f_ref:.2f}" +) + +t0 = time.time() +x_l_bfgs = sp.optimize.minimize(f, K[0], method="L-BFGS-B").x +print( + f" L-BFGS: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_l_bfgs - x_ref) ** 2)):.2f}, f error {f(x_l_bfgs) - f_ref:.2f}" +) +``` + +```{code-cell} +t0 = time.time() +x_bfgs = sp.optimize.minimize(f, K[0], jac=f_prime, method="BFGS").x +print( + f" BFGS w f': time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_bfgs - x_ref) ** 2)):.2f}, f error {f(x_bfgs) - f_ref:.2f}" +) + +t0 = time.time() +x_l_bfgs = sp.optimize.minimize(f, K[0], jac=f_prime, method="L-BFGS-B").x +print( + f"L-BFGS w f': time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_l_bfgs - x_ref) ** 2)):.2f}, f error {f(x_l_bfgs) - f_ref:.2f}" +) 
+``` + +```{code-cell} +t0 = time.time() +x_newton = sp.optimize.minimize( + f, K[0], jac=f_prime, hess=hessian, method="Newton-CG" +).x +print( + f" Newton: time {time.time() - t0:.2f}s, x error {np.sqrt(np.sum((x_newton - x_ref) ** 2)):.2f}, f error {f(x_newton) - f_ref:.2f}" +) +``` + +::: {solution-end} +::: + +**A locally flat minimum** + +::: {exercise-start} +:label: mo-flat-min-ex +:class: dropdown +::: + +Consider the function `exp(-1/(.01*x**2 + y**2))`. This function admits +a minimum in (0, 0). Starting from an initialization at (1, 1), try +to get within 1e-8 of this minimum point. + +This exercise is hard because the function is very flat around the minimum +(all its derivatives are zero). Thus gradient information is unreliable. + +::: {exercise-end} +::: + +::: {solution-start} mo-flat-min-ex +:class: dropdown +::: + +**Finding a minimum in a flat neighborhood** + +The function admits a minimum in [0, 0]. The challenge is to get within +1e-8 of this minimum, starting at x0 = [1, 1]. + +The solution that we adopt here is to give up on using gradient or +information based on local differences, and to rely on the Powell +algorithm. With 162 function evaluations, we get to 1e-8 of the +solution. + +```{code-cell} +def f(x): + return np.exp(-1 / (0.01 * x[0] ** 2 + x[1] ** 2)) +``` + +A well-conditioned version of f: + +```{code-cell} +def g(x): + return f([10 * x[0], x[1]]) +``` + +The gradient of g. We won't use it here for the optimization. 
+ +```{code-cell} +def g_prime(x): + r = np.sqrt(x[0] ** 2 + x[1] ** 2) + return 2 / r**3 * g(x) * x / r + +result = sp.optimize.minimize(g, [1, 1], method="Powell", tol=1e-10) +x_min = result.x +x_min +``` + +Some pretty plotting: + +```{code-cell} +t = np.linspace(-1.1, 1.1, 100) +plt.plot(t, f([0, t])); +``` + +```{code-cell} +X, Y = np.mgrid[-1.5:1.5:100j, -1.1:1.1:100j] # type: ignore[misc] +plt.imshow(f([X, Y]).T, cmap="gray_r", extent=(-1.5, 1.5, -1.1, 1.1), origin="lower") +plt.contour(X, Y, f([X, Y]), cmap="gnuplot") + +# Plot the gradient +dX, dY = g_prime([0.1 * X[::5, ::5], Y[::5, ::5]]) +# Adjust for our preconditioning +dX *= 0.1 +plt.quiver(X[::5, ::5], Y[::5, ::5], dX, dY, color=".5") + +# Plot our solution +plt.plot(x_min[0], x_min[1], "r+", markersize=15); +``` + +::: {solution-end} +::: + ++++ + +## Special case: non-linear least-squares + ++++ + +### Minimizing the norm of a vector function + +Least square problems, minimizing the norm of a vector function, have a +specific structure that can be used in the [Levenberg–Marquardt algorithm](https://en.wikipedia.org/wiki/Levenberg-Marquardt_algorithm) +implemented in {func}`scipy.optimize.leastsq`. + +Lets try to minimize the norm of the following vectorial function: + +```{code-cell} +def f(x): + return np.arctan(x) - np.arctan(np.linspace(0, 1, len(x))) +``` + +```{code-cell} +x0 = np.zeros(10) +sp.optimize.leastsq(f, x0) +``` + +This took 67 function evaluations (check it with 'full_output=True'). What +if we compute the norm ourselves and use a good generic optimizer (BFGS): + +```{code-cell} +def g(x): + return np.sum(f(x)**2) + +result = sp.optimize.minimize(g, x0, method="BFGS") +result.fun +``` + +BFGS needs more function calls, and gives a less precise result. + +:::{note} +`leastsq` is interesting compared to BFGS only if the +dimensionality of the output vector is large, and larger than the number +of parameters to optimize. 
+::: + +:::{warning} +If the function is linear, this is a linear-algebra problem, and +should be solved with {func}`scipy.linalg.lstsq`. +::: + ++++ + +### Curve fitting + +Least square problems occur often when fitting a non-linear function to data. +While it is possible to construct our optimization problem ourselves, +SciPy provides a helper function for this purpose: +{func}`scipy.optimize.curve_fit`: + +```{code-cell} +def f(t, omega, phi): + return np.cos(omega * t + phi) +``` + +```{code-cell} +x = np.linspace(0, 3, 50) +rng = np.random.default_rng(27446968) +y = f(x, 1.5, 1) + .1*rng.normal(size=50) +``` + +```{code-cell} +sp.optimize.curve_fit(f, x, y) +``` + +```{code-cell} +:tags: [hide-input] + +rng = np.random.default_rng(27446968) + + +# Our test function +def f(t, omega, phi): + return np.cos(omega * t + phi) + + +# Our x and y data +x = np.linspace(0, 3, 50) +y = f(x, 1.5, 1) + 0.1 * np.random.normal(size=50) + +# Fit the model: the parameters omega and phi can be found in the +# `params` vector +params, params_cov = sp.optimize.curve_fit(f, x, y) + +# plot the data and the fitted curve +t = np.linspace(0, 3, 1000) + +plt.plot(x, y, "bx") +plt.plot(t, f(t, *params), "r-"); +``` + +::: {exercise-start} +:label: mo-omega3-ex +:class: dropdown +::: + +Do the same with omega = 3. What is the difficulty? + +::: {exercise-end} +::: + ++++ + +## Optimization with constraints + ++++ + +### Box bounds + +Box bounds correspond to limiting each of the individual parameters of +the optimization. Note that some problems that are not originally written +as box bounds can be rewritten as such via change of variables. 
+Both +{func}`scipy.optimize.minimize_scalar` and {func}`scipy.optimize.minimize` +support bound constraints with the parameter `bounds`: + +```{code-cell} +def f(x): + return np.sqrt((x[0] - 3)**2 + (x[1] - 2)**2) + +sp.optimize.minimize(f, np.array([0, 0]), bounds=((-1.5, 1.5), (-1.5, 1.5))) +``` + +::: {glue} constraints_path +:doc: optimization_examples.md +::: + +::: {admonition} Plot code +:class: dropdown + +See [constraint plots](constraints-eg). + +::: + ++++ + +### General constraints + +Equality and inequality constraints specified as functions: $f(x) = 0$ +and $g(x) < 0$. + ++++ + +#### {func}`scipy.optimize.fmin_slsqp` Sequential least squares programming: equality and inequality constraints + +::: {glue} constraints_non_bounds +:doc: optimization_examples.md +::: + +::: {admonition} Plot code +:class: dropdown + +See [constraint non-bounds](constraints-non-bounds-eg). + +::: + +```{code-cell} +def f(x): + return np.sqrt((x[0] - 3)**2 + (x[1] - 2)**2) +``` + +```{code-cell} +def constraint(x): + return np.atleast_1d(1.5 - np.sum(np.abs(x))) +``` + +```{code-cell} +x0 = np.array([0, 0]) +sp.optimize.minimize(f, x0, constraints={"fun": constraint, "type": "ineq"}) +``` + +:::{warning} +The above problem is known as the [Lasso](<https://en.wikipedia.org/wiki/Lasso_(statistics)>) +problem in statistics, and there exist very efficient solvers for it +(for instance in [scikit-learn](https://scikit-learn.org)). In +general, do not use generic solvers when specific ones exist. + +::: + +:::{admonition} Lagrange multipliers +If you are ready to do a bit of math, many constrained optimization +problems can be converted to non-constrained optimization problems +using a mathematical trick known as [Lagrange multipliers](https://en.wikipedia.org/wiki/Lagrange_multiplier). + +::: + ++++ + +:::{admonition} See also + +**Other Software** + +SciPy tries to include the best well-established, general-use, +and permissively-licensed optimization algorithms available.
However, +even better options for a given task may be available in other libraries; +please also see [IPOPT] and [PyGMO]. + +::: + +[ipopt]: https://github.com/xuy/pyipopt +[pygmo]: https://esa.github.io/pygmo2/ diff --git a/advanced/mathematical_optimization/index.rst b/advanced/mathematical_optimization/index.rst deleted file mode 100644 index 73658e561..000000000 --- a/advanced/mathematical_optimization/index.rst +++ /dev/null @@ -1,1043 +0,0 @@ -.. - For doctesting - >>> import numpy as np - -.. _mathematical_optimization: - -======================================================= -Mathematical optimization: finding minima of functions -======================================================= - -**Authors**: *Gaël Varoquaux* - -`Mathematical optimization -`_ deals with the -problem of finding numerically minimums (or maximums or zeros) of -a function. In this context, the function is called *cost function*, or -*objective function*, or *energy*. - -Here, we are interested in using :mod:`scipy.optimize` for black-box -optimization: we do not rely on the mathematical expression of the -function that we are optimizing. Note that this expression can often be -used for more efficient, non black-box, optimization. - -.. topic:: Prerequisites - - .. rst-class:: horizontal - - * :ref:`NumPy ` - * :ref:`SciPy ` - * :ref:`Matplotlib ` - -.. seealso:: **References** - - Mathematical optimization is very ... mathematical. If you want - performance, it really pays to read the books: - - * `Convex Optimization `_ - by Boyd and Vandenberghe (pdf available free online). - - * `Numerical Optimization - `_, - by Nocedal and Wright. Detailed reference on gradient descent methods. - - * `Practical Methods of Optimization - `_ by Fletcher: good at hand-waving explanations. - -.. include:: ../../includes/big_toc_css.rst - :start-line: 1 - - -.. contents:: Chapters contents - :local: - :depth: 2 - -.. XXX: should I discuss root finding? 
- - -Knowing your problem -====================== - -Not all optimization problems are equal. Knowing your problem enables you -to choose the right tool. - -.. topic:: **Dimensionality of the problem** - - The scale of an optimization problem is pretty much set by the - *dimensionality of the problem*, i.e. the number of scalar variables - on which the search is performed. - -Convex versus non-convex optimization ---------------------------------------- - -.. |convex_1d_1| image:: auto_examples/images/sphx_glr_plot_convex_001.png - -.. |convex_1d_2| image:: auto_examples/images/sphx_glr_plot_convex_002.png - -.. list-table:: - - * - |convex_1d_1| - - - |convex_1d_2| - - * - **A convex function**: - - - `f` is above all its tangents. - - equivalently, for two point A, B, f(C) lies below the segment - [f(A), f(B])], if A < C < B - - - **A non-convex function** - -**Optimizing convex functions is easy. Optimizing non-convex functions can -be very hard.** - -.. note:: It can be proven that for a convex function a local minimum is - also a global minimum. Then, in some sense, the minimum is unique. - -Smooth and non-smooth problems -------------------------------- - -.. |smooth_1d_1| image:: auto_examples/images/sphx_glr_plot_smooth_001.png - -.. |smooth_1d_2| image:: auto_examples/images/sphx_glr_plot_smooth_002.png - -.. list-table:: - - * - |smooth_1d_1| - - - |smooth_1d_2| - - * - **A smooth function**: - - The gradient is defined everywhere, and is a continuous function - - - **A non-smooth function** - -**Optimizing smooth functions is easier** -(true in the context of *black-box* optimization, otherwise -`Linear Programming `_ -is an example of methods which deal very efficiently with -piece-wise linear functions). - - - -Noisy versus exact cost functions ----------------------------------- - -.. |noisy| image:: auto_examples/images/sphx_glr_plot_noisy_001.png - -.. list-table:: - - * - Noisy (blue) and non-noisy (green) functions - - - |noisy| - -.. 
topic:: **Noisy gradients** - - Many optimization methods rely on gradients of the objective function. - If the gradient function is not given, they are computed numerically, - which induces errors. In such situation, even if the objective - function is not noisy, a gradient-based optimization may be a noisy - optimization. - -Constraints ------------- - -.. |constraints| image:: auto_examples/images/sphx_glr_plot_constraints_001.png - :target: auto_examples/plot_constraints.html - -.. list-table:: - - * - Optimizations under constraints - - Here: - - :math:`-1 < x_1 < 1` - - :math:`-1 < x_2 < 1` - - - |constraints| - - -A review of the different optimizers -====================================== - -Getting started: 1D optimization ---------------------------------- - -Let's get started by finding the minimum of the scalar function -:math:`f(x)=\exp[(x-0.5)^2]`. :func:`scipy.optimize.minimize_scalar` uses -Brent's method to find the minimum of a function: - -:: - - >>> import numpy as np - >>> import scipy as sp - >>> def f(x): - ... return -np.exp(-(x - 0.5)**2) - >>> result = sp.optimize.minimize_scalar(f) - >>> result.success # check if solver was successful - True - >>> x_min = result.x - >>> x_min - np.float64(0.50...) - >>> x_min - 0.5 - np.float64(5.8...e-09) - - -.. |1d_optim_1| image:: auto_examples/images/sphx_glr_plot_1d_optim_001.png - :scale: 90% - -.. |1d_optim_2| image:: auto_examples/images/sphx_glr_plot_1d_optim_002.png - :scale: 75% - -.. |1d_optim_3| image:: auto_examples/images/sphx_glr_plot_1d_optim_003.png - :scale: 90% - -.. |1d_optim_4| image:: auto_examples/images/sphx_glr_plot_1d_optim_004.png - :scale: 75% - -.. list-table:: **Brent's method on a quadratic function**: it - converges in 3 iterations, as the quadratic - approximation is then exact. - - * - |1d_optim_1| - - - |1d_optim_2| - -.. list-table:: **Brent's method on a non-convex function**: note that - the fact that the optimizer avoided the local minimum - is a matter of luck. 
- - * - |1d_optim_3| - - - |1d_optim_4| - -.. note:: - - You can use different solvers using the parameter ``method``. - -.. note:: - - :func:`scipy.optimize.minimize_scalar` can also be used for optimization - constrained to an interval using the parameter ``bounds``. - -Gradient based methods ------------------------ - -Some intuitions about gradient descent -....................................... - -Here we focus on **intuitions**, not code. Code will follow. - -`Gradient descent `_ -basically consists in taking small steps in the direction of the -gradient, that is the direction of the *steepest descent*. - -.. |gradient_quad_cond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_001.png - :scale: 90% - -.. |gradient_quad_cond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_020.png - :scale: 75% - -.. |gradient_quad_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_003.png - :scale: 90% - -.. |gradient_quad_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_022.png - :scale: 75% - -.. list-table:: **Fixed step gradient descent** - :widths: 1 1 1 - - * - **A well-conditioned quadratic function.** - - - |gradient_quad_cond| - - - |gradient_quad_cond_conv| - - * - **An ill-conditioned quadratic function.** - - The core problem of gradient-methods on ill-conditioned problems is - that the gradient tends not to point in the direction of the - minimum. - - - |gradient_quad_icond| - - - |gradient_quad_icond_conv| - -We can see that very anisotropic (`ill-conditioned -`_) functions are harder -to optimize. - -.. topic:: **Take home message: conditioning number and preconditioning** - - If you know natural scaling for your variables, prescale them so that - they behave similarly. This is related to `preconditioning - `_. - -Also, it clearly can be advantageous to take bigger steps. This -is done in gradient descent code using a -`line search `_. - -.. 
|agradient_quad_cond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_002.png - :scale: 90% - -.. |agradient_quad_cond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_021.png - :scale: 75% - -.. |agradient_quad_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_004.png - :scale: 90% - -.. |agradient_quad_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_023.png - :scale: 75% - -.. |agradient_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_005.png - :scale: 90% - -.. |agradient_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_024.png - :scale: 75% - -.. |agradient_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_006.png - :scale: 90% - -.. |agradient_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_025.png - :scale: 75% - - -.. list-table:: **Adaptive step gradient descent** - :widths: 1 1 1 - - * - A well-conditioned quadratic function. - - - |agradient_quad_cond| - - - |agradient_quad_cond_conv| - - * - An ill-conditioned quadratic function. - - - |agradient_quad_icond| - - - |agradient_quad_icond_conv| - - * - An ill-conditioned non-quadratic function. - - - |agradient_gauss_icond| - - - |agradient_gauss_icond_conv| - - * - An ill-conditioned very non-quadratic function. - - - |agradient_rosen_icond| - - - |agradient_rosen_icond_conv| - -The more a function looks like a quadratic function (elliptic -iso-curves), the easier it is to optimize. - -Conjugate gradient descent -........................... - -The gradient descent algorithms above are toys not to be used on real -problems. - -As can be seen from the above experiments, one of the problems of the -simple gradient descent algorithms, is that it tends to oscillate across -a valley, each time following the direction of the gradient, that makes -it cross the valley. 
The conjugate gradient solves this problem by adding -a *friction* term: each step depends on the two last values of the -gradient and sharp turns are reduced. - -.. |cg_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_007.png - :scale: 90% - -.. |cg_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_026.png - :scale: 75% - -.. |cg_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_008.png - :scale: 90% - -.. |cg_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_027.png - :scale: 75% - - -.. list-table:: **Conjugate gradient descent** - :widths: 1 1 1 - - * - An ill-conditioned non-quadratic function. - - - |cg_gauss_icond| - - - |cg_gauss_icond_conv| - - * - An ill-conditioned very non-quadratic function. - - - |cg_rosen_icond| - - - |cg_rosen_icond_conv| - -SciPy provides :func:`scipy.optimize.minimize` to find the minimum of scalar -functions of one or more variables. The simple conjugate gradient method can -be used by setting the parameter ``method`` to CG :: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> sp.optimize.minimize(f, [2, -1], method="CG") - message: Optimization terminated successfully. - success: True - status: 0 - fun: 1.650...e-11 - x: [ 1.000e+00 1.000e+00] - nit: 13 - jac: [-6.15...e-06 2.53...e-07] - nfev: 81 - njev: 27 - -Gradient methods need the Jacobian (gradient) of the function. They can compute it -numerically, but will perform better if you can pass them the gradient:: - - >>> def jacobian(x): - ... return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) - >>> sp.optimize.minimize(f, [2, 1], method="CG", jac=jacobian) - message: Optimization terminated successfully. 
- success: True - status: 0 - fun: 2.95786...e-14 - x: [ 1.000e+00 1.000e+00] - nit: 8 - jac: [ 7.183e-07 -2.990e-07] - nfev: 16 - njev: 16 - -Note that the function has only been evaluated 27 times, compared to 108 -without the gradient. - -Newton and quasi-newton methods --------------------------------- - -Newton methods: using the Hessian (2nd differential) -..................................................... - -`Newton methods -`_ use a -local quadratic approximation to compute the jump direction. For this -purpose, they rely on the 2 first derivative of the function: the -*gradient* and the `Hessian -`_. - -.. |ncg_quad_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_009.png - :scale: 90% - -.. |ncg_quad_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_028.png - :scale: 75% - -.. |ncg_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_010.png - :scale: 90% - -.. |ncg_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_029.png - :scale: 75% - -.. |ncg_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_011.png - :scale: 90% - -.. |ncg_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_030.png - :scale: 75% - - -.. list-table:: - :widths: 1 1 1 - - * - **An ill-conditioned quadratic function:** - - Note that, as the quadratic approximation is exact, the Newton - method is blazing fast - - - |ncg_quad_icond| - - - |ncg_quad_icond_conv| - - * - **An ill-conditioned non-quadratic function:** - - Here we are optimizing a Gaussian, which is always below its - quadratic approximation. As a result, the Newton method overshoots - and leads to oscillations. - - - |ncg_gauss_icond| - - - |ncg_gauss_icond_conv| - - * - **An ill-conditioned very non-quadratic function:** - - - |ncg_rosen_icond| - - - |ncg_rosen_icond_conv| - -In SciPy, you can use the Newton method by setting ``method`` to Newton-CG in -:func:`scipy.optimize.minimize`. 
Here, CG refers to the fact that an internal -inversion of the Hessian is performed by conjugate gradient :: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> def jacobian(x): - ... return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) - >>> sp.optimize.minimize(f, [2,-1], method="Newton-CG", jac=jacobian) - message: Optimization terminated successfully. - success: True - status: 0 - fun: 1.5601357400786612e-15 - x: [ 1.000e+00 1.000e+00] - nit: 10 - jac: [ 1.058e-07 -7.483e-08] - nfev: 11 - njev: 33 - nhev: 0 - -Note that compared to a conjugate gradient (above), Newton's method has -required less function evaluations, but more gradient evaluations, as it -uses it to approximate the Hessian. Let's compute the Hessian and pass it -to the algorithm:: - - >>> def hessian(x): # Computed with sympy - ... return np.array(((1 - 4*x[1] + 12*x[0]**2, -4*x[0]), (-4*x[0], 2))) - >>> sp.optimize.minimize(f, [2,-1], method="Newton-CG", jac=jacobian, hess=hessian) - message: Optimization terminated successfully. - success: True - status: 0 - fun: 1.6277298383706738e-15 - x: [ 1.000e+00 1.000e+00] - nit: 10 - jac: [ 1.110e-07 -7.781e-08] - nfev: 11 - njev: 11 - nhev: 10 - -.. note:: - - At very high-dimension, the inversion of the Hessian can be costly - and unstable (large scale > 250). - -.. note:: - - Newton optimizers should not to be confused with Newton's root finding - method, based on the same principles, :func:`scipy.optimize.newton`. - -.. _quasi_newton: - -Quasi-Newton methods: approximating the Hessian on the fly -........................................................... - -**BFGS**: BFGS (Broyden-Fletcher-Goldfarb-Shanno algorithm) refines at -each step an approximation of the Hessian. - -.. |bfgs_quad_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_012.png - :scale: 90% - -.. 
|bfgs_quad_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_031.png - :scale: 75% - -.. |bfgs_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_013.png - :scale: 90% - -.. |bfgs_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_032.png - :scale: 75% - -Full code examples -================== - -.. include the gallery. Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 - - -.. |bfgs_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_014.png - :scale: 90% - -.. |bfgs_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_033.png - :scale: 75% - - -.. list-table:: - :widths: 1 1 1 - - * - **An ill-conditioned quadratic function:** - - On a exactly quadratic function, BFGS is not as fast as Newton's - method, but still very fast. - - - |bfgs_quad_icond| - - - |bfgs_quad_icond_conv| - - * - **An ill-conditioned non-quadratic function:** - - Here BFGS does better than Newton, as its empirical estimate of the - curvature is better than that given by the Hessian. - - - |bfgs_gauss_icond| - - - |bfgs_gauss_icond_conv| - - * - **An ill-conditioned very non-quadratic function:** - - - |bfgs_rosen_icond| - - - |bfgs_rosen_icond_conv| - -:: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> def jacobian(x): - ... return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) - >>> sp.optimize.minimize(f, [2, -1], method="BFGS", jac=jacobian) - message: Optimization terminated successfully. 
- success: True - status: 0 - fun: 2.630637192365927e-16 - x: [ 1.000e+00 1.000e+00] - nit: 8 - jac: [ 6.709e-08 -3.222e-08] - hess_inv: [[ 9.999e-01 2.000e+00] - [ 2.000e+00 4.499e+00]] - nfev: 10 - njev: 10 - -**L-BFGS:** Limited-memory BFGS Sits between BFGS and conjugate gradient: -in very high dimensions (> 250) the Hessian matrix is too costly to -compute and invert. L-BFGS keeps a low-rank version. In addition, box bounds -are also supported by L-BFGS-B:: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> def jacobian(x): - ... return np.array((-2*.5*(1 - x[0]) - 4*x[0]*(x[1] - x[0]**2), 2*(x[1] - x[0]**2))) - >>> sp.optimize.minimize(f, [2, 2], method="L-BFGS-B", jac=jacobian) - message: CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL - success: True - status: 0 - fun: 1.4417677473...e-15 - x: [ 1.000e+00 1.000e+00] - nit: 16 - jac: [ 1.023e-07 -2.593e-08] - nfev: 17 - njev: 17 - hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64> - -Gradient-less methods ----------------------- - -A shooting method: the Powell algorithm -........................................ - -Almost a gradient approach - -.. |powell_quad_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_015.png - :scale: 90% - -.. |powell_quad_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_034.png - :scale: 75% - -.. |powell_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_016.png - :scale: 90% - -.. |powell_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_035.png - :scale: 75% - - -.. |powell_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_017.png - :scale: 90% - -.. |powell_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_036.png - :scale: 75% - - -.. 
list-table:: - :widths: 1 1 1 - - * - **An ill-conditioned quadratic function:** - - Powell's method isn't too sensitive to local ill-conditionning in - low dimensions - - - |powell_quad_icond| - - - |powell_quad_icond_conv| - - * - **An ill-conditioned very non-quadratic function:** - - - |powell_rosen_icond| - - - |powell_rosen_icond_conv| - - -Simplex method: the Nelder-Mead -................................ - -The Nelder-Mead algorithms is a generalization of dichotomy approaches to -high-dimensional spaces. The algorithm works by refining a `simplex -`_, the generalization of intervals -and triangles to high-dimensional spaces, to bracket the minimum. - -**Strong points**: it is robust to noise, as it does not rely on -computing gradients. Thus it can work on functions that are not locally -smooth such as experimental data points, as long as they display a -large-scale bell-shape behavior. However it is slower than gradient-based -methods on smooth, non-noisy functions. - -.. |nm_gauss_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_018.png - :scale: 90% - -.. |nm_gauss_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_037.png - :scale: 75% - - -.. |nm_rosen_icond| image:: auto_examples/images/sphx_glr_plot_gradient_descent_019.png - :scale: 90% - -.. |nm_rosen_icond_conv| image:: auto_examples/images/sphx_glr_plot_gradient_descent_038.png - :scale: 75% - - -.. list-table:: - :widths: 1 1 1 - - * - **An ill-conditioned non-quadratic function:** - - - |nm_gauss_icond| - - - |nm_gauss_icond_conv| - - * - **An ill-conditioned very non-quadratic function:** - - - |nm_rosen_icond| - - - |nm_rosen_icond_conv| - -Using the Nelder-Mead solver in :func:`scipy.optimize.minimize`:: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> sp.optimize.minimize(f, [2, -1], method="Nelder-Mead") - message: Optimization terminated successfully. 
- success: True - status: 0 - fun: 1.11527915993744e-10 - x: [ 1.000e+00 1.000e+00] - nit: 58 - nfev: 111 - final_simplex: (array([[ 1.000e+00, 1.000e+00], - [ 1.000e+00, 1.000e+00], - [ 1.000e+00, 1.000e+00]]), array([ 1.115e-10, 1.537e-10, 4.988e-10])) - -Global optimizers ------------------- - -If your problem does not admit a unique local minimum (which can be hard -to test unless the function is convex), and you do not have prior -information to initialize the optimization close to the solution, you -may need a global optimizer. - -Brute force: a grid search -.......................... - -:func:`scipy.optimize.brute` evaluates the function on a given grid of -parameters and returns the parameters corresponding to the minimum -value. The parameters are specified with ranges given to -:obj:`numpy.mgrid`. By default, 20 steps are taken in each direction:: - - >>> def f(x): # The rosenbrock function - ... return .5*(1 - x[0])**2 + (x[1] - x[0]**2)**2 - >>> sp.optimize.brute(f, ((-1, 2), (-1, 2))) # doctest: +ELLIPSIS - array([1.0000..., 1.0000...]) - - -Practical guide to optimization with SciPy -========================================== - -Choosing a method ------------------- - -All methods are exposed as the ``method`` argument of -:func:`scipy.optimize.minimize`. - -.. image:: auto_examples/images/sphx_glr_plot_compare_optimizers_001.png - :align: center - :width: 95% - -:Without knowledge of the gradient: - - * In general, prefer **BFGS** or **L-BFGS**, even if you have to approximate - numerically gradients. These are also the default if you omit the parameter - ``method`` - depending if the problem has constraints or bounds - - * On well-conditioned problems, **Powell** - and **Nelder-Mead**, both gradient-free methods, work well in - high dimension, but they collapse for ill-conditioned problems. - -:With knowledge of the gradient: - - * **BFGS** or **L-BFGS**. 
- - * Computational overhead of BFGS is larger than that L-BFGS, itself - larger than that of conjugate gradient. On the other side, BFGS usually - needs less function evaluations than CG. Thus conjugate gradient method - is better than BFGS at optimizing computationally cheap functions. - -:With the Hessian: - - * If you can compute the Hessian, prefer the Newton method - (**Newton-CG** or **TCG**). - -:If you have noisy measurements: - - * Use **Nelder-Mead** or **Powell**. - -Making your optimizer faster ------------------------------ - -* Choose the right method (see above), do compute analytically the - gradient and Hessian, if you can. - -* Use `preconditionning `_ - when possible. - -* Choose your initialization points wisely. For instance, if you are - running many similar optimizations, warm-restart one with the results of - another. - -* Relax the tolerance if you don't need precision using the parameter ``tol``. - -Computing gradients -------------------- - -Computing gradients, and even more Hessians, is very tedious but worth -the effort. Symbolic computation with :ref:`Sympy ` may come in -handy. - -.. warning:: - - A *very* common source of optimization not converging well is human - error in the computation of the gradient. You can use - :func:`scipy.optimize.check_grad` to check that your gradient is - correct. It returns the norm of the different between the gradient - given, and a gradient computed numerically: - - >>> sp.optimize.check_grad(f, jacobian, [2, -1]) - np.float64(2.384185791015625e-07) - - See also :func:`scipy.optimize.approx_fprime` to find your errors. - -Synthetic exercises -------------------- - -.. |flat_min_0| image:: auto_examples/images/sphx_glr_plot_exercise_flat_minimum_001.png - :scale: 48% - :target: auto_examples/plot_exercise_flat_minimum.html - -.. |flat_min_1| image:: auto_examples/images/sphx_glr_plot_exercise_flat_minimum_002.png - :scale: 48% - :target: auto_examples/plot_exercise_flat_minimum.html - -.. 
image:: auto_examples/images/sphx_glr_plot_exercise_ill_conditioned_001.png - :scale: 35% - :target: auto_examples/plot_exercise_ill_conditioned.html - :align: right - -.. topic:: **Exercise: A simple (?) quadratic function** - :class: green - - Optimize the following function, using K[0] as a starting point:: - - rng = np.random.default_rng(27446968) - K = rng.normal(size=(100, 100)) - - def f(x): - return np.sum((K @ (x - 1))**2) + np.sum(x**2)**2 - - Time your approach. Find the fastest approach. Why is BFGS not - working well? - -.. topic:: **Exercise: A locally flat minimum** - :class: green - - Consider the function `exp(-1/(.1*x**2 + y**2)`. This function admits - a minimum in (0, 0). Starting from an initialization at (1, 1), try - to get within 1e-8 of this minimum point. - - .. centered:: |flat_min_0| |flat_min_1| - - -Special case: non-linear least-squares -======================================== - -Minimizing the norm of a vector function -------------------------------------------- - -Least square problems, minimizing the norm of a vector function, have a -specific structure that can be used in the `Levenberg–Marquardt algorithm -`_ -implemented in :func:`scipy.optimize.leastsq`. - -Lets try to minimize the norm of the following vectorial function:: - - >>> def f(x): - ... return np.arctan(x) - np.arctan(np.linspace(0, 1, len(x))) - - >>> x0 = np.zeros(10) - >>> sp.optimize.leastsq(f, x0) - (array([0. , 0.11111111, 0.22222222, 0.33333333, 0.44444444, - 0.55555556, 0.66666667, 0.77777778, 0.88888889, 1. ]), ...) - -This took 67 function evaluations (check it with 'full_output=True'). What -if we compute the norm ourselves and use a good generic optimizer (BFGS):: - - >>> def g(x): - ... return np.sum(f(x)**2) - >>> result = sp.optimize.minimize(g, x0, method="BFGS") - >>> result.fun - np.float64(2.6940...e-11) - -BFGS needs more function calls, and gives a less precise result. - -.. 
note:: - - `leastsq` is interesting compared to BFGS only if the - dimensionality of the output vector is large, and larger than the number - of parameters to optimize. - -.. warning:: - - If the function is linear, this is a linear-algebra problem, and - should be solved with :func:`scipy.linalg.lstsq`. - -Curve fitting --------------- - -.. image:: auto_examples/images/sphx_glr_plot_curve_fitting_001.png - :scale: 48% - :target: auto_examples/plot_curve_fitting.html - :align: right - -Least square problems occur often when fitting a non-linear to data. -While it is possible to construct our optimization problem ourselves, -SciPy provides a helper function for this purpose: -:func:`scipy.optimize.curve_fit`:: - - - >>> def f(t, omega, phi): - ... return np.cos(omega * t + phi) - - >>> x = np.linspace(0, 3, 50) - >>> rng = np.random.default_rng(27446968) - >>> y = f(x, 1.5, 1) + .1*rng.normal(size=50) - - >>> sp.optimize.curve_fit(f, x, y) - (array([1.4812..., 0.9999...]), array([[ 0.0003..., -0.0004...], - [-0.0004..., 0.0010...]])) - - -.. topic:: **Exercise** - :class: green - - Do the same with omega = 3. What is the difficulty? - -Optimization with constraints -============================== - -Box bounds ----------- - -Box bounds correspond to limiting each of the individual parameters of -the optimization. Note that some problems that are not originally written -as box bounds can be rewritten as such via change of variables. Both -:func:`scipy.optimize.minimize_scalar` and :func:`scipy.optimize.minimize` -support bound constraints with the parameter ``bounds``:: - - >>> def f(x): - ... 
return np.sqrt((x[0] - 3)**2 + (x[1] - 2)**2) - >>> sp.optimize.minimize(f, np.array([0, 0]), bounds=((-1.5, 1.5), (-1.5, 1.5))) - message: CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL - success: True - status: 0 - fun: 1.5811388300841898 - x: [ 1.500e+00 1.500e+00] - nit: 2 - jac: [-9.487e-01 -3.162e-01] - nfev: 9 - njev: 3 - hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64> - -.. image:: auto_examples/images/sphx_glr_plot_constraints_002.png - :target: auto_examples/plot_constraints.html - :align: right - :scale: 75% - - -General constraints --------------------- - -Equality and inequality constraints specified as functions: :math:`f(x) = 0` -and :math:`g(x) < 0`. - -* :func:`scipy.optimize.fmin_slsqp` Sequential least square programming: - equality and inequality constraints: - - .. image:: auto_examples/images/sphx_glr_plot_non_bounds_constraints_001.png - :target: auto_examples/plot_non_bounds_constraints.html - :align: right - :scale: 75% - - :: - - >>> def f(x): - ... return np.sqrt((x[0] - 3)**2 + (x[1] - 2)**2) - - >>> def constraint(x): - ... return np.atleast_1d(1.5 - np.sum(np.abs(x))) - - >>> x0 = np.array([0, 0]) - >>> sp.optimize.minimize(f, x0, constraints={"fun": constraint, "type": "ineq"}) - message: Optimization terminated successfully - success: True - status: 0 - fun: 2.47487373504... - x: [ 1.250e+00 2.500e-01] - nit: 5 - jac: [-7.071e-01 -7.071e-01] - nfev: 15 - njev: 5 - -.. warning:: - - The above problem is known as the `Lasso - `_ - problem in statistics, and there exist very efficient solvers for it - (for instance in `scikit-learn `_). In - general do not use generic solvers when specific ones exist. - -.. topic:: **Lagrange multipliers** - - If you are ready to do a bit of math, many constrained optimization - problems can be converted to non-constrained optimization problems - using a mathematical trick known as `Lagrange multipliers - `_. - -Full code examples -================== - -.. include the gallery. 
Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 - -.. seealso:: **Other Software** - - SciPy tries to include the best well-established, general-use, - and permissively-licensed optimization algorithms available. However, - even better options for a given task may be available in other libraries; - please also see IPOPT_ and PyGMO_. - -.. _IPOPT: https://github.com/xuy/pyipopt -.. _PyGMO: https://esa.github.io/pygmo2/ diff --git a/advanced/mathematical_optimization/optimization_examples.md b/advanced/mathematical_optimization/optimization_examples.md new file mode 100644 index 000000000..69ab603e3 --- /dev/null +++ b/advanced/mathematical_optimization/optimization_examples.md @@ -0,0 +1,732 @@ +--- +jupytext: + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(optimization-examples)= + ++++ + +# Examples for mathematical optimization page + +```{code-cell} +import numpy as np +import scipy as sp +import matplotlib.pyplot as plt +``` + +```{code-cell} +# Machinery to store outputs for later use. +# This is for rendering in the Jupyter Book version of these pages. 
+from myst_nb import glue +``` + +(convex-function-eg)= + ++++ + +## Convex function + + + +A figure showing the definition of a convex function: + +```{code-cell} +x = np.linspace(-1, 2) +``` + +```{code-cell} +plt.figure(figsize=(6, 4)) +# A convex function +plt.plot(x, x**2, linewidth=2) +plt.text(-0.7, -(0.6**2), "$f$", size=20) + +# The tangent in one point +plt.plot(x, 2 * x - 1) +plt.plot(1, 1, "k+") +plt.text(0.3, -0.75, "Tangent to $f$", size=15) +plt.text(1, 1 - 0.5, "C", size=15) + +# Convexity as barycenter +plt.plot([0.35, 1.85], [0.35**2, 1.85**2]) +plt.plot([0.35, 1.85], [0.35**2, 1.85**2], "k+") +plt.text(0.35 - 0.2, 0.35**2 + 0.1, "A", size=15) +plt.text(1.85 - 0.2, 1.85**2, "B", size=15) + +plt.ylim(ymin=-1) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in page. +glue("convex_func", plt.gcf(), display=False) +``` + +```{code-cell} +# Convexity as barycenter +plt.figure(figsize=(6, 4)) +plt.plot(x, x**2 + np.exp(-5 * (x - 0.5) ** 2), linewidth=2) +plt.text(-0.7, -(0.6**2), "$f$", size=20) + +plt.ylim(ymin=-1) +plt.xticks([]) +plt.yticks([]) +plt.tight_layout() + +# Store figure for use in page. +glue("non_convex_func", plt.gcf(), display=False) +``` + +(smooth-function-eg)= + ++++ + +## Smooth and non-smooth functions + +```{code-cell} +plt.figure(figsize=(4, 4)) +x = np.linspace(-1.5, 1.5, 101) + +# A smooth function + +plt.plot(x, np.sqrt(0.2 + x**2), linewidth=2) +plt.text(-1, 0, "$f$", size=20) + +plt.ylim(ymin=-0.2) +plt.axis("off") +plt.tight_layout() + +# Store figure for use in page. +glue("smooth_func", plt.gcf(), display=False) +``` + +```{code-cell} +# A non-smooth function +plt.figure(figsize=(4, 4)) +plt.plot(x, np.abs(x), linewidth=2) +plt.text(-1, 0, "$f$", size=20) + +plt.ylim(ymin=-0.2) +plt.axis("off") +plt.tight_layout() + +# Store figure for use in page. 
+glue("non_smooth_func", plt.gcf(), display=False) +``` + +(noisy-non-noisy-eg)= + ++++ + +## Noisy and non-noisy functions + +```{code-cell} +rng = np.random.default_rng(27446968) + +x = np.linspace(-5, 5, 101) +x_ = np.linspace(-5, 5, 31) + +# A smooth function +def f(x): + return -np.exp(-(x**2)) + +plt.figure(figsize=(5, 4)) +plt.plot(x_, f(x_) + 0.2 * rng.normal(size=31), linewidth=2) +plt.plot(x, f(x), linewidth=2) + +plt.ylim(ymin=-1.3) +plt.axis("off") +plt.tight_layout() + +# Store figure for use in page. +glue("noisy_non_noisy", plt.gcf(), display=False) +``` + +(constraints-eg)= + ++++ + +## Optimizing with constraints + +```{code-cell} +x, y = np.mgrid[-2.9:5.8:0.05, -2.5:5:0.05] # type: ignore[misc] +x = x.T +y = y.T + +def make_constraint_fig(): + fig = plt.figure(figsize=(3, 2.5)) + contours = plt.contour( + np.sqrt((x - 3) ** 2 + (y - 2) ** 2), + extent=[-3, 6, -2.5, 5], + cmap="gnuplot", + ) + plt.clabel(contours, inline=1, fmt="%1.1f", fontsize=14) + plt.plot( + [-1.5, -1.5, 1.5, 1.5, -1.5], [-1.5, 1.5, 1.5, -1.5, -1.5], "k", linewidth=2 + ) + plt.fill_between([-1.5, 1.5], [-1.5, -1.5], [1.5, 1.5], color=".8") + plt.axvline(0, color="k") + plt.axhline(0, color="k") + + plt.text(-0.9, 4.4, "$x_2$", size=20) + plt.text(5.6, -0.6, "$x_1$", size=20) + plt.axis("scaled") + plt.axis("off") + return fig + +# Store figure for use in page. 
+glue("constraints_no_path", make_constraint_fig(), display=False) + +# And now plot the optimization path +accumulator = [] + +def f(x): + # Store the list of function calls + accumulator.append(x) + return np.sqrt((x[0] - 3) ** 2 + (x[1] - 2) ** 2) + + +# We don't use the gradient, as with the gradient, L-BFGS is too fast, +# and finds the optimum without showing us a pretty path +def f_prime(x): + r = np.sqrt((x[0] - 3) ** 2 + (x[1] - 2) ** 2) + return np.array(((x[0] - 3) / r, (x[1] - 2) / r)) + + +sp.optimize.minimize( + f, np.array([0, 0]), method="L-BFGS-B", bounds=((-1.5, 1.5), (-1.5, 1.5)) +) +accumulated = np.array(accumulator) + +fig = make_constraint_fig() +plt.plot(accumulated[:, 0], accumulated[:, 1]); + +glue("constraints_path", fig, display=False) +``` + +(brents-method-eg)= + ++++ + +## Brent's method for convex and not-convex functions + +```{code-cell} +x = np.linspace(-1, 3, 100) +x_0 = np.exp(-1) + +def func(x, epsilon): + return (x - x_0)**2 + epsilon * np.exp(-5 * (x - .5 - x_0)**2) +``` + +```{code-cell} +for epsilon in (0, 1): + + f = lambda x : func(x, epsilon) + + plt.figure(figsize=(3, 2.5)) + plt.axes((0, 0, 1, 1)) + + # A convex function + plt.plot(x, f(x), linewidth=2) + + # Apply Brent method.
To have access to the iteration, do this in an + # artificial way: allow the algorithm to iter only once + all_x = [] + all_y = [] + for iter in range(30): + result = sp.optimize.minimize_scalar( + f, + bracket=(-5, 2.9, 4.5), + method="Brent", + options={"maxiter": iter}, + tol=np.finfo(1.0).eps, + ) + if result.success: + print("Converged at ", iter) + break + + this_x = result.x + all_x.append(this_x) + all_y.append(f(this_x)) + if iter < 6: + plt.text( + this_x - 0.05 * np.sign(this_x) - 0.05, + f(this_x) + 1.2 * (0.3 - iter % 2), + str(iter + 1), + size=12, + ) + + plt.plot(all_x[:10], all_y[:10], "k+", markersize=12, markeredgewidth=2) + + plt.plot(all_x[-1], all_y[-1], "rx", markersize=12) + plt.axis("off") + plt.ylim(ymin=-1, ymax=8) + + # Store figure for use in page. + glue(f"brent_epsilon_{epsilon}_func", plt.gcf(), display=False) + + plt.figure(figsize=(4, 3)) + plt.semilogy(np.abs(all_y - all_y[-1]), linewidth=2) + plt.ylabel("Error on f(x)") + plt.xlabel("Iteration") + plt.tight_layout() + + # Store figure for use in page. + glue(f"brent_epsilon_{epsilon}_err", plt.gcf(), display=False) +``` + +(gradient-descent-eg)= + ++++ + +## Gradient descent examples + +An example demoing gradient descent by creating figures that trace the +evolution of the optimizer. + +```{code-cell} +# Preparatory work for loading helper code. 
+import sys +import os + +sys.path.append(os.path.abspath("helper")) + +from cost_functions import ( + mk_quad, + mk_gauss, + rosenbrock, + rosenbrock_prime, + rosenbrock_hessian, + LoggingFunction, + CountingFunction, +) +``` + +```{code-cell} +x_min, x_max = -1, 2 +y_min, y_max = 2.25 / 3 * x_min - 0.2, 2.25 / 3 * x_max - 0.2 +``` + +A formatter to print values on contours: + +```{code-cell} +def super_fmt(value): + if value > 1: + if np.abs(int(value) - value) < 0.1: + out = f"$10^{{{int(value):d}}}$" + else: + out = f"$10^{{{value:.1f}}}$" + else: + value = np.exp(value - 0.01) + if value > 0.1: + out = f"{value:1.1f}" + elif value > 0.01: + out = f"{value:.2f}" + else: + out = f"{value:.2e}" + return out +``` + +A gradient descent algorithm. + +Do not use for production work: it's a toy, use scipy's `optimize.fmin_cg` + +```{code-cell} +def gradient_descent(x0, f, f_prime, hessian=None, adaptative=False): + x_i, y_i = x0 + all_x_i = [] + all_y_i = [] + all_f_i = [] + + for i in range(1, 100): + all_x_i.append(x_i) + all_y_i.append(y_i) + all_f_i.append(f([x_i, y_i])) + dx_i, dy_i = f_prime(np.asarray([x_i, y_i])) + if adaptative: + # Compute a step size using a line_search to satisfy the Wolfe + # conditions + step = sp.optimize.line_search( + f, + f_prime, + np.r_[x_i, y_i], + -np.r_[dx_i, dy_i], + np.r_[dx_i, dy_i], + c2=0.05, + ) + step = step[0] + if step is None: + step = 0 + else: + step = 1 + x_i += -step * dx_i + y_i += -step * dy_i + if np.abs(all_f_i[-1]) < 1e-16: + break + return all_x_i, all_y_i, all_f_i + + +def gradient_descent_adaptative(x0, f, f_prime, hessian=None): + return gradient_descent(x0, f, f_prime, adaptative=True) + + +def conjugate_gradient(x0, f, f_prime, hessian=None): + all_x_i = [x0[0]] + all_y_i = [x0[1]] + all_f_i = [f(x0)] + + def store(X): + x, y = X + all_x_i.append(x) + all_y_i.append(y) + all_f_i.append(f(X)) + + sp.optimize.minimize( + f, x0, jac=f_prime, method="CG", callback=store, options={"gtol": 1e-12} + ) + return
all_x_i, all_y_i, all_f_i + + +def newton_cg(x0, f, f_prime, hessian): + all_x_i = [x0[0]] + all_y_i = [x0[1]] + all_f_i = [f(x0)] + + def store(X): + x, y = X + all_x_i.append(x) + all_y_i.append(y) + all_f_i.append(f(X)) + + sp.optimize.minimize( + f, + x0, + method="Newton-CG", + jac=f_prime, + hess=hessian, + callback=store, + options={"xtol": 1e-12}, + ) + return all_x_i, all_y_i, all_f_i + + +def bfgs(x0, f, f_prime, hessian=None): + all_x_i = [x0[0]] + all_y_i = [x0[1]] + all_f_i = [f(x0)] + + def store(X): + x, y = X + all_x_i.append(x) + all_y_i.append(y) + all_f_i.append(f(X)) + + sp.optimize.minimize( + f, x0, method="BFGS", jac=f_prime, callback=store, options={"gtol": 1e-12} + ) + return all_x_i, all_y_i, all_f_i + + +def powell(x0, f, f_prime, hessian=None): + all_x_i = [x0[0]] + all_y_i = [x0[1]] + all_f_i = [f(x0)] + + def store(X): + x, y = X + all_x_i.append(x) + all_y_i.append(y) + all_f_i.append(f(X)) + + sp.optimize.minimize( + f, x0, method="Powell", callback=store, options={"ftol": 1e-12} + ) + return all_x_i, all_y_i, all_f_i + + +def nelder_mead(x0, f, f_prime, hessian=None): + all_x_i = [x0[0]] + all_y_i = [x0[1]] + all_f_i = [f(x0)] + + def store(X): + x, y = X + all_x_i.append(x) + all_y_i.append(y) + all_f_i.append(f(X)) + + sp.optimize.minimize( + f, x0, method="Nelder-Mead", callback=store, options={"ftol": 1e-12} + ) + return all_x_i, all_y_i, all_f_i +``` + +Run different optimizers on these problems. 
+ +```{code-cell} +levels = {} + +for name, (f, f_prime, hessian), optimizer in ( + ('q_07_gd', mk_quad(0.7), gradient_descent), + ('q_07_gda', mk_quad(0.7), gradient_descent_adaptative), + ('q_002_gd', mk_quad(0.02), gradient_descent), + ('q_002_gda', mk_quad(0.02), gradient_descent_adaptative), + ('g_002_gda', mk_gauss(0.02), gradient_descent_adaptative), + ( + 'rb_gda', + (rosenbrock, rosenbrock_prime, rosenbrock_hessian), + gradient_descent_adaptative, + ), + ('g_002_cg', mk_gauss(0.02), conjugate_gradient), + ( + 'rb_cg', + (rosenbrock, rosenbrock_prime, rosenbrock_hessian), + conjugate_gradient, + ), + ('q_002_ncg', mk_quad(0.02), newton_cg), + ('g_002_ncg', mk_gauss(0.02), newton_cg), + ( + 'rb_ncg', + (rosenbrock, rosenbrock_prime, rosenbrock_hessian), + newton_cg, + ), + ('q_002_bgfs', mk_quad(0.02), bfgs), + ('g_002_bgfs', mk_gauss(0.02), bfgs), + ('rb_bgfs', (rosenbrock, rosenbrock_prime, rosenbrock_hessian), bfgs), + ('q_002_pow', mk_quad(0.02), powell), + ('g_002_pow', mk_gauss(0.02), powell), + ('rb_pow', (rosenbrock, rosenbrock_prime, rosenbrock_hessian), powell), + ('g_002_nm', mk_gauss(0.02), nelder_mead), + ('rb_nm', (rosenbrock, rosenbrock_prime, rosenbrock_hessian), nelder_mead), +): + # Compute a gradient-descent + x_i, y_i = 1.6, 1.1 + counting_f_prime = CountingFunction(f_prime) + counting_hessian = CountingFunction(hessian) + logging_f = LoggingFunction(f, counter=counting_f_prime.counter) + all_x_i, all_y_i, all_f_i = optimizer( + np.array([x_i, y_i]), logging_f, counting_f_prime, hessian=counting_hessian + ) + + # Plot the contour plot + if not max(all_y_i) < y_max: + x_min *= 1.2 + x_max *= 1.2 + y_min *= 1.2 + y_max *= 1.2 + x, y = np.mgrid[x_min:x_max:100j, y_min:y_max:100j] + x = x.T + y = y.T + + plt.figure(figsize=(3, 2.5)) + plt.axes([0, 0, 1, 1]) + + X = np.concatenate((x[np.newaxis, ...], y[np.newaxis, ...]), axis=0) + z = np.apply_along_axis(f, 0, X) + log_z = np.log(z + 0.01) + plt.imshow( + log_z, + extent=[x_min, x_max, y_min, 
y_max], + cmap=plt.cm.gray_r, + origin="lower", + vmax=log_z.min() + 1.5 * np.ptp(log_z), + ) + contours = plt.contour( + log_z, + levels=levels.get(f), + extent=[x_min, x_max, y_min, y_max], + cmap=plt.cm.gnuplot, + origin="lower", + ) + levels[f] = contours.levels + plt.clabel(contours, inline=1, fmt=super_fmt, fontsize=14) + + plt.plot(all_x_i, all_y_i, "b-", linewidth=2) + plt.plot(all_x_i, all_y_i, "k+") + + plt.plot(logging_f.all_x_i, logging_f.all_y_i, "k.", markersize=2) + + plt.plot([0], [0], "rx", markersize=12) + + plt.xticks(()) + plt.yticks(()) + plt.xlim(x_min, x_max) + plt.ylim(y_min, y_max) + + # Store figure for use in page. + glue(f'gradient_descent_{name}_func', plt.gcf(), display=False) + + plt.figure(figsize=(4, 3)) + plt.semilogy(np.maximum(np.abs(all_f_i), 1e-30), + linewidth=2, + label="# iterations") + plt.ylabel("Error on f(x)") + plt.semilogy( + logging_f.counts, + np.maximum(np.abs(logging_f.all_f_i), 1e-30), + linewidth=2, + color="g", + label="# function calls", + ) + plt.legend( + loc="upper right", + frameon=True, + prop={"size": 11}, + borderaxespad=0, + handlelength=1.5, + handletextpad=0.5, + ) + plt.tight_layout() + + # Store figure for use in page. + glue(f'gradient_descent_{name}_err', plt.gcf(), display=False) +``` + +(compare-optimizers-eg)= + ++++ + +## Plotting the comparison of optimizers + +Plots the results from the comparison of optimizers. 
+ +```{code-cell} +import pickle + +with open('helper/compare_optimizers_py3.pkl', 'rb') as fobj: + results = pickle.load(fobj) + +n_methods = len(list(results.values())[0]["Rosenbrock "]) +n_dims = len(results) + +symbols = "o>*Ds" + +plt.figure(1, figsize=(10, 4)) +plt.clf() + +nipy_spectral = plt.colormaps["nipy_spectral"] +colors = nipy_spectral(np.linspace(0, 1, n_dims))[:, :3] + +method_names = list(list(results.values())[0]["Rosenbrock "].keys()) +method_names.sort(key=lambda x: x[::-1], reverse=True) + +for n_dim_index, ((n_dim, n_dim_bench), color) in enumerate( + zip(sorted(results.items()), colors, strict=True) +): + for (cost_name, cost_bench), symbol in zip( + sorted(n_dim_bench.items()), symbols, strict=True + ): + for ( + method_index, + method_name, + ) in enumerate(method_names): + this_bench = cost_bench[method_name] + bench = np.mean(this_bench) + plt.semilogy([method_index + 0.1 * n_dim_index], + [bench], + marker=symbol, + color=color) + +# Create a legend for the problem type +for cost_name, symbol in zip(sorted(n_dim_bench.keys()), symbols, strict=True): + plt.semilogy([-10], [0], symbol, color=".5", label=cost_name) + +plt.xticks(np.arange(n_methods), method_names, size=11) +plt.xlim(-0.2, n_methods - 0.5) +plt.legend(loc="best", numpoints=1, handletextpad=0, prop={"size": 12}, frameon=False) +plt.ylabel("# function calls (a.u.)") + +# Create a second legend for the problem dimensionality +plt.twinx() + +for n_dim, color in zip(sorted(results.keys()), colors, strict=True): + plt.plot([-10], [0], "o", color=color, label=f"# dim: {n_dim}") + +plt.legend( + loc=(0.47, 0.07), + numpoints=1, + handletextpad=0, + prop={"size": 12}, + frameon=False, + ncol=2, +) +plt.xlim(-0.2, n_methods - 0.5) + +plt.xticks(np.arange(n_methods), method_names) +plt.yticks(()) + +plt.tight_layout() + +# Store figure for use in page. 
+glue(f'compare_optimizers', plt.gcf(), display=False) +``` + +(constraints-non-bounds-eg)= + ++++ + +## Optimization with constraints, SLSQP and COBYLA + +An example showing how to do optimization with general constraints using SLSQP +and COBYLA. + +```{code-cell} +x, y = np.mgrid[-2.03:4.2:0.04, -1.6:3.2:0.04] +x = x.T +y = y.T +``` + +```{code-cell} +plt.figure(figsize=(3, 2.5)) +plt.axes((0, 0, 1, 1)) + +contours = plt.contour( + np.sqrt((x - 3) ** 2 + (y - 2) ** 2), + extent=[-2.03, 4.2, -1.6, 3.2], + cmap="gnuplot", +) +plt.clabel(contours, inline=1, fmt="%1.1f", fontsize=14) +plt.plot([-1.5, 0, 1.5, 0, -1.5], [0, 1.5, 0, -1.5, 0], "k", linewidth=2) +plt.fill_between([-1.5, 0, 1.5], [0, -1.5, 0], [0, 1.5, 0], color=".8") +plt.axvline(0, color="k") +plt.axhline(0, color="k") + +plt.text(-0.9, 2.8, "$x_2$", size=20) +plt.text(3.6, -0.6, "$x_1$", size=20) +plt.axis("tight") +plt.axis("off") + +# And now plot the optimization path +accumulator = [] + + +def f(x): + # Store the list of function calls + accumulator.append(x) + return np.sqrt((x[0] - 3) ** 2 + (x[1] - 2) ** 2) + + +def constraint(x): + return np.atleast_1d(1.5 - np.sum(np.abs(x))) + + +sp.optimize.minimize( + f, np.array([0, 0]), method="SLSQP", constraints={"fun": constraint, "type": "ineq"} +) + +accumulated = np.array(accumulator) +plt.plot(accumulated[:, 0], accumulated[:, 1]) + +# Store figure for use in page. 
+glue(f'constraints_non_bounds', plt.gcf(), display=False) +``` diff --git a/advanced/optimizing/index.md b/advanced/optimizing/index.md new file mode 100644 index 000000000..d99683e1d --- /dev/null +++ b/advanced/optimizing/index.md @@ -0,0 +1,429 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(optimizing-code-chapter)= + +# Optimizing code + +:::{sidebar} Donald Knuth +_“Premature optimization is the root of all evil”_ +::: + +**Author**: _Gaël Varoquaux_ + +This chapter deals with strategies to make Python code go faster. + +:::{admonition} Prerequisites + +- [line_profiler](https://pypi.org/project/line-profiler/) + ::: + ++++ + +## Optimization workflow + +1. Make it work: write the code in a simple, **legible** way. +2. Make it work reliably: write automated test cases, make really sure + that your algorithm is right and that if you break it, the tests will + capture the breakage. +3. Optimize the code by profiling simple use-cases to find the + bottlenecks and speeding up these bottlenecks, finding a better + algorithm or implementation. Keep in mind that a trade off should be + found between profiling on a realistic example and the simplicity and + speed of execution of the code. For efficient work, it is best to work + with profiling runs lasting around 10s. + +## Profiling Python code + +:::{admonition} **No optimization without measuring!** + +- **Measure:** profiling, timing +- You'll have surprises: the fastest code is not always what you think + ::: + +### Timeit + +In Jupyter or IPython, use `timeit` +(<https://docs.python.org/3/library/timeit.html>) to time elementary +operations: + +```{code-cell} +import numpy as np + +a = np.arange(1000) + +%timeit a ** 2 +``` + +```{code-cell} +%timeit a ** 2.1 +``` + +```{code-cell} +%timeit a * a +``` + +Use this to guide your choice between strategies.
+ +:::{note} +For long running calls, use `%time` instead of `%timeit`; it is +less precise but faster. +::: + +### Profiler + +Useful when you have a large program to profile, for example the +{download}`following file <demo.py>`: + +```{literalinclude} demo.py + +``` + +:::{note} +This is a combination of two unsupervised learning techniques, principal +component analysis ([PCA](https://en.wikipedia.org/wiki/Principal_component_analysis)) and +independent component analysis +([ICA](https://en.wikipedia.org/wiki/Independent_component_analysis)). PCA +is a technique for dimensionality reduction, i.e. an algorithm to explain +the observed variance in your data using fewer dimensions. ICA is a source +separation technique, for example to unmix multiple signals that have been +recorded through multiple sensors. Doing a PCA first and then an ICA can be +useful if you have more sensors than signals. For more information see: +[the FastICA example from scikit-learn](https://scikit-learn.org/stable/auto_examples/decomposition/plot_ica_blind_source_separation.html). +::: + +To run it, you also need to download the {download}`ica module <ica.py>`. +In IPython we can time the script: + +```python +In [1]: %run -t demo.py +IPython CPU timings (estimated): + User : 14.3929 s. + System: 0.256016 s.
+``` + +and profile it: + +```python +In [2]: %run -p demo.py + 916 function calls in 14.551 CPU seconds +Ordered by: internal time +ncalls tottime percall cumtime percall filename:lineno (function) + 1 14.457 14.457 14.479 14.479 decomp.py:849 (svd) + 1 0.054 0.054 0.054 0.054 {method 'random_sample' of 'mtrand.RandomState' objects} + 1 0.017 0.017 0.021 0.021 function_base.py:645 (asarray_chkfinite) + 54 0.011 0.000 0.011 0.000 {numpy.core._dotblas.dot} + 2 0.005 0.002 0.005 0.002 {method 'any' of 'numpy.ndarray' objects} + 6 0.001 0.000 0.001 0.000 ica.py:195 (gprime) + 6 0.001 0.000 0.001 0.000 ica.py:192 (g) + 14 0.001 0.000 0.001 0.000 {numpy.linalg.lapack_lite.dsyevd} + 19 0.001 0.000 0.001 0.000 twodim_base.py:204 (diag) + 1 0.001 0.001 0.008 0.008 ica.py:69 (_ica_par) + 1 0.001 0.001 14.551 14.551 {execfile} + 107 0.000 0.000 0.001 0.000 defmatrix.py:239 (__array_finalize__) + 7 0.000 0.000 0.004 0.001 ica.py:58 (_sym_decorrelation) + 7 0.000 0.000 0.002 0.000 linalg.py:841 (eigh) + 172 0.000 0.000 0.000 0.000 {isinstance} + 1 0.000 0.000 14.551 14.551 demo.py:1 () + 29 0.000 0.000 0.000 0.000 numeric.py:180 (asarray) + 35 0.000 0.000 0.000 0.000 defmatrix.py:193 (__new__) + 35 0.000 0.000 0.001 0.000 defmatrix.py:43 (asmatrix) + 21 0.000 0.000 0.001 0.000 defmatrix.py:287 (__mul__) + 41 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros} + 28 0.000 0.000 0.000 0.000 {method 'transpose' of 'numpy.ndarray' objects} + 1 0.000 0.000 0.008 0.008 ica.py:97 (fastica) + ... +``` + +Clearly the `svd` (in `decomp.py`) is what takes most of our time, a.k.a. the +bottleneck. We have to find a way to make this step go faster, or to avoid this +step (algorithmic optimization). Spending time on the rest of the code is +useless. 
+ +:::{admonition} **Profiling outside of IPython, running `cProfile`** +Similar profiling can be done outside of IPython, simply calling the +built-in [Python profilers](https://docs.python.org/3/library/profile.html) `cProfile` and +`profile`. + +```console +$ python -m cProfile -o demo.prof demo.py +``` + +Using the `-o` switch will output the profiler results to the file +`demo.prof` to view with an external tool. This can be useful if +you wish to process the profiler output with a visualization tool. +::: + +### Line-profiler + +The profiler tells us which function takes most of the time, but not +where it is called. + +For this, we use the +[line_profiler](https://pypi.org/project/line-profiler/): in the +source file, we decorate a few functions that we want to inspect with +`@profile` (no need to import it) + +```python +@profile +def test(): + rng = np.random.default_rng() + data = rng.random((5000, 100)) + u, s, v = sp.linalg.svd(data) + pca = u[:, :10].T @ data + results = fastica(pca.T, whiten=False) +``` + +Then we run the script using the [kernprof](https://pypi.org/project/line-profiler/) command, with switches `-l, --line-by-line` and `-v, --view` to use the line-by-line profiler and view the results in addition to saving them: + +```console +$ kernprof -l -v demo.py + +Wrote profile results to demo.py.lprof +Timer unit: 1e-06 s + +Total time: 1.27874 s +File: demo.py +Function: test at line 9 + +Line # Hits Time Per Hit % Time Line Contents +============================================================== + 9 @profile + 10 def test(): + 11 1 69.0 69.0 0.0 rng = np.random.default_rng() + 12 1 2453.0 2453.0 0.2 data = rng.random((5000, 100)) + 13 1 1274715.0 1274715.0 99.7 u, s, v = sp.linalg.svd(data) + 14 1 413.0 413.0 0.0 pca = u[:, :10].T @ data + 15 1 1094.0 1094.0 0.1 results = fastica(pca.T, whiten=False) +``` + +**The SVD is taking all the time.** We need to optimise this line.
+ +## Making code go faster + +Once we have identified the bottlenecks, we need to make the +corresponding code go faster. + +### Algorithmic optimization + +The first thing to look for is algorithmic optimization: are there ways +to compute less, or better? + +For a high-level view of the problem, a good understanding of the maths +behind the algorithm helps. However, it is not uncommon to find simple +changes, like **moving computation or memory allocation outside a for +loop**, that bring in big gains. + +#### Example of the SVD + +In both examples above, the SVD - +[Singular Value Decomposition](https://en.wikipedia.org/wiki/Singular_value_decomposition) +\- is what +takes most of the time. Indeed, the computational cost of this algorithm is +roughly $n^3$ in the size of the input matrix. + +However, in both of these examples, we are not using all the output of +the SVD, but only the first few columns of its first return argument. If +we use the `svd` implementation of SciPy, we can ask for an incomplete +version of the SVD. Note that implementations of linear algebra in +SciPy are richer than those in NumPy and should be preferred. + +```python +In [3]: %timeit np.linalg.svd(data) +1 loops, best of 3: 14.5 s per loop + +In [4]: import scipy as sp + +In [5]: %timeit sp.linalg.svd(data) +1 loops, best of 3: 14.2 s per loop + +In [6]: %timeit sp.linalg.svd(data, full_matrices=False) +1 loops, best of 3: 295 ms per loop + +In [7]: %timeit np.linalg.svd(data, full_matrices=False) +1 loops, best of 3: 293 ms per loop +``` + +We can then use this insight to {download}`optimize the previous code <demo_opt.py>`: + +```{literalinclude} demo_opt.py +:pyobject: test +``` + +```python +In [1]: import demo + +In [2]: %timeit demo.
+demo.fastica demo.np demo.prof.pdf demo.py demo.pyc +demo.linalg demo.prof demo.prof.png demo.py.lprof demo.test + +In [2]: %timeit demo.test() +ica.py:65: RuntimeWarning: invalid value encountered in sqrt + W = (u * np.diag(1.0/np.sqrt(s)) * u.T) * W # W = (W * W.T) ^{-1/2} * W +1 loops, best of 3: 17.5 s per loop + +In [3]: import demo_opt + +In [4]: %timeit demo_opt.test() +1 loops, best of 3: 208 ms per loop +``` + +Real incomplete SVDs, e.g. computing only the first 10 eigenvectors, can +be computed with ARPACK, available in `scipy.sparse.linalg.eigsh`. + +:::{admonition} Computational linear algebra +For certain algorithms, many of the bottlenecks will be linear +algebra computations. In this case, using the right function to solve +the right problem is key. For instance, an eigenvalue problem with a +symmetric matrix is easier to solve than with a general matrix. Also, +most often, you can avoid inverting a matrix and use a less costly +(and more numerically stable) operation. + +Know your computational linear algebra. When in doubt, explore +`scipy.linalg`, and use `%timeit` to try out different alternatives +on your data. +::: + +## Writing faster numerical code + +A complete discussion on advanced use of NumPy is found in chapter +{ref}`advanced-numpy`, or in the article [The NumPy array: a structure for +efficient numerical computation](https://hal.inria.fr/inria-00564007/en). +by van der Walt _et al._ Here we discuss only some commonly encountered tricks +to make code faster. + ++++ + +### Vectorizing for loops + +Find tricks to avoid for loops using NumPy arrays. For this, masks and +indices arrays can be useful. + +### Broadcasting + +Use {ref}`broadcasting ` to do operations on arrays as +small as possible before combining them. 
+ + + +### In place operations + +```{code-cell} +a = np.zeros(10_000_000) + +%timeit global a ; a = 0*a +``` + +```{code-cell} +%timeit global a ; a *= 0 +``` + +**note**: we need `global a` in the `timeit` so that it works as expected, as +otherwise it is assigning to `a`, and thus considers it as a local variable. + +### Be easy on the memory: use views, and not copies + +Copying big arrays is as costly as making simple numerical operations +on them: + +```{code-cell} +a = np.zeros(10_000_000) + +%timeit a.copy() +``` + +```{code-cell} +%timeit a + 1 +``` + +### Beware of cache effects + +Memory access is cheaper when it is grouped: accessing a big array in a +continuous way is much faster than random access. This implies amongst +other things that **smaller strides are faster** (see +{ref}`cache-effects`): + +```{code-cell} +c = np.zeros((5000, 5000), order='C') + +# Row elements are far apart in memory, for C ordering. +%timeit np.median(c, axis=0) +``` + +```{code-cell} +# Column elements are contiguous in memory, for C ordering. +%timeit np.median(c, axis=1) +``` + +```{code-cell} +c.strides +``` + +This is the reason why Fortran ordering or C ordering may make a big +difference on speed of operations: + +```{code-cell} +rng = np.random.default_rng() + +a = rng.random((20, 2**18)) + +b = rng.random((20, 2**18)) + +%timeit b @ a.T +``` + +```{code-cell} +c = np.ascontiguousarray(a.T) + +%timeit b @ c +``` + +Note that copying the data to work around this effect may not be worth it: + +```{code-cell} +%timeit c = np.ascontiguousarray(a.T) +``` + +Using [numexpr](https://github.com/pydata/numexpr) can be useful to +automatically optimize code for such effects. + +### Use compiled code + +The last resort, once you are sure that all the high-level optimizations have +been explored, is to transfer the hot spots, i.e. the few lines or functions +in which most of the time is spent, to compiled code. 
For compiled code, the +preferred option is to use [Cython](https://www.cython.org): it is easy to +transform existing Python code into compiled code, and with a good use of the +[NumPy support](https://docs.cython.org/en/latest/src/tutorial/numpy.html) +yields efficient code on NumPy arrays, for instance by unrolling loops. + +:::{warning} +For all the above: profile and time your choices. Don't base your +optimization on theoretical considerations. +::: + +## Additional Links + +- If you need to profile memory usage, you could try the + [memory_profiler](https://pypi.org/project/memory-profiler) +- If you need to profile down into C extensions, you could try using + [gperftools](https://github.com/gperftools/gperftools) from Python with + [yep](https://pypi.org/project/yep). +- If you would like to track performance of your code across time, i.e. as you + make new commits to your repository, you could try: + [asv](https://asv.readthedocs.io/en/stable/) +- If you need some interactive visualization why not try + [RunSnakeRun](https://www.vrplumber.com/programming/runsnakerun/) diff --git a/advanced/optimizing/index.rst b/advanced/optimizing/index.rst deleted file mode 100644 index 3d0b73304..000000000 --- a/advanced/optimizing/index.rst +++ /dev/null @@ -1,441 +0,0 @@ -.. _optimizing_code_chapter: - -================= -Optimizing code -================= - -.. sidebar:: Donald Knuth - - *“Premature optimization is the root of all evil”* - -**Author**: *Gaël Varoquaux* - -This chapter deals with strategies to make Python code go faster. - -.. topic:: Prerequisites - - * `line_profiler `_ - -.. contents:: Chapters contents - :local: - :depth: 4 - - -Optimization workflow -====================== - -#. Make it work: write the code in a simple **legible** ways. - -#. Make it work reliably: write automated test cases, make really sure - that your algorithm is right and that if you break it, the tests will - capture the breakage. - -#.
Optimize the code by profiling simple use-cases to find the - bottlenecks and speeding up these bottleneck, finding a better - algorithm or implementation. Keep in mind that a trade off should be - found between profiling on a realistic example and the simplicity and - speed of execution of the code. For efficient work, it is best to work - with profiling runs lasting around 10s. - - -Profiling Python code -===================== - -.. topic:: **No optimization without measuring!** - - * **Measure:** profiling, timing - - * You'll have surprises: the fastest code is not always what you - think - - -Timeit ---------- - -In IPython, use ``timeit`` (https://docs.python.org/3/library/timeit.html) to time elementary operations: - -.. ipython:: - - In [1]: import numpy as np - - In [2]: a = np.arange(1000) - - In [3]: %timeit a ** 2 - 100000 loops, best of 3: 5.73 us per loop - - In [4]: %timeit a ** 2.1 - 1000 loops, best of 3: 154 us per loop - - In [5]: %timeit a * a - 100000 loops, best of 3: 5.56 us per loop - -Use this to guide your choice between strategies. - -.. note:: - - For long running calls, using ``%time`` instead of ``%timeit``; it is - less precise but faster - -Profiler ------------ - -Useful when you have a large program to profile, for example the -:download:`following file `: - -.. literalinclude:: demo.py - - -.. note:: - This is a combination of two unsupervised learning techniques, principal - component analysis (`PCA - `_) and - independent component analysis - (`ICA `_). PCA - is a technique for dimensionality reduction, i.e. an algorithm to explain - the observed variance in your data using less dimensions. ICA is a source - separation technique, for example to unmix multiple signals that have been - recorded through multiple sensors. Doing a PCA first and then an ICA can be - useful if you have more sensors than signals. For more information see: - `the FastICA example from scikits-learn `_. 
- -To run it, you also need to download the :download:`ica module `. -In IPython we can time the script: - -.. ipython:: - :verbatim: - - In [1]: %run -t demo.py - IPython CPU timings (estimated): - User : 14.3929 s. - System: 0.256016 s. - -and profile it: - -.. ipython:: - :verbatim: - - In [2]: %run -p demo.py - 916 function calls in 14.551 CPU seconds - Ordered by: internal time - ncalls tottime percall cumtime percall filename:lineno (function) - 1 14.457 14.457 14.479 14.479 decomp.py:849 (svd) - 1 0.054 0.054 0.054 0.054 {method 'random_sample' of 'mtrand.RandomState' objects} - 1 0.017 0.017 0.021 0.021 function_base.py:645 (asarray_chkfinite) - 54 0.011 0.000 0.011 0.000 {numpy.core._dotblas.dot} - 2 0.005 0.002 0.005 0.002 {method 'any' of 'numpy.ndarray' objects} - 6 0.001 0.000 0.001 0.000 ica.py:195 (gprime) - 6 0.001 0.000 0.001 0.000 ica.py:192 (g) - 14 0.001 0.000 0.001 0.000 {numpy.linalg.lapack_lite.dsyevd} - 19 0.001 0.000 0.001 0.000 twodim_base.py:204 (diag) - 1 0.001 0.001 0.008 0.008 ica.py:69 (_ica_par) - 1 0.001 0.001 14.551 14.551 {execfile} - 107 0.000 0.000 0.001 0.000 defmatrix.py:239 (__array_finalize__) - 7 0.000 0.000 0.004 0.001 ica.py:58 (_sym_decorrelation) - 7 0.000 0.000 0.002 0.000 linalg.py:841 (eigh) - 172 0.000 0.000 0.000 0.000 {isinstance} - 1 0.000 0.000 14.551 14.551 demo.py:1 () - 29 0.000 0.000 0.000 0.000 numeric.py:180 (asarray) - 35 0.000 0.000 0.000 0.000 defmatrix.py:193 (__new__) - 35 0.000 0.000 0.001 0.000 defmatrix.py:43 (asmatrix) - 21 0.000 0.000 0.001 0.000 defmatrix.py:287 (__mul__) - 41 0.000 0.000 0.000 0.000 {numpy.core.multiarray.zeros} - 28 0.000 0.000 0.000 0.000 {method 'transpose' of 'numpy.ndarray' objects} - 1 0.000 0.000 0.008 0.008 ica.py:97 (fastica) - ... - -Clearly the ``svd`` (in `decomp.py`) is what takes most of our time, a.k.a. the -bottleneck. We have to find a way to make this step go faster, or to avoid this -step (algorithmic optimization). 
Spending time on the rest of the code is -useless. - -.. topic:: **Profiling outside of IPython, running ``cProfile``** - - Similar profiling can be done outside of IPython, simply calling the - built-in `Python profilers - `_ ``cProfile`` and - ``profile``. - - .. sourcecode:: console - - $ python -m cProfile -o demo.prof demo.py - - Using the ``-o`` switch will output the profiler results to the file - ``demo.prof`` to view with an external tool. This can be useful if - you wish to process the profiler output with a visualization tool. - - -Line-profiler --------------- - -The profiler tells us which function takes most of the time, but not -where it is called. - -For this, we use the -`line_profiler `_: in the -source file, we decorate a few functions that we want to inspect with -``@profile`` (no need to import it) - -.. sourcecode:: python - - @profile - def test(): - rng = np.random.default_rng() - data = rng.random((5000, 100)) - u, s, v = linalg.svd(data) - pca = u[:, :10] @ data - results = fastica(pca.T, whiten=False) - -Then we run the script using the `kernprof -`_ command, with switches ``-l, --line-by-line`` and ``-v, --view`` to use the line-by-line profiler and view the results in addition to saving them: - -.. sourcecode:: console - - $ kernprof -l -v demo.py - - Wrote profile results to demo.py.lprof - Timer unit: 1e-06 s - - Total time: 1.27874 s - File: demo.py - Function: test at line 9 - - Line # Hits Time Per Hit % Time Line Contents - ============================================================== - 9 @profile - 10 def test(): - 11 1 69.0 69.0 0.0 rng = np.random.default_rng() - 12 1 2453.0 2453.0 0.2 data = rng.random((5000, 100)) - 13 1 1274715.0 1274715.0 99.7 u, s, v = sp.linalg.svd(data) - 14 1 413.0 413.0 0.0 pca = u[:, :10].T @ data - 15 1 1094.0 1094.0 0.1 results = fastica(pca.T, whiten=False) - -**The SVD is taking all the time.** We need to optimise this line. 
- - -Making code go faster -====================== - -Once we have identified the bottlenecks, we need to make the -corresponding code go faster. - -Algorithmic optimization -------------------------- - -The first thing to look for is algorithmic optimization: are there ways -to compute less, or better? - -For a high-level view of the problem, a good understanding of the maths -behind the algorithm helps. However, it is not uncommon to find simple -changes, like **moving computation or memory allocation outside a for -loop**, that bring in big gains. - -Example of the SVD -................... - -In both examples above, the SVD - -`Singular Value Decomposition `_ -- is what -takes most of the time. Indeed, the computational cost of this algorithm is -roughly :math:`n^3` in the size of the input matrix. - -However, in both of these example, we are not using all the output of -the SVD, but only the first few rows of its first return argument. If -we use the ``svd`` implementation of SciPy, we can ask for an incomplete -version of the SVD. Note that implementations of linear algebra in -SciPy are richer then those in NumPy and should be preferred. - -.. ipython:: - :verbatim: - - In [3]: %timeit np.linalg.svd(data) - 1 loops, best of 3: 14.5 s per loop - - In [4]: import scipy as sp - - In [5]: %timeit sp.linalg.svd(data) - 1 loops, best of 3: 14.2 s per loop - - In [6]: %timeit sp.linalg.svd(data, full_matrices=False) - 1 loops, best of 3: 295 ms per loop - - In [7]: %timeit np.linalg.svd(data, full_matrices=False) - 1 loops, best of 3: 293 ms per loop - -We can then use this insight to :download:`optimize the previous code `: - -.. literalinclude:: demo_opt.py - :pyobject: test - -.. ipython:: - :verbatim: - - In [1]: import demo - - In [2]: %timeit demo. 
- demo.fastica demo.np demo.prof.pdf demo.py demo.pyc - demo.linalg demo.prof demo.prof.png demo.py.lprof demo.test - - In [2]: %timeit demo.test() - ica.py:65: RuntimeWarning: invalid value encountered in sqrt - W = (u * np.diag(1.0/np.sqrt(s)) * u.T) * W # W = (W * W.T) ^{-1/2} * W - 1 loops, best of 3: 17.5 s per loop - - In [3]: import demo_opt - - In [4]: %timeit demo_opt.test() - 1 loops, best of 3: 208 ms per loop - -Real incomplete SVDs, e.g. computing only the first 10 eigenvectors, can -be computed with arpack, available in ``scipy.sparse.linalg.eigsh``. - -.. topic:: Computational linear algebra - - For certain algorithms, many of the bottlenecks will be linear - algebra computations. In this case, using the right function to solve - the right problem is key. For instance, an eigenvalue problem with a - symmetric matrix is easier to solve than with a general matrix. Also, - most often, you can avoid inverting a matrix and use a less costly - (and more numerically stable) operation. - - Know your computational linear algebra. When in doubt, explore - ``scipy.linalg``, and use ``%timeit`` to try out different alternatives - on your data. - -Writing faster numerical code -=============================== - -A complete discussion on advanced use of NumPy is found in chapter -:ref:`advanced_numpy`, or in the article `The NumPy array: a structure -for efficient numerical computation -`_ -by van der Walt et al. Here we -discuss only some commonly encountered tricks to make code faster. - -* **Vectorizing for loops** - - Find tricks to avoid for loops using NumPy arrays. For this, masks and - indices arrays can be useful. - -* **Broadcasting** - - Use :ref:`broadcasting ` to do operations on arrays as - small as possible before combining them. - -.. XXX: complement broadcasting in the NumPy chapter with the example of - the 3D grid - -* **In place operations** - - .. 
ipython:: - :verbatim: - - In [1]: a = np.zeros(1e7) - - In [2]: %timeit global a ; a = 0*a - 10 loops, best of 3: 111 ms per loop - - In [3]: %timeit global a ; a *= 0 - 10 loops, best of 3: 48.4 ms per loop - - **note**: we need `global a` in the timeit so that it work, as it is - assigning to `a`, and thus considers it as a local variable. - -* **Be easy on the memory: use views, and not copies** - - Copying big arrays is as costly as making simple numerical operations - on them: - - .. ipython:: - :verbatim: - - In [1]: a = np.zeros(1e7) - - In [2]: %timeit a.copy() - 10 loops, best of 3: 124 ms per loop - - In [3]: %timeit a + 1 - 10 loops, best of 3: 112 ms per loop - -* **Beware of cache effects** - - Memory access is cheaper when it is grouped: accessing a big array in a - continuous way is much faster than random access. This implies amongst - other things that **smaller strides are faster** (see - :ref:`cache_effects`): - - .. ipython:: - :verbatim: - - In [1]: c = np.zeros((1e4, 1e4), order='C') - - In [2]: %timeit c.sum(axis=0) - 1 loops, best of 3: 3.89 s per loop - - In [3]: %timeit c.sum(axis=1) - 1 loops, best of 3: 188 ms per loop - - In [4]: c.strides - Out[4]: (80000, 8) - - This is the reason why Fortran ordering or C ordering may make a big - difference on operations: - - .. ipython:: - - In [5]: rng = np.random.default_rng() - - In [6]: a = rng.random((20, 2**18)) - - In [7]: b = rng.random((20, 2**18)) - - In [8]: %timeit b @ a.T - 1 loops, best of 3: 194 ms per loop - - In [9]: c = np.ascontiguousarray(a.T) - - In [10]: %timeit b @ c - 10 loops, best of 3: 84.2 ms per loop - - Note that copying the data to work around this effect may not be worth it: - - .. ipython:: - - In [11]: %timeit c = np.ascontiguousarray(a.T) - 10 loops, best of 3: 106 ms per loop - - Using `numexpr `_ can be useful to - automatically optimize code for such effects. 
- -* **Use compiled code** - - The last resort, once you are sure that all the high-level - optimizations have been explored, is to transfer the hot spots, i.e. - the few lines or functions in which most of the time is spent, to - compiled code. For compiled code, the preferred option is to use - `Cython `_: it is easy to transform exiting - Python code in compiled code, and with a good use of the - `NumPy support `_ - yields efficient code on NumPy arrays, for instance by unrolling loops. - -.. warning:: - - For all the above: profile and time your choices. Don't base your - optimization on theoretical considerations. - -Additional Links ----------------- - -* If you need to profile memory usage, you could try the `memory_profiler - `_ - -* If you need to profile down into C extensions, you could try using - `gperftools `_ - from Python with - `yep `_. - -* If you would like to track performance of your code across time, i.e. as you - make new commits to your repository, you could try: - `asv `_ - -* If you need some interactive visualization why not try `RunSnakeRun - `_ diff --git a/advanced/scipy_sparse/bsr_array.md b/advanced/scipy_sparse/bsr_array.md new file mode 100644 index 000000000..95ead58fe --- /dev/null +++ b/advanced/scipy_sparse/bsr_array.md @@ -0,0 +1,122 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Block Compressed Row Format (BSR) + +- basically a CSR with dense sub-matrices of fixed shape instead of scalar + items + + - block size `(R, C)` must evenly divide the shape of the matrix `(M, N)` + - three NumPy arrays: `indices`, `indptr`, `data` + + - `indices` is array of column indices for each block + + - `data` is array of corresponding nonzero values of shape `(nnz, 
R, C)` + + - ... + + - subclass of {class}`_cs_matrix` (common CSR/CSC functionality) + - subclass of {class}`_data_matrix` (sparse matrix classes with + `.data` attribute) + +- fast matrix vector products and other arithmetic (sparsetools) +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) + - `(data, coords)` tuple + - `(data, indices, indptr)` tuple +- many arithmetic operations considerably more efficient than CSR for + sparse matrices with dense sub-matrices +- use: + - like CSR + - vector-valued finite element discretizations + +## Examples + +### Create empty BSR array with (1, 1) block size (like CSR...): + +```{code-cell} +mtx = sp.sparse.bsr_array((3, 4), dtype=np.int8) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +### Create empty BSR array with (3, 2) block size: + +```{code-cell} +mtx = sp.sparse.bsr_array((3, 4), blocksize=(3, 2), dtype=np.int8) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + + + +### Create using `(data, coords)` tuple with (1, 1) block size (like CSR...): + +```{code-cell} +row = np.array([0, 0, 1, 2, 2, 2]) +col = np.array([0, 2, 2, 0, 1, 2]) +data = np.array([1, 2, 3, 4, 5, 6]) +mtx = sp.sparse.bsr_array((data, (row, col)), shape=(3, 3)) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +```{code-cell} +mtx.data +``` + +```{code-cell} +mtx.indices +``` + +```{code-cell} +mtx.indptr +``` + +### Create using `(data, indices, indptr)` tuple with (2, 2) block size: + +```{code-cell} +indptr = np.array([0, 2, 3, 6]) +indices = np.array([0, 2, 2, 0, 1, 2]) +data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2) +mtx = sp.sparse.bsr_array((data, indices, indptr), shape=(6, 6)) +mtx.toarray() +``` + +```{code-cell} +data +``` diff --git a/advanced/scipy_sparse/bsr_array.rst b/advanced/scipy_sparse/bsr_array.rst deleted file mode 100644 index a01d26436..000000000 --- a/advanced/scipy_sparse/bsr_array.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. 
For doctests - >>> import numpy as np - >>> import scipy as sp - - -Block Compressed Row Format (BSR) -================================= - -* basically a CSR with dense sub-matrices of fixed shape instead of scalar items - * block size `(R, C)` must evenly divide the shape of the matrix `(M, N)` - * three NumPy arrays: `indices`, `indptr`, `data` - * `indices` is array of column indices for each block - * `data` is array of corresponding nonzero values of shape `(nnz, R, C)` - * ... - * subclass of :class:`_cs_matrix` (common CSR/CSC functionality) - * subclass of :class:`_data_matrix` (sparse matrix classes with - `.data` attribute) -* fast matrix vector products and other arithmetic (sparsetools) -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) - * `(data, coords)` tuple - * `(data, indices, indptr)` tuple -* many arithmetic operations considerably more efficient than CSR for - sparse matrices with dense sub-matrices -* use: - * like CSR - * vector-valued finite element discretizations - -Examples --------- - -* create empty BSR array with (1, 1) block size (like CSR...):: - - >>> mtx = sp.sparse.bsr_array((3, 4), dtype=np.int8) - >>> mtx - - >>> mtx.toarray() - array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], dtype=int8) - -* create empty BSR array with (3, 2) block size:: - - >>> mtx = sp.sparse.bsr_array((3, 4), blocksize=(3, 2), dtype=np.int8) - >>> mtx - - >>> mtx.toarray() - array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], dtype=int8) - - * a bug? - -* create using `(data, coords)` tuple with (1, 1) block size (like CSR...):: - - >>> row = np.array([0, 0, 1, 2, 2, 2]) - >>> col = np.array([0, 2, 2, 0, 1, 2]) - >>> data = np.array([1, 2, 3, 4, 5, 6]) - >>> mtx = sp.sparse.bsr_array((data, (row, col)), shape=(3, 3)) - >>> mtx - - >>> mtx.toarray() - array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]...) - >>> mtx.data - array([[[1]], - - [[2]], - - [[3]], - - [[4]], - - [[5]], - - [[6]]]...) 
- >>> mtx.indices - array([0, 2, 2, 0, 1, 2]) - >>> mtx.indptr - array([0, 2, 3, 6]) - -* create using `(data, indices, indptr)` tuple with (2, 2) block size:: - - >>> indptr = np.array([0, 2, 3, 6]) - >>> indices = np.array([0, 2, 2, 0, 1, 2]) - >>> data = np.array([1, 2, 3, 4, 5, 6]).repeat(4).reshape(6, 2, 2) - >>> mtx = sp.sparse.bsr_array((data, indices, indptr), shape=(6, 6)) - >>> mtx.toarray() - array([[1, 1, 0, 0, 2, 2], - [1, 1, 0, 0, 2, 2], - [0, 0, 0, 0, 3, 3], - [0, 0, 0, 0, 3, 3], - [4, 4, 5, 5, 6, 6], - [4, 4, 5, 5, 6, 6]]) - >>> data - array([[[1, 1], - [1, 1]], - - [[2, 2], - [2, 2]], - - [[3, 3], - [3, 3]], - - [[4, 4], - [4, 4]], - - [[5, 5], - [5, 5]], - - [[6, 6], - [6, 6]]]) diff --git a/advanced/scipy_sparse/coo_array.md b/advanced/scipy_sparse/coo_array.md new file mode 100644 index 000000000..13d5e522b --- /dev/null +++ b/advanced/scipy_sparse/coo_array.md @@ -0,0 +1,90 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Coordinate Format (COO) + +- also known as the 'ijv' or 'triplet' format + - three NumPy arrays: `row`, `col`, `data`. 
+ - attribute `coords` is the tuple `(row, col)` + - `data[i]` is value at `(row[i], col[i])` position + - permits duplicate entries + - subclass of {class}`_data_matrix` (sparse matrix classes with + `.data` attribute) +- fast format for constructing sparse arrays +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty matrix) + - `(data, coords)` tuple +- very fast conversion to and from CSR/CSC formats +- fast matrix \* vector (sparsetools) +- fast and easy item-wise operations + - manipulate data array directly (fast NumPy machinery) +- no slicing, no arithmetic (directly, converts to CSR) +- use: + + - facilitates fast conversion among sparse formats + + - when converting to other format (usually CSR or CSC), duplicate + entries are summed together + + - facilitates efficient construction of finite element matrices + +## Examples + +### Create empty COO array: + +```{code-cell} +mtx = sp.sparse.coo_array((3, 4), dtype=np.int8) +mtx.toarray() +``` + +### Create using `(data, ij)` tuple: + +```{code-cell} +row = np.array([0, 3, 1, 0]) +col = np.array([0, 3, 1, 2]) +data = np.array([4, 5, 7, 9]) +mtx = sp.sparse.coo_array((data, (row, col)), shape=(4, 4)) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +**Note**: duplicate entries are summed together: + +```{code-cell} +row = np.array([0, 0, 1, 3, 1, 0, 0]) +col = np.array([0, 2, 1, 3, 1, 0, 0]) +data = np.array([1, 1, 1, 1, 1, 1, 1]) +mtx = sp.sparse.coo_array((data, (row, col)), shape=(4, 4)) +mtx.toarray() +``` + +**Note**: no slicing...: + +```{code-cell} +:tags: [raises-exception] + +mtx[2, 3] +``` diff --git a/advanced/scipy_sparse/coo_array.rst b/advanced/scipy_sparse/coo_array.rst deleted file mode 100644 index 595178eaf..000000000 --- a/advanced/scipy_sparse/coo_array.rst +++ /dev/null @@ -1,77 +0,0 @@ -.. 
for doctests - >>> import numpy as np - >>> import scipy as sp - - -Coordinate Format (COO) -======================= - -* also known as the 'ijv' or 'triplet' format - * three NumPy arrays: `row`, `col`, `data`. - * attribute `coords` is the tuple `(row, col)` - * `data[i]` is value at `(row[i], col[i])` position - * permits duplicate entries - * subclass of :class:`_data_matrix` (sparse matrix classes with - `.data` attribute) -* fast format for constructing sparse arrays -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty matrix) - * `(data, coords)` tuple -* very fast conversion to and from CSR/CSC formats -* fast matrix * vector (sparsetools) -* fast and easy item-wise operations - * manipulate data array directly (fast NumPy machinery) -* no slicing, no arithmetic (directly, converts to CSR) -* use: - * facilitates fast conversion among sparse formats - * when converting to other format (usually CSR or CSC), duplicate - entries are summed together - - * facilitates efficient construction of finite element matrices - -Examples --------- - -* create empty COO array:: - - >>> mtx = sp.sparse.coo_array((3, 4), dtype=np.int8) - >>> mtx.toarray() - array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], dtype=int8) - -* create using `(data, ij)` tuple:: - - >>> row = np.array([0, 3, 1, 0]) - >>> col = np.array([0, 3, 1, 2]) - >>> data = np.array([4, 5, 7, 9]) - >>> mtx = sp.sparse.coo_array((data, (row, col)), shape=(4, 4)) - >>> mtx - - >>> mtx.toarray() - array([[4, 0, 9, 0], - [0, 7, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 5]]) - -* duplicates entries are summed together:: - - >>> row = np.array([0, 0, 1, 3, 1, 0, 0]) - >>> col = np.array([0, 2, 1, 3, 1, 0, 0]) - >>> data = np.array([1, 1, 1, 1, 1, 1, 1]) - >>> mtx = sp.sparse.coo_array((data, (row, col)), shape=(4, 4)) - >>> mtx.toarray() - array([[3, 0, 1, 0], - [0, 2, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 1]]) - -* no slicing...:: - - >>> mtx[2, 3] - Traceback (most 
recent call last): - ... - TypeError: 'coo_array' object ... diff --git a/advanced/scipy_sparse/csc_array.md b/advanced/scipy_sparse/csc_array.md new file mode 100644 index 000000000..5661cf8e2 --- /dev/null +++ b/advanced/scipy_sparse/csc_array.md @@ -0,0 +1,93 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Compressed Sparse Column Format (CSC) + +- column oriented + - three NumPy arrays: `indices`, `indptr`, `data` + - `indices` is array of row indices + - `data` is array of corresponding nonzero values + - `indptr` points to column starts in `indices` and `data` + - length is `n_col + 1`, last item = number of values = length of both + `indices` and `data` + - nonzero values of the `i`-th column are `data[indptr[i]:indptr[i+1]]` + with row indices `indices[indptr[i]:indptr[i+1]]` + - item `(i, j)` can be accessed as `data[indptr[j]+k]`, where `k` is + position of `i` in `indices[indptr[j]:indptr[j+1]]` + - subclass of {class}`_cs_matrix` (common CSR/CSC functionality) + - subclass of {class}`_data_matrix` (sparse array classes with `.data` + attribute) +- fast matrix vector products and other arithmetic (sparsetools) +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) + - `(data, coords)` tuple + - `(data, indices, indptr)` tuple +- efficient column slicing, column-oriented operations +- slow row slicing, expensive changes to the sparsity structure +- use: + - actual computations (most linear solvers support this format) + +## Examples + +### Create empty CSC array: + +```{code-cell} +mtx = sp.sparse.csc_array((3, 4), dtype=np.int8) +mtx.toarray() +``` + +### Create using `(data, coords)` tuple: + +```{code-cell} +row = np.array([0,
0, 1, 2, 2, 2]) +col = np.array([0, 2, 2, 0, 1, 2]) +data = np.array([1, 2, 3, 4, 5, 6]) +mtx = sp.sparse.csc_array((data, (row, col)), shape=(3, 3)) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +```{code-cell} +mtx.data +``` + +```{code-cell} +mtx.indices +``` + +```{code-cell} +mtx.indptr +``` + +### Create using `(data, indices, indptr)` tuple: + +```{code-cell} +data = np.array([1, 4, 5, 2, 3, 6]) +indices = np.array([0, 2, 2, 0, 1, 2]) +indptr = np.array([0, 2, 3, 6]) +mtx = sp.sparse.csc_array((data, indices, indptr), shape=(3, 3)) +mtx.toarray() +``` diff --git a/advanced/scipy_sparse/csc_array.rst b/advanced/scipy_sparse/csc_array.rst deleted file mode 100644 index 3b709733c..000000000 --- a/advanced/scipy_sparse/csc_array.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. For doctests - >>> import numpy as np - >>> import scipy as sp - - -Compressed Sparse Column Format (CSC) -===================================== - -* column oriented - * three NumPy arrays: `indices`, `indptr`, `data` - * `indices` is array of row indices - * `data` is array of corresponding nonzero values - * `indptr` points to column starts in `indices` and `data` - * length is `n_col + 1`, last item = number of values = length of both - `indices` and `data` - * nonzero values of the `i`-th column are `data[indptr[i]:indptr[i+1]]` - with row indices `indices[indptr[i]:indptr[i+1]]` - * item `(i, j)` can be accessed as `data[indptr[j]+k]`, where `k` is - position of `i` in `indices[indptr[j]:indptr[j+1]]` - * subclass of :class:`_cs_matrix` (common CSR/CSC functionality) - * subclass of :class:`_data_matrix` (sparse array classes with - `.data` attribute) -* fast matrix vector products and other arithmetic (sparsetools) -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) - * `(data, coords)` tuple - * `(data, indices, indptr)` tuple -* efficient column slicing, column-oriented operations -* slow row slicing, expensive changes to the 
sparsity structure -* use: - * actual computations (most linear solvers support this format) - -Examples --------- - -* create empty CSC array:: - - >>> mtx = sp.sparse.csc_array((3, 4), dtype=np.int8) - >>> mtx.toarray() - array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], dtype=int8) - -* create using `(data, coords)` tuple:: - - >>> row = np.array([0, 0, 1, 2, 2, 2]) - >>> col = np.array([0, 2, 2, 0, 1, 2]) - >>> data = np.array([1, 2, 3, 4, 5, 6]) - >>> mtx = sp.sparse.csc_array((data, (row, col)), shape=(3, 3)) - >>> mtx - - >>> mtx.toarray() - array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]...) - >>> mtx.data - array([1, 4, 5, 2, 3, 6]...) - >>> mtx.indices - array([0, 2, 2, 0, 1, 2]) - >>> mtx.indptr - array([0, 2, 3, 6]) - -* create using `(data, indices, indptr)` tuple:: - - >>> data = np.array([1, 4, 5, 2, 3, 6]) - >>> indices = np.array([0, 2, 2, 0, 1, 2]) - >>> indptr = np.array([0, 2, 3, 6]) - >>> mtx = sp.sparse.csc_array((data, indices, indptr), shape=(3, 3)) - >>> mtx.toarray() - array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]) diff --git a/advanced/scipy_sparse/csr_array.md b/advanced/scipy_sparse/csr_array.md new file mode 100644 index 000000000..2dfa5d0b7 --- /dev/null +++ b/advanced/scipy_sparse/csr_array.md @@ -0,0 +1,93 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Compressed Sparse Row Format (CSR) + +- row oriented + - three NumPy arrays: `indices`, `indptr`, `data` + - `indices` is array of column indices + - `data` is array of corresponding nonzero values + - `indptr` points to row starts in `indices` and `data` + - length of `indptr` is `n_row + 1`, + last item = number of values = length of both `indices` and `data` + - nonzero values of the `i`-th row are 
`data[indptr[i]:indptr[i + 1]]` + with column indices `indices[indptr[i]:indptr[i + 1]]` + - item `(i, j)` can be accessed as `data[indptr[i] + k]`, where `k` is + position of `j` in `indices[indptr[i]:indptr[i + 1]]` + - subclass of {class}`_cs_matrix` (common CSR/CSC functionality) + - subclass of {class}`_data_matrix` (sparse array classes with + `.data` attribute) +- fast matrix vector products and other arithmetic (sparsetools) +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) + - `(data, coords)` tuple + - `(data, indices, indptr)` tuple +- efficient row slicing, row-oriented operations +- slow column slicing, expensive changes to the sparsity structure +- use: + - actual computations (most linear solvers support this format) + +## Examples + +### Create empty CSR array: + +```{code-cell} +mtx = sp.sparse.csr_array((3, 4), dtype=np.int8) +mtx.toarray() +``` + +### Create using `(data, coords)` tuple: + +```{code-cell} +row = np.array([0, 0, 1, 2, 2, 2]) +col = np.array([0, 2, 2, 0, 1, 2]) +data = np.array([1, 2, 3, 4, 5, 6]) +mtx = sp.sparse.csr_array((data, (row, col)), shape=(3, 3)) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +```{code-cell} +mtx.data +``` + +```{code-cell} +mtx.indices +``` + +```{code-cell} +mtx.indptr +``` + +### Create using `(data, indices, indptr)` tuple: + +```{code-cell} +data = np.array([1, 2, 3, 4, 5, 6]) +indices = np.array([0, 2, 2, 0, 1, 2]) +indptr = np.array([0, 2, 3, 6]) +mtx = sp.sparse.csr_array((data, indices, indptr), shape=(3, 3)) +mtx.toarray() +``` diff --git a/advanced/scipy_sparse/csr_array.rst b/advanced/scipy_sparse/csr_array.rst deleted file mode 100644 index f8d997b3e..000000000 --- a/advanced/scipy_sparse/csr_array.rst +++ /dev/null @@ -1,74 +0,0 @@ -.. 
for doctests - >>> import numpy as np - >>> import scipy as sp - -Compressed Sparse Row Format (CSR) -================================== - -* row oriented - * three NumPy arrays: `indices`, `indptr`, `data` - * `indices` is array of column indices - * `data` is array of corresponding nonzero values - * `indptr` points to row starts in `indices` and `data` - * length of `indptr` is `n_row + 1`, - last item = number of values = length of both `indices` and `data` - * nonzero values of the `i`-th row are `data[indptr[i]:indptr[i + 1]]` - with column indices `indices[indptr[i]:indptr[i + 1]]` - * item `(i, j)` can be accessed as `data[indptr[i] + k]`, where `k` is - position of `j` in `indices[indptr[i]:indptr[i + 1]]` - * subclass of :class:`_cs_matrix` (common CSR/CSC functionality) - * subclass of :class:`_data_matrix` (sparse array classes with - `.data` attribute) -* fast matrix vector products and other arithmetic (sparsetools) -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) - * `(data, coords)` tuple - * `(data, indices, indptr)` tuple -* efficient row slicing, row-oriented operations -* slow column slicing, expensive changes to the sparsity structure -* use: - * actual computations (most linear solvers support this format) - -Examples --------- - -* create empty CSR array:: - - >>> mtx = sp.sparse.csr_array((3, 4), dtype=np.int8) - >>> mtx.toarray() - array([[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]], dtype=int8) - -* create using `(data, coords)` tuple:: - - >>> row = np.array([0, 0, 1, 2, 2, 2]) - >>> col = np.array([0, 2, 2, 0, 1, 2]) - >>> data = np.array([1, 2, 3, 4, 5, 6]) - >>> mtx = sp.sparse.csr_array((data, (row, col)), shape=(3, 3)) - >>> mtx - - >>> mtx.toarray() - array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]...) - >>> mtx.data - array([1, 2, 3, 4, 5, 6]...) 
- >>> mtx.indices - array([0, 2, 2, 0, 1, 2]) - >>> mtx.indptr - array([0, 2, 3, 6]) - -* create using `(data, indices, indptr)` tuple:: - - >>> data = np.array([1, 2, 3, 4, 5, 6]) - >>> indices = np.array([0, 2, 2, 0, 1, 2]) - >>> indptr = np.array([0, 2, 3, 6]) - >>> mtx = sp.sparse.csr_array((data, indices, indptr), shape=(3, 3)) - >>> mtx.toarray() - array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]) diff --git a/advanced/scipy_sparse/dia_array.md b/advanced/scipy_sparse/dia_array.md new file mode 100644 index 000000000..519628d04 --- /dev/null +++ b/advanced/scipy_sparse/dia_array.md @@ -0,0 +1,115 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Diagonal Format (DIA) + +- very simple scheme +- diagonals in dense NumPy array of shape `(n_diag, length)` + - fixed length -> waste space a bit when far from main diagonal + - subclass of {class}`_data_matrix` (sparse array classes with + `.data` attribute) +- offset for each diagonal + - 0 is the main diagonal + - negative offset = below + - positive offset = above +- fast matrix \* vector (sparsetools) +- fast and easy item-wise operations + - manipulate data array directly (fast NumPy machinery) +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) + - `(data, offsets)` tuple +- no slicing, no individual item access +- use: + - rather specialized + - solving PDEs by finite differences + - with an iterative solver + +## Examples + +### Create some DIA arrays: + +```{code-cell} +data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) +data +``` + +```{code-cell} +offsets = np.array([0, -1, 2]) +mtx = sp.sparse.dia_array((data, offsets), shape=(4, 4)) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + 
+```{code-cell} +data = np.arange(12).reshape((3, 4)) + 1 +data +``` + +```{code-cell} +mtx = sp.sparse.dia_array((data, offsets), shape=(4, 4)) +mtx.data +``` + +```{code-cell} +mtx.offsets +``` + +```{code-cell} +print(mtx) +``` + +```{code-cell} +mtx.toarray() +``` + +### Explanation with a scheme: + +``` +offset: row + + 2: 9 + 1: --10------ + 0: 1 . 11 . + -1: 5 2 . 12 + -2: . 6 3 . + -3: . . 7 4 + ---------8 +``` + +### Matrix-vector multiplication + +```{code-cell} +vec = np.ones((4, )) +vec +``` + +```{code-cell} +mtx @ vec +``` + +```{code-cell} +(mtx * vec).toarray() +``` diff --git a/advanced/scipy_sparse/dia_array.rst b/advanced/scipy_sparse/dia_array.rst deleted file mode 100644 index 1afc79193..000000000 --- a/advanced/scipy_sparse/dia_array.rst +++ /dev/null @@ -1,107 +0,0 @@ -.. for doctests - >>> import numpy as np - >>> import scipy as sp - - -Diagonal Format (DIA) -===================== - -* very simple scheme -* diagonals in dense NumPy array of shape `(n_diag, length)` - * fixed length -> waste space a bit when far from main diagonal - * subclass of :class:`_data_matrix` (sparse array classes with - `.data` attribute) -* offset for each diagonal - * 0 is the main diagonal - * negative offset = below - * positive offset = above -* fast matrix * vector (sparsetools) -* fast and easy item-wise operations - * manipulate data array directly (fast NumPy machinery) -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) - * `(data, offsets)` tuple -* no slicing, no individual item access -* use: - * rather specialized - * solving PDEs by finite differences - * with an iterative solver - -Examples --------- - -* create some DIA arrays:: - - >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) - >>> data - array([[1, 2, 3, 4], - [1, 2, 3, 4], - [1, 2, 3, 4]]) - >>> offsets = np.array([0, -1, 2]) - >>> mtx = sp.sparse.dia_array((data, offsets), shape=(4, 4)) - >>> mtx - - >>> mtx.toarray() - array([[1, 
0, 3, 0], - [1, 2, 0, 4], - [0, 2, 3, 0], - [0, 0, 3, 4]]) - - >>> data = np.arange(12).reshape((3, 4)) + 1 - >>> data - array([[ 1, 2, 3, 4], - [ 5, 6, 7, 8], - [ 9, 10, 11, 12]]) - >>> mtx = sp.sparse.dia_array((data, offsets), shape=(4, 4)) - >>> mtx.data - array([[ 1, 2, 3, 4], - [ 5, 6, 7, 8], - [ 9, 10, 11, 12]]) - >>> mtx.offsets - array([ 0, -1, 2], dtype=int32) - >>> print(mtx) - - Coords Values - (0, 0) 1 - (1, 1) 2 - (2, 2) 3 - (3, 3) 4 - (1, 0) 5 - (2, 1) 6 - (3, 2) 7 - (0, 2) 11 - (1, 3) 12 - >>> mtx.toarray() - array([[ 1, 0, 11, 0], - [ 5, 2, 0, 12], - [ 0, 6, 3, 0], - [ 0, 0, 7, 4]]) - -* explanation with a scheme:: - - offset: row - - 2: 9 - 1: --10------ - 0: 1 . 11 . - -1: 5 2 . 12 - -2: . 6 3 . - -3: . . 7 4 - ---------8 - -* matrix-vector multiplication - - >>> vec = np.ones((4, )) - >>> vec - array([1., 1., 1., 1.]) - >>> mtx @ vec - array([12., 19., 9., 11.]) - >>> (mtx * vec).toarray() - array([[ 1., 0., 11., 0.], - [ 5., 2., 0., 12.], - [ 0., 6., 3., 0.], - [ 0., 0., 7., 4.]]) diff --git a/advanced/scipy_sparse/dok_array.md b/advanced/scipy_sparse/dok_array.md new file mode 100644 index 000000000..d88037753 --- /dev/null +++ b/advanced/scipy_sparse/dok_array.md @@ -0,0 +1,75 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# Dictionary of Keys Format (DOK) + +- subclass of Python dict + - keys are `(row, column)` index tuples (no duplicate entries allowed) + - values are corresponding non-zero values +- efficient for constructing sparse arrays incrementally +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) +- efficient O(1) access to individual elements +- flexible slicing, changing sparsity structure is 
efficient +- can be efficiently converted to a coo_array once constructed +- slow arithmetic (`for` loops with `dict.items()`) +- use: + - when sparsity pattern is not known apriori or changes + +## Examples + +### Create a DOK array element by element: + +```{code-cell} +mtx = sp.sparse.dok_array((5, 5), dtype=np.float64) +mtx +``` + +```{code-cell} +for ir in range(5): + for ic in range(5): + mtx[ir, ic] = 1.0 * (ir != ic) +mtx +``` + +```{code-cell} +mtx.toarray() +``` + +### Slicing and indexing: + +```{code-cell} +mtx[1, 1] +``` + +```{code-cell} +mtx[[1], 1:3] +``` + +```{code-cell} +mtx[[1], 1:3].toarray() +``` + +```{code-cell} +mtx[[2, 1], 1:3].toarray() +``` diff --git a/advanced/scipy_sparse/dok_array.rst b/advanced/scipy_sparse/dok_array.rst deleted file mode 100644 index fb1a90a1f..000000000 --- a/advanced/scipy_sparse/dok_array.rst +++ /dev/null @@ -1,57 +0,0 @@ -.. For doctests - >>> import numpy as np - >>> import scipy as sp - - -Dictionary of Keys Format (DOK) -=============================== - -* subclass of Python dict - * keys are `(row, column)` index tuples (no duplicate entries allowed) - * values are corresponding non-zero values -* efficient for constructing sparse arrays incrementally -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) -* efficient O(1) access to individual elements -* flexible slicing, changing sparsity structure is efficient -* can be efficiently converted to a coo_array once constructed -* slow arithmetic (`for` loops with `dict.items()`) -* use: - * when sparsity pattern is not known apriori or changes - -Examples --------- - -* create a DOK array element by element:: - - >>> mtx = sp.sparse.dok_array((5, 5), dtype=np.float64) - >>> mtx - - >>> for ir in range(5): - ... for ic in range(5): - ... 
mtx[ir, ic] = 1.0 * (ir != ic) - >>> mtx - - >>> mtx.toarray() - array([[0., 1., 1., 1., 1.], - [1., 0., 1., 1., 1.], - [1., 1., 0., 1., 1.], - [1., 1., 1., 0., 1.], - [1., 1., 1., 1., 0.]]) - -* slicing and indexing:: - - >>> mtx[1, 1] - np.float64(0.0) - >>> mtx[[1], 1:3] - - >>> mtx[[1], 1:3].toarray() - array([[0., 1.]]) - >>> mtx[[2, 1], 1:3].toarray() - array([[1., 0.], - [0., 1.]]) diff --git a/advanced/scipy_sparse/index.rst b/advanced/scipy_sparse/index.rst deleted file mode 100644 index dd449b245..000000000 --- a/advanced/scipy_sparse/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -Sparse Arrays in SciPy -====================== - -**Author**: *Robert Cimrman* - -| - -.. toctree:: - :maxdepth: 3 - - introduction - storage_schemes - solvers - other_packages diff --git a/advanced/scipy_sparse/introduction.md b/advanced/scipy_sparse/introduction.md new file mode 100644 index 000000000..b72e4ff99 --- /dev/null +++ b/advanced/scipy_sparse/introduction.md @@ -0,0 +1,87 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Scipy sparse arrays + +**Section author**: _Robert Cimrman_ + +(Dense) matrix is: + +- mathematical object +- data structure for storing a 2D array of values + +Important features: + +- memory allocated once for all items + - usually a contiguous chunk, think NumPy ndarray +- _fast_ access to individual items (\*) + +## Why Sparse Matrices? + +- the memory grows like `n**2` for dense matrix + +- small example (double precision matrix): + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt + +x = np.linspace(0, 1e6, 10) +plt.plot(x, 8.0 * (x**2) / 1e6, lw=5) +plt.xlabel('size n') +plt.ylabel('memory [MB]') +``` + +## Sparse Matrices vs. 
Sparse Matrix Storage Schemes + +- sparse matrix is a matrix, which is _almost empty_ +- storing all the zeros is wasteful -> store only nonzero items +- think **compression** +- pros: huge memory savings +- cons: slow access to individual items, but it depends on actual storage scheme. + +## Typical Applications + +- solution of partial differential equations (PDEs) + + - the _finite element method_ + - mechanical engineering, electrotechnics, physics, ... + +- graph theory + + - nonzero at `(i, j)` means that node `i` is connected to node `j` + +- natural language processing + + - nonzero at `(i, j)` means that the document `i` contains the word `j` + +- ... + +:::{admonition} Prerequisites + +- {ref}`numpy ` +- {ref}`scipy ` +- {ref}`matplotlib (optional) ` +- {ref}`ipython (the enhancements come handy) ` + ::: + +## Sparsity Structure Visualization + +- {func}`spy` from `matplotlib` +- example plots: + +![](figures/graph.png) + +![](figures/graph_g.png) + +![](figures/graph_rcm.png) diff --git a/advanced/scipy_sparse/introduction.rst b/advanced/scipy_sparse/introduction.rst deleted file mode 100644 index 17107c5e1..000000000 --- a/advanced/scipy_sparse/introduction.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. For doctests - >>> import numpy as np - >>> # For doctest on headless environments - >>> import matplotlib.pyplot as plt - -Introduction -============ - -(dense) matrix is: - -* mathematical object -* data structure for storing a 2D array of values - -important features: - -* memory allocated once for all items - * usually a contiguous chunk, think NumPy ndarray -* *fast* access to individual items (*) - -Why Sparse Matrices? 
--------------------- - -* the memory grows like `n**2` for dense matrix -* small example (double precision matrix):: - - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> x = np.linspace(0, 1e6, 10) - >>> plt.plot(x, 8.0 * (x**2) / 1e6, lw=5) - [] - >>> plt.xlabel('size n') - Text(...'size n') - >>> plt.ylabel('memory [MB]') - Text(...'memory [MB]') - -Sparse Matrices vs. Sparse Matrix Storage Schemes -------------------------------------------------- - -* sparse matrix is a matrix, which is *almost empty* -* storing all the zeros is wasteful -> store only nonzero items -* think **compression** -* pros: huge memory savings -* cons: slow access to individual items, but it depends on actual storage scheme. - -Typical Applications --------------------- - -* solution of partial differential equations (PDEs) - * the *finite element method* - * mechanical engineering, electrotechnics, physics, ... -* graph theory - * nonzero at `(i, j)` means that node `i` is connected to node `j` -* natural language processing - * nonzero at `(i, j)` means that the document `i` contains the word `j` -* ... - -Prerequisites -------------- - -.. rst-class:: horizontal - - * :ref:`numpy ` - * :ref:`scipy ` - * :ref:`matplotlib (optional) ` - * :ref:`ipython (the enhancements come handy) ` - -Sparsity Structure Visualization --------------------------------- - -* :func:`spy` from ``matplotlib`` -* example plots: - -.. image:: figures/graph.png -.. image:: figures/graph_g.png -.. 
image:: figures/graph_rcm.png diff --git a/advanced/scipy_sparse/lil_array.md b/advanced/scipy_sparse/lil_array.md new file mode 100644 index 000000000..79abdf730 --- /dev/null +++ b/advanced/scipy_sparse/lil_array.md @@ -0,0 +1,99 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +``` + +# List of Lists Format (LIL) + +- row-based linked list + - each row is a Python list (sorted) of column indices of non-zero elements + - rows stored in a NumPy array (`dtype=object`) + - non-zero values data stored analogously +- efficient for constructing sparse arrays incrementally +- constructor accepts: + - dense array/matrix + - sparse array/matrix + - shape tuple (create empty array) +- flexible slicing, changing sparsity structure is efficient +- slow arithmetic, slow column slicing due to being row-based +- use: + - when sparsity pattern is not known _a priori_ or changes + - example: reading a sparse array from a text file + +## Examples + +### Create an empty LIL array: + +```{code-cell} +mtx = sp.sparse.lil_array((4, 5)) +``` + +### Prepare random data + +```{code-cell} +rng = np.random.default_rng(27446968) +data = np.round(rng.random((2, 3))) +data +``` + +### Assign the data using fancy indexing + +```{code-cell} +mtx[:2, [1, 2, 3]] = data +mtx +``` + +```{code-cell} +print(mtx) +``` + +```{code-cell} +mtx.toarray() +``` + +```{code-cell} +mtx.toarray() +``` + +### More slicing and indexing + +```{code-cell} +mtx = sp.sparse.lil_array([[0, 1, 2, 0], [3, 0, 1, 0], [1, 0, 0, 1]]) +mtx.toarray() +``` + +```{code-cell} +print(mtx) +``` + +```{code-cell} +mtx[:2, :] +``` + +```{code-cell} +mtx[:2, :].toarray() +``` + +```{code-cell} +mtx[1:2, [0,2]].toarray() +``` + +```{code-cell} +mtx.toarray() +``` diff
--git a/advanced/scipy_sparse/lil_array.rst b/advanced/scipy_sparse/lil_array.rst deleted file mode 100644 index 5e1d5c24a..000000000 --- a/advanced/scipy_sparse/lil_array.rst +++ /dev/null @@ -1,90 +0,0 @@ -.. - >>> import numpy as np - >>> import scipy as sp - -List of Lists Format (LIL) -========================== - -* row-based linked list - * each row is a Python list (sorted) of column indices of non-zero elements - * rows stored in a NumPy array (`dtype=np.object`) - * non-zero values data stored analogously -* efficient for constructing sparse arrays incrementally -* constructor accepts: - * dense array/matrix - * sparse array/matrix - * shape tuple (create empty array) -* flexible slicing, changing sparsity structure is efficient -* slow arithmetic, slow column slicing due to being row-based -* use: - * when sparsity pattern is not known apriori or changes - * example: reading a sparse array from a text file - -Examples --------- - -* create an empty LIL array:: - - >>> mtx = sp.sparse.lil_array((4, 5)) - -* prepare random data:: - - >>> rng = np.random.default_rng(27446968) - >>> data = np.round(rng.random((2, 3))) - >>> data - array([[1., 0., 1.], - [0., 0., 1.]]) - -* assign the data using fancy indexing:: - - >>> mtx[:2, [1, 2, 3]] = data - >>> mtx - - >>> print(mtx) - - Coords Values - (0, 1) 1.0 - (0, 3) 1.0 - (1, 3) 1.0 - >>> mtx.toarray() - array([[0., 1., 0., 1., 0.], - [0., 0., 0., 1., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.]]) - >>> mtx.toarray() - array([[0., 1., 0., 1., 0.], - [0., 0., 0., 1., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.]]) - -* more slicing and indexing:: - - >>> mtx = sp.sparse.lil_array([[0, 1, 2, 0], [3, 0, 1, 0], [1, 0, 0, 1]]) - >>> mtx.toarray() - array([[0, 1, 2, 0], - [3, 0, 1, 0], - [1, 0, 0, 1]]...) - >>> print(mtx) - - Coords Values - (0, 1) 1 - (0, 2) 2 - (1, 0) 3 - (1, 2) 1 - (2, 0) 1 - (2, 3) 1 - >>> mtx[:2, :] - - >>> mtx[:2, :].toarray() - array([[0, 1, 2, 0], - [3, 0, 1, 0]]...) 
- >>> mtx[1:2, [0,2]].toarray() - array([[3, 1]]...) - >>> mtx.toarray() - array([[0, 1, 2, 0], - [3, 0, 1, 0], - [1, 0, 0, 1]]...) diff --git a/advanced/scipy_sparse/other_packages.md b/advanced/scipy_sparse/other_packages.md new file mode 100644 index 000000000..57a7f36fa --- /dev/null +++ b/advanced/scipy_sparse/other_packages.md @@ -0,0 +1,9 @@ +# Other Interesting Packages + +- PyAMG + : - algebraic multigrid solvers + - <https://github.com/pyamg/pyamg> +- Pysparse + : - own sparse matrix classes + - matrix and eigenvalue problem solvers + - <https://pysparse.sourceforge.net/> diff --git a/advanced/scipy_sparse/other_packages.rst b/advanced/scipy_sparse/other_packages.rst deleted file mode 100644 index d6514f1e8..000000000 --- a/advanced/scipy_sparse/other_packages.rst +++ /dev/null @@ -1,10 +0,0 @@ -Other Interesting Packages -========================== - -* PyAMG - * algebraic multigrid solvers - * https://github.com/pyamg/pyamg -* Pysparse - * own sparse matrix classes - * matrix and eigenvalue problem solvers - * https://pysparse.sourceforge.net/ diff --git a/advanced/scipy_sparse/solvers.md b/advanced/scipy_sparse/solvers.md new file mode 100644 index 000000000..770935c58 --- /dev/null +++ b/advanced/scipy_sparse/solvers.md @@ -0,0 +1,222 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Linear System Solvers + +- sparse matrix/eigenvalue problem solvers live in {mod}`scipy.sparse.linalg` + +- the submodules: + - {mod}`dsolve`: direct factorization methods for solving linear systems + - {mod}`isolve`: iterative methods for solving linear systems + - {mod}`eigen`: sparse eigenvalue problem solvers + +All solvers are accessible from: + +```{code-cell} +import scipy as sp +sp.sparse.linalg.__all__ +``` + +## Sparse Direct Solvers + +- default solver: SuperLU + - included in SciPy + - real and complex systems + - both single and double precision +-
optional: umfpack + - real and complex systems + - double precision only + - recommended for performance + - wrappers now live in {mod}`scikits.umfpack` + - check-out the new {mod}`scikits.suitesparse` by Nathaniel Smith + +### Examples + +Import the whole module, and see its docstring: + +```{code-cell} +help(sp.sparse.linalg.spsolve) +``` + +Both superlu and umfpack can be used (if the latter is installed) as follows. + +Prepare a linear system: + +```{code-cell} +import numpy as np +mtx = sp.sparse.spdiags([[1, 2, 3, 4, 5], [6, 5, 8, 9, 10]], [0, 1], 5, 5, "csc") +mtx.toarray() +``` + +```{code-cell} +rhs = np.array([1, 2, 3, 4, 5], dtype=np.float32) +``` + +Solve as single precision real: + +```{code-cell} +mtx1 = mtx.astype(np.float32) +x = sp.sparse.linalg.spsolve(mtx1, rhs, use_umfpack=False) +print(x) +``` + +```{code-cell} +print("Error: %s" % (mtx1 * x - rhs)) +``` + +Solve as double precision real: + +```{code-cell} +mtx2 = mtx.astype(np.float64) +x = sp.sparse.linalg.spsolve(mtx2, rhs, use_umfpack=True) +print(x) +``` + +```{code-cell} +print("Error: %s" % (mtx2 * x - rhs)) +``` + +Solve as single precision complex: + +```{code-cell} +mtx1 = mtx.astype(np.complex64) +x = sp.sparse.linalg.spsolve(mtx1, rhs, use_umfpack=False) +print(x) +``` + +```{code-cell} +print("Error: %s" % (mtx1 * x - rhs)) +``` + +Solve as double precision complex: + +```{code-cell} +mtx2 = mtx.astype(np.complex128) +x = sp.sparse.linalg.spsolve(mtx2, rhs, use_umfpack=True) +print(x) +``` + +```{code-cell} +print("Error: %s" % (mtx2 * x - rhs)) +``` + +{download}`examples/direct_solve.py` + ++++ + +## Iterative Solvers + +- the {mod}`isolve` module contains the following solvers: + - `bicg` (BIConjugate Gradient) + - `bicgstab` (BIConjugate Gradient STABilized) + - `cg` (Conjugate Gradient) - symmetric positive definite matrices + only + - `cgs` (Conjugate Gradient Squared) + - `gmres` (Generalized Minimal RESidual) + - `minres` (MINimum RESidual) + - `qmr` (Quasi-Minimal 
Residual) + ++++ + +### Common Parameters + +- mandatory: + + - `A` : The N-by-N matrix of the linear system. + - `b`: Right hand side of the linear system. Has shape (N,) or (N,1). + +- optional: + + - `x0`: Starting guess for the solution. + - `tol` : Relative tolerance to achieve before terminating. + - `maxiter` : Maximum number of iterations. Iteration will stop after maxiter + steps even if the specified tolerance has not been achieved. + - `M` : Preconditioner for A. The preconditioner should approximate the + inverse of A. Effective preconditioning dramatically improves the + rate of convergence, which implies that fewer iterations are needed + to reach a given error tolerance. + - `callback` : User-supplied function to call after each iteration. It is + called as `callback(xk)`, where `xk` is the current solution vector. + ++++ + +### LinearOperator Class + +- common interface for performing matrix vector products +- useful abstraction that enables using dense and sparse matrices within + the solvers, as well as _matrix-free_ solutions +- has `shape` and `matvec()` (+ some optional parameters) + +Here is an example: + +```{code-cell} +import numpy as np +import scipy as sp + +def mv(v): + return np.array([2 * v[0], 3 * v[1]]) +``` + +```{code-cell} +A = sp.sparse.linalg.LinearOperator((2, 2), matvec=mv) +A +``` + +```{code-cell} +A.matvec(np.ones(2)) +``` + +```{code-cell} +A * np.ones(2) +``` + +### A Few Notes on Preconditioning + +- problem specific +- often hard to develop +- if not sure, try ILU + - available in {mod}`scipy.sparse.linalg` as {func}`spilu()` + +## Eigenvalue Problem Solvers + +### The {mod}`eigen` module + +- `arpack`: a collection of Fortran77 subroutines designed to solve large scale eigenvalue problems +- `lobpcg`: (Locally Optimal Block Preconditioned Conjugate + Gradient Method); \* works very well in combination with + [PyAMG](https://github.com/pyamg/pyamg) + + - example by Nathan Bell: + + 
{download}`examples/pyamg_with_lobpcg.py` + +Another example by Nils Wagner: + +{download}`examples/lobpcg_sakurai.py` + +Output: + +```bash +$ python examples/lobpcg_sakurai.py +Results by LOBPCG for n=2500 + +[ 0.06250083 0.06250028 0.06250007] + +Exact eigenvalues + +[ 0.06250005 0.0625002 0.06250044] + +Elapsed time 7.01 +``` + +![](figures/lobpcg_eigenvalues.png) diff --git a/advanced/scipy_sparse/solvers.rst b/advanced/scipy_sparse/solvers.rst deleted file mode 100644 index ebe3fd2c2..000000000 --- a/advanced/scipy_sparse/solvers.rst +++ /dev/null @@ -1,202 +0,0 @@ -Linear System Solvers -===================== - -* sparse matrix/eigenvalue problem solvers live in :mod:`scipy.sparse.linalg` -* the submodules: - * :mod:`dsolve`: direct factorization methods for solving linear systems - * :mod:`isolve`: iterative methods for solving linear systems - * :mod:`eigen`: sparse eigenvalue problem solvers - -* all solvers are accessible from:: - - >>> import scipy as sp - >>> sp.sparse.linalg.__all__ - ['ArpackError', 'ArpackNoConvergence', ..., 'use_solver'] - - -Sparse Direct Solvers ---------------------- - -* default solver: SuperLU - * included in SciPy - * real and complex systems - * both single and double precision -* optional: umfpack - * real and complex systems - * double precision only - * recommended for performance - * wrappers now live in :mod:`scikits.umfpack` - * check-out the new :mod:`scikits.suitesparse` by Nathaniel Smith - -Examples -^^^^^^^^ -* import the whole module, and see its docstring:: - - >>> help(sp.sparse.linalg.spsolve) - Help on function spsolve in module scipy.sparse.linalg._dsolve.linsolve: - ... 
- -* both superlu and umfpack can be used (if the latter is installed) as - follows: - - * prepare a linear system:: - - >>> import numpy as np - >>> mtx = sp.sparse.spdiags([[1, 2, 3, 4, 5], [6, 5, 8, 9, 10]], [0, 1], 5, 5, "csc") - >>> mtx.toarray() - array([[ 1, 5, 0, 0, 0], - [ 0, 2, 8, 0, 0], - [ 0, 0, 3, 9, 0], - [ 0, 0, 0, 4, 10], - [ 0, 0, 0, 0, 5]]) - >>> rhs = np.array([1, 2, 3, 4, 5], dtype=np.float32) - - * solve as single precision real:: - - >>> mtx1 = mtx.astype(np.float32) - >>> x = sp.sparse.linalg.spsolve(mtx1, rhs, use_umfpack=False) - >>> print(x) - [106. -21. 5.5 -1.5 1. ] - >>> print("Error: %s" % (mtx1 * x - rhs)) - Error: [0. 0. 0. 0. 0.] - - * solve as double precision real:: - - >>> mtx2 = mtx.astype(np.float64) - >>> x = sp.sparse.linalg.spsolve(mtx2, rhs, use_umfpack=True) - >>> print(x) - [106. -21. 5.5 -1.5 1. ] - >>> print("Error: %s" % (mtx2 * x - rhs)) - Error: [0. 0. 0. 0. 0.] - - * solve as single precision complex:: - - >>> mtx1 = mtx.astype(np.complex64) - >>> x = sp.sparse.linalg.spsolve(mtx1, rhs, use_umfpack=False) - >>> print(x) - [106. +0.j -21. +0.j 5.5+0.j -1.5+0.j 1. +0.j] - >>> print("Error: %s" % (mtx1 * x - rhs)) - Error: [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j] - - * solve as double precision complex:: - - >>> mtx2 = mtx.astype(np.complex128) - >>> x = sp.sparse.linalg.spsolve(mtx2, rhs, use_umfpack=True) - >>> print(x) - [106. +0.j -21. +0.j 5.5+0.j -1.5+0.j 1. +0.j] - >>> print("Error: %s" % (mtx2 * x - rhs)) - Error: [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j] - -.. 
literalinclude:: examples/direct_solve.py - -* :download:`examples/direct_solve.py` - -Iterative Solvers ------------------ - -* the :mod:`isolve` module contains the following solvers: - * ``bicg`` (BIConjugate Gradient) - * ``bicgstab`` (BIConjugate Gradient STABilized) - * ``cg`` (Conjugate Gradient) - symmetric positive definite matrices - only - * ``cgs`` (Conjugate Gradient Squared) - * ``gmres`` (Generalized Minimal RESidual) - * ``minres`` (MINimum RESidual) - * ``qmr`` (Quasi-Minimal Residual) - -Common Parameters -^^^^^^^^^^^^^^^^^ - -* mandatory: - - A : {sparse array/matrix, dense array/matrix, LinearOperator} - The N-by-N matrix of the linear system. - b : {array, matrix} - Right hand side of the linear system. Has shape (N,) or (N,1). - -* optional: - - x0 : {array, matrix} - Starting guess for the solution. - tol : float - Relative tolerance to achieve before terminating. - maxiter : integer - Maximum number of iterations. Iteration will stop after maxiter - steps even if the specified tolerance has not been achieved. - M : {sparse array/matrix, dense array/matrix, LinearOperator} - Preconditioner for A. The preconditioner should approximate the - inverse of A. Effective preconditioning dramatically improves the - rate of convergence, which implies that fewer iterations are needed - to reach a given error tolerance. - callback : function - User-supplied function to call after each iteration. It is called - as callback(xk), where xk is the current solution vector. - -LinearOperator Class -^^^^^^^^^^^^^^^^^^^^ - -* common interface for performing matrix vector products -* useful abstraction that enables using dense and sparse matrices within - the solvers, as well as *matrix-free* solutions -* has `shape` and `matvec()` (+ some optional parameters) -* example: - -.. code-block:: pycon - - >>> import numpy as np - >>> import scipy as sp - >>> def mv(v): - ... return np.array([2 * v[0], 3 * v[1]]) - ... 
- >>> A = sp.sparse.linalg.LinearOperator((2, 2), matvec=mv) - >>> A - <2x2 _CustomLinearOperator with dtype=int8> - >>> A.matvec(np.ones(2)) - array([2., 3.]) - >>> A * np.ones(2) - array([2., 3.]) - -A Few Notes on Preconditioning -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -* problem specific -* often hard to develop -* if not sure, try ILU - * available in :mod:`scipy.sparse.linalg` as :func:`spilu()` - -Eigenvalue Problem Solvers --------------------------- - -The :mod:`eigen` module -^^^^^^^^^^^^^^^^^^^^^^^^ - -* ``arpack`` - * a collection of Fortran77 subroutines designed to solve large scale eigenvalue problems - -* ``lobpcg`` (Locally Optimal Block Preconditioned Conjugate - Gradient Method) - * works very well in combination with `PyAMG `_ - * example by Nathan Bell: - - .. literalinclude:: examples/pyamg_with_lobpcg.py - - * :download:`examples/pyamg_with_lobpcg.py` - -* example by Nils Wagner: - - * :download:`examples/lobpcg_sakurai.py` - -* output:: - - $ python examples/lobpcg_sakurai.py - Results by LOBPCG for n=2500 - - [ 0.06250083 0.06250028 0.06250007] - - Exact eigenvalues - - [ 0.06250005 0.0625002 0.06250044] - - Elapsed time 7.01 - -.. image:: figures/lobpcg_eigenvalues.png diff --git a/advanced/scipy_sparse/storage_schemes.md b/advanced/scipy_sparse/storage_schemes.md new file mode 100644 index 000000000..f68a7b143 --- /dev/null +++ b/advanced/scipy_sparse/storage_schemes.md @@ -0,0 +1,78 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Storage Schemes + +## Sparse Array Classes + +- There are seven sparse array types in scipy.sparse: + + 1. [csr_array](csr_array): Compressed Sparse Row format + 2. [csc_array](csc_array): Compressed Sparse Column format + 3. [bsr_array](bsr_array): Block Sparse Row format + 4. [lil_array](lil_array): List of Lists format + 5.
[dok_array](dok_array): Dictionary of Keys format + 6. [coo_array](coo_array): COOrdinate format (aka IJV, + triplet format) + 7. [dia_array](dia_array): DIAgonal format + +- each suitable for some tasks + +- many employ sparsetools C++ module by Nathan Bell + +- assume the following is imported: + +```{code-cell} +import numpy as np +import scipy as sp +import matplotlib.pyplot as plt +``` + +- **warning** for Numpy users: + - passing a sparse array object to NumPy functions that expect + ndarray/matrix does not work. Use sparse functions. + - the older csr_matrix classes use '\*' for matrix multiplication (dot + product) and 'A.multiply(B)' for elementwise multiplication. + - the newer csr_array uses '@' for dot product and '\*' for elementwise + multiplication + - sparse arrays can be 1D or 2D, but not nD for n > 2 (unlike Numpy arrays). + +## Common Methods + +- all scipy.sparse array classes are subclasses of {class}`sparray` + - default implementation of arithmetic operations + - always converts to CSR + - subclasses override for efficiency + - shape, data type, set/get + - indices of nonzero values in the array + - format conversion, interaction with NumPy (`toarray()`) + - ... +- attributes: + - `mtx.T` - transpose (same as mtx.transpose()) + - `mtx.real` - real part of complex matrix + - `mtx.imag` - imaginary part of complex matrix + - `mtx.size` - the number of nonzeros (same as self.getnnz()) + - `mtx.shape` - the number of rows and columns (tuple) +- data and indices usually stored in 1D NumPy arrays + +## Summary + +| format | matrix \* vector | get item | fancy get | set item | fancy set | solvers | note | +| ------ | ---------------- | -------- | ------------- | -------- | --------- | ----------- | ----------------------------------------------------------- | +| CSR | sparsetools | yes | yes | slow | . | any | has data array, fast row-wise ops | +| CSC | sparsetools | yes | yes | slow | . 
| any | has data array, fast column-wise ops | +| BSR | sparsetools | . | . | . | . | specialized | has data array, specialized | +| COO | sparsetools | . | . | . | . | iterative | has data array, facilitates fast conversion | +| DIA | sparsetools | . | . | . | . | iterative | has data array, specialized | +| LIL | via CSR | yes | yes | yes | yes | iterative | arithmetic via CSR, incremental construction | +| DOK | Python | yes | one axis only | yes | yes | iterative | O(1) item access, incremental construction, slow arithmetic | diff --git a/advanced/scipy_sparse/storage_schemes.rst b/advanced/scipy_sparse/storage_schemes.rst deleted file mode 100644 index 17ca03818..000000000 --- a/advanced/scipy_sparse/storage_schemes.rst +++ /dev/null @@ -1,132 +0,0 @@ -Storage Schemes -=============== - -* seven sparse array types in scipy.sparse: - 1. csr_array: Compressed Sparse Row format - 2. csc_array: Compressed Sparse Column format - 3. bsr_array: Block Sparse Row format - 4. lil_array: List of Lists format - 5. dok_array: Dictionary of Keys format - 6. coo_array: COOrdinate format (aka IJV, triplet format) - 7. dia_array: DIAgonal format -* each suitable for some tasks -* many employ sparsetools C++ module by Nathan Bell -* assume the following is imported:: - - >>> import numpy as np - >>> import scipy as sp - >>> import matplotlib.pyplot as plt - - -* **warning** for Numpy users: - * passing a sparse array object to NumPy functions that expect - ndarray/matrix does not work. Use sparse functions. - * the older csr_matrix classes use '*' for matrix multiplication (dot product) - and 'A.multiply(B)' for elementwise multiplication. - * the newer csr_array uses '@' for dot product and '*' for elementwise multiplication - * sparse arrays can be 1D or 2D, but not nD for n > 2 (unlike Numpy arrays). 
- -Common Methods --------------- - -* all scipy.sparse array classes are subclasses of :class:`sparray` - * default implementation of arithmetic operations - * always converts to CSR - * subclasses override for efficiency - * shape, data type, set/get - * indices of nonzero values in the array - * format conversion, interaction with NumPy (`toarray()`) - * ... -* attributes: - * `mtx.T` - transpose (same as mtx.transpose()) - * `mtx.real` - real part of complex matrix - * `mtx.imag` - imaginary part of complex matrix - * `mtx.size` - the number of nonzeros (same as self.getnnz()) - * `mtx.shape` - the number of rows and columns (tuple) -* data and indices usually stored in 1D NumPy arrays - -Sparse Array Classes ---------------------- - -.. toctree:: - :maxdepth: 2 - - dia_array - lil_array - dok_array - coo_array - csr_array - csc_array - bsr_array - -Summary -------- - -.. list-table:: Summary of storage schemes. - :widths: 10 10 10 10 10 10 10 30 - :header-rows: 1 - - * - format - - matrix * vector - - get item - - fancy get - - set item - - fancy set - - solvers - - note - * - CSR - - sparsetools - - yes - - yes - - slow - - . - - any - - has data array, fast row-wise ops - * - CSC - - sparsetools - - yes - - yes - - slow - - . - - any - - has data array, fast column-wise ops - * - BSR - - sparsetools - - . - - . - - . - - . - - specialized - - has data array, specialized - * - COO - - sparsetools - - . - - . - - . - - . - - iterative - - has data array, facilitates fast conversion - * - DIA - - sparsetools - - . - - . - - . - - . 
- - iterative - - has data array, specialized - * - LIL - - via CSR - - yes - - yes - - yes - - yes - - iterative - - arithmetic via CSR, incremental construction - * - DOK - - python - - yes - - one axis only - - yes - - yes - - iterative - - O(1) item access, incremental construction, slow arithmetic diff --git a/build_requirements.txt b/build_requirements.txt new file mode 100644 index 000000000..35246e005 --- /dev/null +++ b/build_requirements.txt @@ -0,0 +1,12 @@ +# Build requirements +# To upgrade certificates; needed for Python.org install. +# certifi +# Also: https://stackoverflow.com/a/79235523 +# export SSL_CERT_FILE=$(python3 -m certifi) +pre-commit +sphinx-book-theme@git+https://github.com/executablebooks/sphinx-book-theme@56874cb +sphinx_exercise +jupyter-book>=1,<2 +-r requirements.txt +# To allow static build / upload +ghp-import diff --git a/conf.py b/conf.py deleted file mode 100644 index e66c929ee..000000000 --- a/conf.py +++ /dev/null @@ -1,303 +0,0 @@ -from datetime import date -from subprocess import PIPE, Popen -import os - -import sphinx_gallery -from pygments import formatters -from sphinx import highlighting - -# General configuration -# --------------------- - -exclude_patterns = ["README.rst"] - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.doctest", - "IPython.sphinxext.ipython_console_highlighting", - "IPython.sphinxext.ipython_directive", - "sphinx.ext.imgmath", - "sphinx.ext.intersphinx", - "sphinx.ext.extlinks", - "sphinx_gallery.gen_gallery", - "sphinx_copybutton", -] - -# See https://sphinx-copybutton.readthedocs.io/en/latest/use.html#automatic-exclusion-of-prompts-from-the-copies -copybutton_prompt_text = r">>> |\.\.\. 
|\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " -copybutton_prompt_is_regexp = True -copybutton_copy_empty_lines = False - -doctest_test_doctest_blocks = "true" - -sphinx_gallery_conf = { - "examples_dirs": [ - "intro/scipy/summary-exercises/examples", - "intro/matplotlib/examples", - "intro/numpy/examples", - "intro/scipy/examples", - # the following entry contains an extra level because - # execution of the other python files causes errors - "advanced/advanced_numpy/examples/plots", - "advanced/image_processing/examples", - "advanced/mathematical_optimization/examples", - "packages/scikit-image/examples", - "packages/scikit-learn/examples", - "packages/statistics/examples", - "guide/examples", - ], - "gallery_dirs": [ - "intro/scipy/summary-exercises/auto_examples", - "intro/matplotlib/auto_examples", - "intro/numpy/auto_examples", - "intro/scipy/auto_examples", - "advanced/advanced_numpy/auto_examples", - "advanced/image_processing/auto_examples", - "advanced/mathematical_optimization/auto_examples", - "packages/scikit-image/auto_examples", - "packages/scikit-learn/auto_examples", - "packages/statistics/auto_examples", - "guide/auto_examples", - ], - "doc_module": "scientific-python-lectures", - # The following is necessary to get the links in the code of the - # examples - "backreferences_dir": "tmp", - "plot_gallery": "1", -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# General information about the project. -project = "Scientific Python Lectures" -copyright = f"{date.today().year}" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -release = "2025.1rc0.dev0" -version = release - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. 
-language = "en" - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - -# Monkey-patch sphinx to set the lineseparator option of pygment, to -# have indented line wrapping - - -class MyHtmlFormatter(formatters.HtmlFormatter): # type: ignore[misc] - def __init__(self, **options): - options["lineseparator"] = '\n
' - formatters.HtmlFormatter.__init__(self, **options) - - -highlighting.PygmentsBridge.html_formatter = MyHtmlFormatter - -# Our substitutions -rst_epilog = """ - -.. |clear-floats| raw:: html - -
- -.. always clear floats at the bottom to avoid having stick out in the footer - -|clear-floats| - -""" - -# Options for HTML output -# ----------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = "scientific_python_lectures" - -# Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ["themes"] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = { - # 'nosidebar': 'true', - "footerbgcolor": "#000000", - "relbarbgcolor": "#000000", -} - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -html_title = "Scientific Python Lectures" - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = "" - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -html_favicon = "images/favicon.ico" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["themes/scientific_python_lectures/static"] - -# If false, no index is generated. -html_use_index = False - -# Output file base name for HTML help builder. 
-htmlhelp_basename = "ScientificPythonLectures" - -# Options for epub output -# ------------------------ - -epub_theme = "epub" -epub_theme_options = {"relbar1": False, "footer": False} -epub_show_urls = "no" -epub_tocdup = False - -# Options for LaTeX output -# ------------------------ - -# Latex references with page numbers (only Sphinx 1.0) -latex_show_pagerefs = False - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, document class [howto/manual]). -latex_documents = [ - ( - "index", - "ScientificPythonLectures.tex", - r"Scientific Python Lectures", - r"""Scientific Python Lectures team. Editors: Gaël Varoquaux, Emmanuelle Gouillart, Olav Vahtras, Pierre de Buyl, K. Jarrod Millman, Stéfan van der Walt""", - "manual", - ), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -latex_logo = "images/cover.pdf" - -# Latex settings -latex_toplevel_sectioning = "part" -latex_domain_indices = False - -# Additional stuff for the LaTeX preamble. 
-preamble = r""" -\definecolor{VerbatimColor}{rgb}{0.961, .98, 1.} -\definecolor{VerbatimBorderColor}{rgb}{0.6,0.6,0.6} -\usepackage{graphics} -\usepackage[final]{pdfpages} - -\setcounter{tocdepth}{1} -\usepackage{amssymb} -\usepackage{pifont} -\usepackage{multicol} -\DeclareUnicodeCharacter{2460}{\ding{182}} -\DeclareUnicodeCharacter{2461}{\ding{183}} -\DeclareUnicodeCharacter{2462}{\ding{184}} -\DeclareUnicodeCharacter{2794}{\ding{229}} - -\renewenvironment{wrapfigure}[2]{\begin{figure}[H]}{\end{figure}} - -\def\shadowbox#1{\rule{\linewidth}{1pt}\nopagebreak - -\nopagebreak\hspace*{.02\linewidth}#1\nopagebreak - -\nopagebreak\rule{\linewidth}{1pt} -} -""" - -latex_elements = { - "papersize": "a4paper", - "preamble": preamble, - "fontpkg": "\\usepackage{lmodern}", - "fncychap": r"""% - \usepackage[Sonny]{fncychap}% - \ChRuleWidth{1.5pt}% - \ChNumVar{\fontsize{76}{80}\sffamily\slshape} - \ChTitleVar{\raggedleft\Huge\sffamily\bfseries} - """, - "classoptions": ",oneside,openany", - "babel": r"\usepackage[english]{babel}", - "releasename": "Edition", - "sphinxsetup": "warningBgColor={RGB}{255,204,204}", - "maketitle": r""" - \includepdf[noautoscale]{cover.pdf} - \makeatletter% - \hypersetup{ - pdfinfo={ - Title={\@title}, - Author={\@author}, - License={CC-BY}, - } - }% - \makeatother% - \newpage\newpage - """, - # 'tableofcontents': '\\pagestyle{normal}\\pagenumbering{arabic} %\\tableofcontents', -} - -_python_doc_base = "https://docs.python.org/3/" - -# Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = { - "python": (_python_doc_base, None), - "numpy": ("https://numpy.org/doc/stable/", None), - "scipy": ("https://docs.scipy.org/doc/scipy/", None), - "matplotlib": ("https://matplotlib.org/stable/", None), - "sklearn": ("https://scikit-learn.org/stable/", None), - "sphinx": ("https://www.sphinx-doc.org/en/master/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "seaborn": ("https://seaborn.pydata.org/", None), - "skimage": ("https://scikit-image.org/docs/stable/", None), - "statsmodels": ("https://www.statsmodels.org/stable/", None), - "imageio": ("https://imageio.readthedocs.io/en/stable/", None), -} - - -extlinks = { - "simple": (_python_doc_base + "/reference/simple_stmts.html#%s", "%s"), - "compound": (_python_doc_base + "/reference/compound_stmts.html#%s", "%s"), -} - -# -- Options for imgmath ------------------------------------------------ - -imgmath_use_preview = True - - -def add_per_page_js(app, pagename, templatename, context, doctree): - if pagename.split("/")[-1] == "index": - # For folding table of contents - app.add_js_file("foldable_toc.js") - app.add_css_file("foldable_toc.css") - - -def setup(app): - app.add_js_file("https://code.jquery.com/jquery-3.7.0.min.js") - app.add_js_file("scroll_highlight_toc.js") - - app.connect("html-page-context", add_per_page_js) - - # Is this still used? 
- app.add_css_file("https://unpkg.com/purecss@3.0.0/build/base-min.css") - - -# Generate redirect on scipy-lectures.org -domain = os.getenv("DOMAIN", "lectures.scientific-python.org") -html_context = {"domain": domain} -print(f"Building for domain: {domain}") diff --git a/data/an_array.txt b/data/an_array.txt new file mode 100644 index 000000000..78b2463ca --- /dev/null +++ b/data/an_array.txt @@ -0,0 +1,10 @@ +5 3 8 2 +3 8 4 7 +1 5 6 3 +3 2 1 5 +4 6 0 5 +5 8 6 3 +7 2 2 0 +6 6 3 3 +2 2 6 3 +1 6 5 4 diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 000000000..3d014638c --- /dev/null +++ b/dev_requirements.txt @@ -0,0 +1,3 @@ +# Development requirements +-r build_requirements.txt +pre_commit diff --git a/guide/index.md b/guide/index.md new file mode 100644 index 000000000..aec184e2e --- /dev/null +++ b/guide/index.md @@ -0,0 +1,186 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(guide)= + +# How to contribute + +**Author**: _Nicolas Rougier_ + +:::{admonition} Foreword +Use the `topic` keyword for any forewords +::: + +Make sure to read this [Documentation style guide] as well as these +[tips, tricks] and conventions about documentation content and workflows. + +## How to contribute ? + +- If you spot typos, unclear or clumsy wording in the lectures, please + help to improve them. Simple text editing can be done by [editing files + in your GitHub fork](https://help.github.com/articles/editing-files-in-your-repository/) of + the lectures. On every html page of the lectures, an **edit** + button on the top right links to the editable source of the page (you still + need to create a fork of the project). Edit the source and choose + "Create a new branch for this commit and start a pull request". 
+ +- Choose a topic that is not yet covered and write it up! + + First create a new issue on GitHub to explain the topic which you would + like to cover, in order to discuss with editors and contributors about + the scope of the future tutorial. + + Then create a new directory inside one of the chapters directories (`intro`, + `advanced`, or `packages`) and create a new notebook named `index` for the + new tutorial. Add the new file in the table of contents of the corresponding + chapter (in the `_toc.yml`). + +Keep in mind that tutorials are to be taught at different places and +different parts may be combined into a course on Python for scientific +computing. Thus you want them to be interactive and reasonably short (one +to two hours). + +Last but not least, the goal of this material is to provide a concise +text to learn the main features of the scientific Python ecosystem. If +you want to contribute to reference material, we suggest that you +contribute to the documentation of the specific packages that you are +interested in. + +## Using GitHub + +The easiest way to make your own version of this teaching material +is to fork it on GitHub, and use the git version control system to +maintain your own fork. For this, all you have to do is create an account +on GitHub and click on the _fork_ button, on the top right of [this +page](https://github.com/scipy-lectures/scientific-python-lectures). You can use git to pull from your _fork_, and push back to it the +changes. If you want to contribute the changes back, just file a +_pull request_, using the button on the top of your fork's page. + +Several resources are available online to learn git and GitHub, such as +<https://try.github.io> for complete beginners. + +Please refrain from modifying the Makefile unless it is absolutely +necessary. + +## Keeping it concise: collapsing paragraphs + +The HTML output is used for displaying on screen while teaching. The goal +is to have the same material displayed as in the notes.
Thus there needs +to be a very concise display, with bullet-lists rather than full-blown +paragraphs and sentences. For more elaborate discussions that people can +read and refer to, please use +[Dropdowns](https://jupyterbook.org/en/stable/interactive/hiding.html#hide-markdown-using-myst-markdown). +These create collapsible paragraphs, that can be hidden during an oral +presentation. For example: + +::: {toggle} + +Here insert a full-blown discussion, that will be collapsible in the HTML +version. + +It can span multiple paragraphs +::: + +This renders as a section that is only visible on clicking the dropdown widget. + +You can also use `:class: dropdown` with an admonition, for the same purpose: + +::: {note} +:class: dropdown + +Another discussion. + +It can also span multiple paragraphs +::: + +## Figures and code examples + +**We do not check figures into the repository**. + +Any figure must be generated with code in some notebook built during the book +build. + +## Using Markup + +There are three main kinds of markup that should be used: _italics_, **bold** +and `fixed-font`. _Italics_ should be used when introducing a new technical +term, **bold** should be used for emphasis and `fixed-font` for source code. + +:::{admonition} Example: +When using _object-oriented programming_ in Python you **must** use the +`class` keyword to define your _classes_. +::: + +In Markdown markup this is: + +```markdown +:::{admonition} Example: +when using _object-oriented programming_ in Python you **must** use the +`class` keyword to define your _classes_. +::: +``` + +## Linking to package documentations + +The goal of the Scientific Python Lectures is not to duplicate or replace +the documentation of the various packages. You should link as much as +possible to the original documentation. + +For cross-referencing API documentation we prefer to use the [intersphinx +extension](https://www.sphinx-doc.org/en/master/usage/extensions/index.html#built-in-extensions).
This provides +the roles `{mod}`, `{class}` and `{func}` to cross-link to modules, +classes and functions respectively. For example, `` {func}`numpy.var` `` will +create a link like {func}`numpy.var`. + +## Chapter, section, subsection, paragraph + +Try to avoid going below paragraph granularity or your document might become +difficult to read: + +```markdown +# Chapter title + +Sample content. + +## Section + +### Subsection + +#### Paragraph + +And some text. +``` + +## Admonitions + +:::{note} +This is a note +::: + +:::{warning} +This is a warning +::: + +## Clearing floats + +Figures positioned with `:align: right` are floats. To flush them, use: + +```markdown +{{ clear_floats }} +``` + +## References + +[documentation style guide]: https://documentation-style-guide-sphinx.readthedocs.org/en/latest/style-guide.html +[tips, tricks]: https://docness.readthedocs.org/en/latest/index.html diff --git a/guide/index.rst b/guide/index.rst deleted file mode 100644 index 122f5d5c8..000000000 --- a/guide/index.rst +++ /dev/null @@ -1,212 +0,0 @@ -.. _guide: - -================= -How to contribute -================= - -**Author**: *Nicolas Rougier* - -.. topic:: Foreword - - Use the ``topic`` keyword for any forewords - - -.. contents:: Chapters contents - :local: - :depth: 1 - - -Make sure to read this `Documentation style guide`_ as well as these -`tips, tricks`_ and conventions about documentation content and workflows. - - -How to contribute ? -=================== - -* If you spot typos, unclear or clumsy wording in the lectures, please - help to improve them. Simple text editing can be done by `editing files - in your GitHub fork - `_ of - the lectures. On every html page of the lectures, an **edit** - button on the top right links to the editable source of the page (you still - need to create a fork of the project). Edit the source and choose - "Create a new branch for this commit and start a pull request".
- -* Choose a topic that is not yet covered and write it up ! - - First create a new issue on GitHub to explain the topic which you would - like to cover, in order to discuss with editors and contributors about - the scope of the future tutorial. - - Then create a new directory inside one of the chapters directories - (``intro``, ``advanced``, or ``packages``) and create a file ``index.rst`` - for the new tutorial. Add the new file in the table of contents of the - corresponding chapter (in its ``index.rst``). - -Keep in mind that tutorials are to be taught at different places and -different parts may be combined into a course on Python for scientific -computing. Thus you want them to be interactive and reasonably short (one -to two hours). - -Last but not least, the goal of this material is to provide a concise -text to learn the main features of the scientific Python ecosystem. If -you want to contribute to reference material, we suggest that you -contribute to the documentation of the specific packages that you are -interested in. - -Using GitHub -============ - -The easiest way to make your own version of this teaching material -is to fork it under GitHub, and use the git version control system to -maintain your own fork. For this, all you have to do is create an account -on GitHub and click on the *fork* button, on the top right of `this -page `_. You can use git to pull from your *fork*, and push back to it the -changes. If you want to contribute the changes back, just fill a -*pull request*, using the button on the top of your fork's page. - -Several resources are available online to learn git and GitHub, such as -https://try.github.io for complete beginners. - -Please refrain from modifying the Makefile unless it is absolutely -necessary. - -Keeping it concise: collapsing paragraphs -=========================================== - -The HTML output is used for displaying on screen while teaching. 
The goal -is to have the same material displayed as in the notes. Thus there needs -to be a very concise display, with bullet-lists rather than full-blown -paragraphs and sentences. For more elaborate discussions that people can -read and refer to, please use the ``tip`` sphinx directive. It creates -collapsible paragraphs, that can be hidden during an oral -presentation:: - - .. tip:: - - Here insert a full-blown discussion, that will be collapsible in - the HTML version. - - It can span on multiple paragraphs - -This renders as: - - .. tip:: - - Here insert a full-blown discussion, that will be collapsible in - the HTML version. - - It can span on multiple paragraphs - -Figures and code examples -========================== - -**We do not check figures in the repository**. -Any figure must be generated from a python script that needs to be named -``plot_xxx.py`` (xxx can be anything of course) and put into the ``examples`` -directory. The generated image will be named from the script name. - -.. image:: auto_examples/images/sphx_glr_plot_simple_001.png - :target: auto_examples/plot_simple.html - - -This is the way to include your image and link it to the code: - -.. code-block:: rst - - .. image:: auto_examples/images/sphx_glr_plot_simple_001.png - :target: auto_examples/plot_simple.html - -You can display the corresponding code using the ``literal-include`` -directive. - -.. literalinclude:: examples/plot_simple.py - -.. note:: - - The transformation of Python scripts into figures and galleries of - examples is provided by the `sphinx-gallery - `_ package. - -Using Markup -============ - -There are three main kinds of markup that should be used: *italics*, **bold** -and ``fixed-font``. *Italics* should be used when introducing a new technical -term, **bold** should be used for emphasis and ``fixed-font`` for source code. - -.. topic:: Example: - - When using *object-oriented programming* in Python you **must** use the - ``class`` keyword to define your *classes*. 
- -In restructured-text markup this is:: - - when using *object-oriented programming* in Python you **must** use the - ``class`` keyword to define your *classes*. - - -Linking to package documentations -================================== - -The goal of the Scientific Python Lectures is not to duplicate or replace -the documentation of the various packages. You should link as much as -possible to the original documentation. - -For cross-referencing API documentation we prefer to use the `intersphinx -extension `_. This provides -the directives ``:mod:``, ``:class:`` and ``:func:`` to cross-link to modules, -classes and functions respectively. For example the ``:func:`numpy.var``` will -create a link like :func:`numpy.var`. - -Chapter, section, subsection, paragraph -======================================= - -Try to avoid to go below paragraph granularity or your document might become -difficult to read: - -.. code-block:: rst - - ============= - Chapter title - ============= - - Sample content. - - Section - ======= - - Subsection - ---------- - - Paragraph - ......... - - And some text. - - -Admonitions -============ - -.. note:: - - This is a note - -.. warning:: - - This is a warning - -Clearing floats -================ - -Figures positioned with `:align: right` are float. To flush them, use:: - - |clear-floats| - -References -========== - -.. target-notes:: - -.. _`Documentation style guide`: https://documentation-style-guide-sphinx.readthedocs.org/en/latest/style-guide.html -.. 
_`tips, tricks`: https://docness.readthedocs.org/en/latest/index.html diff --git a/images/sp_lectures.ico b/images/sp_lectures.ico new file mode 100644 index 000000000..69391bf77 Binary files /dev/null and b/images/sp_lectures.ico differ diff --git a/images/sp_lectures.png b/images/sp_lectures.png new file mode 100644 index 000000000..f6844c37c Binary files /dev/null and b/images/sp_lectures.png differ diff --git a/includes/big_toc_css.rst b/includes/big_toc_css.rst deleted file mode 100644 index 454e9ace1..000000000 --- a/includes/big_toc_css.rst +++ /dev/null @@ -1,43 +0,0 @@ -:orphan: - -.. - File to ..include in a document with a big table of content, to give - it 'style' - -.. raw:: html - - diff --git a/includes/bigger_toc_css.rst b/includes/bigger_toc_css.rst deleted file mode 100644 index 66563bd73..000000000 --- a/includes/bigger_toc_css.rst +++ /dev/null @@ -1,59 +0,0 @@ -:orphan: - -.. - File to ..include in a document with a very big table of content, to - give it 'style' - -.. raw:: html - - diff --git a/index.md b/index.md new file mode 100644 index 000000000..1259c2acb --- /dev/null +++ b/index.md @@ -0,0 +1,13 @@ +# Scientific Python Lectures + +## One document to learn numerics, science, and data with Python + +Tutorials on the scientific Python ecosystem: a quick introduction to +central tools and techniques. The different chapters each correspond +to a 1 to 2 hours course with increasing level of expertise, from +beginner to expert. + +Release: {{ release }} + +::: {tableofcontents} +::: diff --git a/index.rst b/index.rst deleted file mode 100644 index 21495e712..000000000 --- a/index.rst +++ /dev/null @@ -1,153 +0,0 @@ -Scientific Python Lectures -========================== - -.. only:: html - - One document to learn numerics, science, and data with Python - -------------------------------------------------------------- - -.. raw html to center the title - -.. raw:: html - - - -.. nice layout in the toc - -.. 
Icons from https://fonts.google.com/icons - -.. |pdf-icon| image:: images/icon-pdf.svg - :width: 1em - :class: vcenter - :alt: PDF icon - -.. |html-icon| image:: images/icon-archive.svg - :width: 1em - :class: vcenter - :alt: Archive icon - - -.. |github-icon| image:: images/icon-github.svg - :width: 1em - :class: vcenter - :alt: GitHub icon - - -.. only:: html - - .. sidebar:: Download - - |pdf-icon| `PDF, 2 pages per side <./_downloads/ScientificPythonLectures.pdf>`_ - - |pdf-icon| `PDF, 1 page per side <./_downloads/ScientificPythonLectures-simple.pdf>`_ - - |github-icon| `Source code (github) `_ - - - Tutorials on the scientific Python ecosystem: a quick introduction to - central tools and techniques. The different chapters each correspond - to a 1 to 2 hours course with increasing level of expertise, from - beginner to expert. - - Release: |release| - - .. rst-class:: preface - - .. toctree:: - :maxdepth: 2 - - preface.rst - -| - -.. rst-class:: tune - - .. toctree:: - :numbered: 4 - - intro/index.rst - advanced/index.rst - packages/index.rst - about.rst - -| - -.. - FIXME: I need the link below to make sure the banner gets copied to the - target directory. - -.. only:: html - - .. raw:: html - -
- - :download:`ScientificPythonLectures.pdf` :download:`ScientificPythonLectures-simple.pdf` - - .. image:: themes/plusBox.png - - .. image:: images/logo.svg - - .. raw:: html - -
- - - -.. - >>> # For doctest on headless environments (needs to happen early) - >>> import matplotlib - >>> matplotlib.use('Agg') diff --git a/intro/help/help.md b/intro/help/help.md new file mode 100644 index 000000000..7eafe34db --- /dev/null +++ b/intro/help/help.md @@ -0,0 +1,90 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(help)= + +# Getting help and finding documentation + +**Author**: _Emmanuelle Gouillart_ + +Rather than knowing all functions in NumPy and SciPy, it is important to +find information throughout the documentation and the available help. Here are +some ways to get information: + +## `help` in Jupyter and IPython + +In the Jupyter notebook, and in IPython terminals, one can use the `help` +function to see the docstring of any particular function. For example: + +```{code-cell} +import numpy as np + +help(np.around) +``` + +Jupyter and IPython also recognize `?` at the end of the function name as a request for the function's docstring, so executing: + +```{code-cell} +np.around? +``` + +is equivalent to executing `help(np.around)`. + +You only need to type the beginning of the function's name and use tab completion +to display the matching functions. For example, if you were interested in the `np.vander` function, you can press the Tab key after `np.van` to tab complete to the only function starting with `np.van` (`np.vander`). + +```{code-cell} +# Uncomment, and press Tab at the end of `np.van` to show tab completion. +# np.van +``` + +In the standard Ipython terminal, it is not possible to open a separate window +for help and documentation; however one can always open a second `Ipython` +shell just to display help and docstrings... + +## Online documentation + +NumPy's and SciPy's documentation can be browsed online at +<https://scipy.org> and <https://numpy.org>.
The `search` button is quite +useful inside the reference documentation of the two packages. + +Tutorials on various topics as well as the complete API with all docstrings are found on this website. + +The SciPy Cookbook <https://scipy-cookbook.readthedocs.io> gives recipes on +many common problems frequently encountered, such as fitting data points, +solving ODEs, etc. + +Matplotlib's website <https://matplotlib.org/> features a very nice +**gallery** with a large number of plots, each of which shows both the source +code and the resulting plot. This is very useful for learning by example. More +standard documentation is also available. + +## `psearch` + +Jupyter and IPython have a magic function `%psearch` to search for objects +matching patterns. This is useful if, for example, one does not know the exact +name of a function. + +```{code-cell} +%psearch np.diag* +``` + +## If all else fails + +If everything listed above fails (and Google doesn't have the answer)... don't +despair! There is a vibrant Scientific Python community. Scientific Python is +present on various platforms: <https://scientific-python.org/community/>. + +Packages like SciPy and NumPy also have their own channels. Have a look at +their respective websites to find out how to engage with users and +maintainers. diff --git a/intro/help/help.rst b/intro/help/help.rst deleted file mode 100644 index e3cdb2146..000000000 --- a/intro/help/help.rst +++ /dev/null @@ -1,72 +0,0 @@ -.. _help: - -Getting help and finding documentation -========================================= - -**Author**: *Emmanuelle Gouillart* - -Rather than knowing all functions in NumPy and SciPy, it is important to -find rapidly information throughout the documentation and the available -help. Here are some ways to get information: - -* In Ipython, ``help function`` opens the docstring of the function. Only - type the beginning of the function's name and use tab completion to - display the matching functions. - - ..
ipython:: - - @verbatim - In [204]: help(np.van - - In [204]: help(np.vander) - -In Ipython it is not possible to open a separated window for help and -documentation; however one can always open a second ``Ipython`` shell -just to display help and docstrings... - -* Numpy's and Scipy's documentations can be browsed online on - https://scipy.org and https://numpy.org. The ``search`` button is quite - useful inside - the reference documentation of the two packages. - - Tutorials on various topics as well as the complete API with all - docstrings are found on this website. - -* Numpy's and Scipy's documentation is enriched and updated on a regular - basis by users on a wiki https://numpy.org/doc/stable/. As a result, - some docstrings are clearer or more detailed on the wiki, and you may - want to read directly the documentation on the wiki instead of the - official documentation website. Note that anyone can create an account on - the wiki and write better documentation; this is an easy way to - contribute to an open-source project and improve the tools you are - using! - -* The SciPy Cookbook https://scipy-cookbook.readthedocs.io gives recipes on many - common problems frequently encountered, such as fitting data points, - solving ODE, etc. - -* Matplotlib's website https://matplotlib.org/ features a very - nice **gallery** with a large number of plots, each of them shows both - the source code and the resulting plot. This is very useful for - learning by example. More standard documentation is also available. - - -* In Ipython, the magical function ``%psearch`` search for objects - matching patterns. This is useful if, for example, one does not know - the exact name of a function. - - - .. ipython:: - - In [3]: import numpy as np - In [4]: %psearch np.diag* - -* If everything listed above fails (and Google doesn't have the - answer)... don't despair! There is a vibrant Scientific Python community. - Scientific Python is present on various platform. 
- https://scientific-python.org/community/ - - - Packages like SciPy and NumPy also have their own channels. Have a look at - their respective websites to find out how to engage with users and - maintainers. diff --git a/intro/index.md b/intro/index.md new file mode 100644 index 000000000..e1f200121 --- /dev/null +++ b/intro/index.md @@ -0,0 +1,11 @@ +--- +orphan: true +--- + +# Introduction to getting started + +This part of the _Scientific Python Lectures_ is a self-contained +introduction to everything that is needed to use Python for science, +from the language itself, to numerical computing or plotting. + +See the "Getting started with Python for Science" section in the table of contents. diff --git a/intro/index.rst b/intro/index.rst deleted file mode 100644 index 42ab9d671..000000000 --- a/intro/index.rst +++ /dev/null @@ -1,23 +0,0 @@ -Getting started with Python for science -======================================= - -This part of the *Scientific Python Lectures* is a self-contained -introduction to everything that is needed to use Python for science, -from the language itself, to numerical computing or plotting. - -| - - -.. include:: ../includes/big_toc_css.rst - :start-line: 1 - -.. rst-class:: tune - - .. toctree:: - - intro.rst - language/python_language.rst - numpy/index.rst - matplotlib/index.rst - scipy/index.rst - help/help.rst diff --git a/intro/intro.md b/intro/intro.md new file mode 100644 index 000000000..7258705e2 --- /dev/null +++ b/intro/intro.md @@ -0,0 +1,452 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Python scientific computing ecosystem + +**Authors**: _Fernando Perez, Emmanuelle Gouillart, Gaël Varoquaux, +Valentin Haenel_ + +## Why Python? 
+ +### The scientist's needs + +- Get data (simulation, experiment control), +- Manipulate and process data, +- Visualize results, quickly to understand, but also with high quality + figures, for reports or publications. + +### Python's strengths + +- **Batteries included** Rich collection of already existing **bricks** + of classic numerical methods, plotting or data processing tools. We + don't want to re-program the plotting of a curve, a Fourier transform + or a fitting algorithm. Don't reinvent the wheel! +- **Easy to learn** Most scientists are not paid as programmers, neither + have they been trained so. They need to be able to draw a curve, smooth + a signal, do a Fourier transform in a few minutes. +- **Easy communication** To keep code alive within a lab or a company + it should be as readable as a book by collaborators, students, or + maybe customers. Python syntax is simple, avoiding strange symbols or + lengthy routine specifications that would divert the reader from + mathematical or scientific understanding of the code. +- **Efficient code** Python numerical modules are computationally + efficient. But needless to say that a very fast code becomes useless if + too much time is spent writing it. Python aims for quick development + times and quick execution times. +- **Universal** Python is a language used for many different problems. + Learning Python avoids learning a new software for each new problem. + +### How does Python compare to other solutions? + +::: {list-table} **Compiled languages (C, C++, Fortran ...)** + +- - Pros + - Very fast. For heavy computations, it’s difficult to outperform these + languages. +- - Cons + - Painful usage: no interactivity during development, mandatory compilation + steps, verbose syntax, manual memory management. These are **difficult + languages** for non programmers. 
+ +::: + +::: {list-table} **Matlab scripting language** + +- - Pros + - - Very rich collection of libraries with numerous algorithms, for many + different domains. Fast execution because these libraries are often + written in a compiled language. + - Pleasant development environment: comprehensive help, integrated + editor, etc. + - Commercial support is available. +- - Cons + - - Base language is quite poor and can become restrictive for advanced + users. + - Not free and not everything is open sourced. + +::: + +::: {list-table} **Julia** + +- - Pros + - - Fast code, yet interactive and simple to read and write. + - Easily connects to Python or C. +- - Cons + - - Ecosystem limited to numerical computing. + - Still young. + +::: + +::: {list-table} **Other scripting languages:** Scilab, Octave, R, IDL, etc. + +- - Pros + - - Open-source, free, or at least cheaper than Matlab. + - Some features can be very advanced (statistics in R, etc.) +- - Cons + - - Fewer available algorithms than in Matlab, and the language is not more + advanced. + - Some software is dedicated to one domain. Ex: Gnuplot to draw curves. + These programs are very powerful, but they are restricted to a single + type of usage, such as plotting. + +::: + +::: {list-table} **Python** + +- - Pros + - - Very rich scientific computing libraries + - Well thought out language, allowing to write very readable and well + structured code: we “code what we think”. + - Many libraries beyond scientific computing (web server, serial port + access, etc.) + - Free and open-source software, widely spread, with a vibrant community. + - A variety of powerful environments to work in, such as IPython, Spyder, + Jupyter notebooks, PyCharm, Visual Studio Code +- - Cons + - - Not all the algorithms that can be found in more specialized software or + toolboxes. 
+ +::: + ++++ + +### The scientific Python ecosystem + +Unlike Matlab, or R, Python does not come with a pre-bundled set +of modules for scientific computing. Below are the basic building blocks +that can be combined to obtain a scientific computing environment: + +**Python**, a generic and modern computing language + +- The language: flow control, data types (`string`, `int`), + data collections (lists, dictionaries), etc. +- Modules of the standard library: string processing, file + management, simple network protocols. +- A large number of specialized modules or applications written in + Python: web framework, etc. ... and scientific + computing. +- Development tools (automatic testing, documentation generation) + +:::{admonition} See also + +[Chapter on Python language](python-language-chapter) +::: + +**Core numeric libraries** + +- **NumPy**: numerical computing with powerful **numerical arrays** + objects, and routines to manipulate them. + + :::{seealso} + {ref}`Chapter on Numpy ` + ::: + +- **SciPy** : high-level numerical routines. + Optimization, regression, interpolation, etc + + :::{seealso} + {ref}`Chapter on SciPy ` + ::: + +- **Matplotlib** : 2-D visualization, "publication-ready" plots + + + :::{seealso} + {ref}`Chapter on Matplotlib ` + ::: + +**Advanced interactive environments**: + +- **IPython**, an advanced **Python console** +- **Jupyter**, **notebooks** in the browser + +**Domain-specific packages**, + +- **pandas, statsmodels, seaborn** for {ref}`statistics ` +- **sympy** for {ref}`symbolic computing ` +- **scikit-image** for {ref}`image processing ` +- **scikit-learn** for {ref}`machine learning ` + +and many more packages not documented in the Scientific Python Lectures. 
+ +:::{admonition} See also + +- [Chapters on advanced topics](advanced-topics-part) +- [Chapters on packages and applications](applications-part) + ::: + +{{ clear_floats }} + +```{code-cell} +:tags: [hide-input] + +import numpy as np +``` + +## Before starting: Installing a working environment + +Python comes in many flavors, and there are many ways to install it. +However, we recommend to install a scientific-computing distribution, +that comes readily with optimized versions of scientific modules. + +**Under Linux** + +If you have a recent distribution, most of the tools are probably +packaged, and it is recommended to use your package manager. + +**Other systems** + +There are several fully-featured scientific Python distributions: + +- [Anaconda](https://www.anaconda.com/download) +- [WinPython](https://winpython.github.io) + ++++ + +## The workflow: interactive environments and text editors + +**Interactive work to test and understand algorithms:** In this section, we +describe a workflow combining interactive work and consolidation. + +Python is a general-purpose language. As such, there is not one blessed +environment to work in, and not only one way of using it. Although +this makes it harder for beginners to find their way, it makes it +possible for Python to be used for programs, in web servers, or +embedded devices. + +(interactive-work)= + +### Interactive work + +We recommend an interactive work with the [IPython](https://ipython.org) console, or its offspring, the [Jupyter notebook](https://docs.jupyter.org/en/latest/content-quickstart.html). They +are handy to explore and understand algorithms. + +:::{sidebar} Under the notebook +To execute code, press "shift enter" +::: + +Start `ipython`: + +```ipython +In [2]: print('Hello world') +Hello world +``` + +Getting help by using the **?** operator after an object: + +```ipython +In [3]: print? 
+Signature: print(*args, sep=' ', end='\n', file=None, flush=False) +Docstring: +Prints the values to a stream, or to sys.stdout by default. + +sep + string inserted between values, default a space. +end + string appended after the last value, default a newline. +file + a file-like object (stream); defaults to the current sys.stdout. +flush + whether to forcibly flush the stream. +Type: builtin_function_or_method +``` + +:::{admonition} See also + +- IPython user manual: <https://ipython.readthedocs.io/en/stable/> +- Jupyter Notebook QuickStart: + <https://docs.jupyter.org/en/latest/start/index.html> + ::: + +### Elaboration of the work in an editor + +As you move forward, it will be important to not only work interactively, +but also to create and reuse Python files. For this, a powerful code editor +will get you far. Here are several good easy-to-use editors: + +- [Spyder](https://www.spyder-ide.org/): integrates an IPython + console, a debugger, a profiler... +- [PyCharm](https://www.jetbrains.com/pycharm): integrates an IPython + console, notebooks, a debugger... (freely available, + but commercial) +- [Visual Studio Code](https://code.visualstudio.com/docs/languages/python): + integrates a Python console, notebooks, a debugger, ... + +Some of these are shipped by the various scientific Python distributions, +and you can find them in the menus. + +As an exercise, create a file `my_file.py` in a code editor, and add the +following lines: + +```python +s = 'Hello world' +print(s) +``` + +Now, you can run it in an IPython console or a notebook and explore the +resulting variables: + +```ipython +In [1]: %run my_file.py +Hello world + +In [2]: %whos +Variable Type Data/Info +---------------------------- +s str Hello world +``` + +:::{admonition} From a script to functions +While it is tempting to work only with scripts, that is a file full +of instructions following each other, do plan to progressively evolve +the script to a set of functions: + +- A script is not reusable, functions are. 
+- Thinking in terms of functions helps break the problem into small + blocks. + ::: + +### IPython and Jupyter Tips and Tricks + +The user manuals contain a wealth of information. Here we give a quick +introduction to four useful features: _history_, _tab completion_, _magic +functions_, and _aliases_. + +#### Command history + +Like a UNIX shell, the IPython console supports command history. Press the _up_ +and _down_ cursor keys to navigate previously typed commands: + +```ipython +In [3]: x = 10 + +In [4]: + +In [4]: x = 10 +``` + +#### Tab completion + +Tab completion is a convenient way to explore the structure of any object +you’re dealing with. Simply type `object_name.` and press Tab to view the object’s +attributes. Besides Python objects and keywords, tab completion also works on +file and directory names.\* + +```ipython +In [5]: x = 10 + +In [6]: x. + as_integer_ratio() conjugate() imag to_bytes() + bit_count() denominator numerator + bit_length() from_bytes() real +``` + +#### Magic functions + +The console and the notebooks support so-called _magic_ functions by prefixing +a command with the `%` character. For example, the `run` and `whos` functions +from the previous section are magic functions. Note that the setting +`automagic`, which is enabled by default, allows you to omit the preceding `%` +sign. Thus, you can just type the magic function and it will work. + +Other useful magic functions are: + +**`%cd` to change the current directory** + +```ipython +In [1]: cd /tmp +/tmp +``` + +**`%cpaste`** allows you to paste code, especially code from websites which has +been prefixed with the standard Python prompt (e.g. `>>>`) or with an IPython +prompt (e.g. `In [3]`): + +```ipython +In [2]: %cpaste +Pasting code; enter '--' alone on the line to stop or use Ctrl-D. +:>>> for i in range(3): +:... 
print(i) +:-- +0 +1 +2 +``` + +**`%timeit`** allows you to time the execution of short snippets using the +`timeit` module from the standard library: + +```ipython +In [3]: %timeit x = 10 +10000000 loops, best of 3: 39 ns per loop +``` + +:::{seealso} +{ref}`Chapter on optimizing code ` +::: + +**`%debug`** allows you to enter post-mortem debugging. That is to say, if the +code you try to execute raises an exception, using `%debug` will enter the +debugger at the point where the exception was thrown. For example, consider the following code. + +```{code-cell} +def func(a, b): + c = a * 3 + d = b * 20 + return c / d + +func(2, 3) +``` + +All good, but now you try: + +```{code-cell} +:tags: [raises-exception] + +func(3, 0) +``` + +You run the code and see the error, but perhaps you want to go in and have +a look at what the values of `c` and `d` are at the time of the error. + +You can next type the `%debug` magic to enter the debugger, where you can print out values inside the function, before exiting the debugger with `q` followed by Return. + +```ipython +In [4]: %debug +> /var/folders/hd/rfxyn9gx4bl39bvwzrgn3rtr0000gn/T/ipykernel_62633/2015602957.py(2)func() + 1 def func(a, b): +----> 2 return a / b + 3 + 4 func(10, 0) + +ipdb> +``` + +(where `ipdb>` is the debugger prompt). + +:::{seealso} +{ref}`Chapter on debugging ` +::: + +#### Aliases + +IPython and Jupyter ship with various _aliases_ which emulate common UNIX +command line tools such as `ls` to list files, `cp` to copy files and `rm` to +remove files (a full list of aliases is shown when typing `alias`). + +:::{admonition} Getting help + +- The built-in cheat-sheet is accessible via the `%quickref` magic + function. +- A list of all available magic functions is shown when typing `%magic`. 
+ ::: diff --git a/intro/intro.rst b/intro/intro.rst deleted file mode 100644 index 8f09cf2bd..000000000 --- a/intro/intro.rst +++ /dev/null @@ -1,472 +0,0 @@ -Python scientific computing ecosystem -====================================== - -**Authors**: *Fernando Perez, Emmanuelle Gouillart, Gaël Varoquaux, -Valentin Haenel* - -Why Python? ------------- - -The scientist's needs -....................... - -* Get data (simulation, experiment control), - -* Manipulate and process data, - -* Visualize results, quickly to understand, but also with high quality - figures, for reports or publications. - -Python's strengths -.................. - -* **Batteries included** Rich collection of already existing **bricks** - of classic numerical methods, plotting or data processing tools. We - don't want to re-program the plotting of a curve, a Fourier transform - or a fitting algorithm. Don't reinvent the wheel! - -* **Easy to learn** Most scientists are not paid as programmers, neither - have they been trained so. They need to be able to draw a curve, smooth - a signal, do a Fourier transform in a few minutes. - -* **Easy communication** To keep code alive within a lab or a company - it should be as readable as a book by collaborators, students, or - maybe customers. Python syntax is simple, avoiding strange symbols or - lengthy routine specifications that would divert the reader from - mathematical or scientific understanding of the code. - -* **Efficient code** Python numerical modules are computationally - efficient. But needless to say that a very fast code becomes useless if - too much time is spent writing it. Python aims for quick development - times and quick execution times. - -* **Universal** Python is a language used for many different problems. - Learning Python avoids learning a new software for each new problem. - -How does Python compare to other solutions? -............................................ - -Compiled languages: C, C++, Fortran... 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Pros: - - * Very fast. For heavy computations, it's difficult to outperform these - languages. - -:Cons: - - * Painful usage: no interactivity during development, mandatory - compilation steps, verbose syntax, manual memory management. These - are **difficult languages** for non programmers. - -Matlab scripting language -~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Pros: - - * Very rich collection of libraries with numerous algorithms, for many - different domains. Fast execution because these libraries are often written - in a compiled language. - - * Pleasant development environment: comprehensive and help, integrated - editor, etc. - - * Commercial support is available. - -:Cons: - - * Base language is quite poor and can become restrictive for advanced users. - - * Not free and not everything is open sourced. - -Julia -~~~~~~~ - -:Pros: - - * Fast code, yet interactive and simple. - - * Easily connects to Python or C. - -:Cons: - - * Ecosystem limited to numerical computing. - - * Still young. - -Other scripting languages: Scilab, Octave, R, IDL, etc. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Pros: - - * Open-source, free, or at least cheaper than Matlab. - - * Some features can be very advanced (statistics in R, etc.) - -:Cons: - - * Fewer available algorithms than in Matlab, and the language - is not more advanced. - - * Some software are dedicated to one domain. Ex: Gnuplot to draw - curves. These programs are very powerful, but they are restricted to - a single type of usage, such as plotting. - -Python -~~~~~~ - -:Pros: - - * Very rich scientific computing libraries - - * Well thought out language, allowing to write very readable and well - structured code: we "code what we think". - - * Many libraries beyond scientific computing (web server, - serial port access, etc.) - - * Free and open-source software, widely spread, with a vibrant community. 
- - * A variety of powerful environments to work in, such as - `IPython `__, - `Spyder `__, - `Jupyter notebooks `__, - `Pycharm `__, - `Visual Studio Code `__ - -:Cons: - - * Not all the algorithms that can be found in more specialized - software or toolboxes. - -The scientific Python ecosystem -------------------------------- - -Unlike Matlab, or R, Python does not come with a pre-bundled set -of modules for scientific computing. Below are the basic building blocks -that can be combined to obtain a scientific computing environment: - -| - -**Python**, a generic and modern computing language - -* The language: flow control, data types (``string``, ``int``), - data collections (lists, dictionaries), etc. - -* Modules of the standard library: string processing, file - management, simple network protocols. - -* A large number of specialized modules or applications written in - Python: web framework, etc. ... and scientific - computing. - -* Development tools (automatic testing, documentation generation) - -.. seealso:: - - :ref:`chapter on Python language ` - -**Core numeric libraries** - -* **NumPy**: numerical computing with powerful **numerical arrays** - objects, and routines to manipulate them. https://numpy.org/ - - .. seealso:: - - :ref:`chapter on numpy ` - -* **SciPy** : high-level numerical routines. - Optimization, regression, interpolation, etc https://scipy.org/ - - .. seealso:: - - :ref:`chapter on SciPy ` - -* **Matplotlib** : 2-D visualization, "publication-ready" plots - https://matplotlib.org/ - - .. 
seealso:: - - :ref:`chapter on matplotlib ` - -**Advanced interactive environments**: - -* **IPython**, an advanced **Python console** https://ipython.org/ - -* **Jupyter**, **notebooks** in the browser https://jupyter.org/ - - -**Domain-specific packages**, - -* **pandas, statsmodels, seaborn** for :ref:`statistics ` - -* **sympy** for :ref:`symbolic computing ` - -* **scikit-image** for :ref:`image processing ` - -* **scikit-learn** for :ref:`machine learning ` - -and many more packages not documented in the Scientific Python Lectures. - -.. seealso:: - - :ref:`chapters on advanced topics ` - - :ref:`chapters on packages and applications ` - -|clear-floats| - -.. - >>> import numpy as np - - -Before starting: Installing a working environment --------------------------------------------------- -Python comes in many flavors, and there are many ways to install it. -However, we recommend to install a scientific-computing distribution, -that comes readily with optimized versions of scientific modules. - -**Under Linux** - -If you have a recent distribution, most of the tools are probably -packaged, and it is recommended to use your package manager. - -**Other systems** - -There are several fully-featured scientific Python distributions: - - -.. rst-class:: horizontal - - * `Anaconda `_ - * `WinPython `_ - - -The workflow: interactive environments and text editors ----------------------------------------------------------- - -**Interactive work to test and understand algorithms:** In this section, we -describe a workflow combining interactive work and consolidation. - -Python is a general-purpose language. As such, there is not one blessed -environment to work in, and not only one way of using it. Although -this makes it harder for beginners to find their way, it makes it -possible for Python to be used for programs, in web servers, or -embedded devices. - -.. _interactive_work: - -Interactive work -................. 
- -We recommend an interactive work with the `IPython -`__ console, or its offspring, the `Jupyter notebook -`_. They -are handy to explore and understand algorithms. - -.. sidebar:: Under the notebook - - To execute code, press "shift enter" - -Start `ipython`: - -.. ipython:: - :verbatim: - - In [1]: print('Hello world') - Hello world - -Getting help by using the **?** operator after an object: - -.. ipython:: - - In [1]: print? - -.. seealso:: - - * IPython user manual: https://ipython.readthedocs.io/en/stable/ - - * Jupyter Notebook QuickStart: - https://docs.jupyter.org/en/latest/start/index.html - -Elaboration of the work in an editor -.......................................... - -As you move forward, it will be important to not only work interactively, -but also to create and reuse Python files. For this, a powerful code editor -will get you far. Here are several good easy-to-use editors: - - * `Spyder `_: integrates an IPython - console, a debugger, a profiler... - * `PyCharm `_: integrates an IPython - console, notebooks, a debugger... (freely available, - but commercial) - * `Visual Studio Code `_: - integrates a Python console, notebooks, a debugger, ... - -Some of these are shipped by the various scientific Python distributions, -and you can find them in the menus. - - -As an exercise, create a file `my_file.py` in a code editor, and add the -following lines:: - - s = 'Hello world' - print(s) - -Now, you can run it in IPython console or a notebook and explore the -resulting variables: - -.. ipython:: - - @suppress - In [1]: s = 'Hello world' - - @verbatim - In [1]: %run my_file.py - Hello world - - @doctest - In [2]: s - Out[2]: 'Hello world' - - @verbatim - In [3]: %whos - Variable Type Data/Info - ---------------------------- - s str Hello world - - -.. 
topic:: **From a script to functions** - - While it is tempting to work only with scripts, that is a file full - of instructions following each other, do plan to progressively evolve - the script to a set of functions: - - * A script is not reusable, functions are. - - * Thinking in terms of functions helps breaking the problem in small - blocks. - - -IPython and Jupyter Tips and Tricks -.................................... - -The user manuals contain a wealth of information. Here we give a quick -introduction to four useful features: *history*, *tab completion*, *magic -functions*, and *aliases*. - -| - -**Command history** Like a UNIX shell, the IPython console supports -command history. Type *up* and *down* to navigate previously typed -commands: - -.. ipython:: - - In [1]: x = 10 - - @verbatim - In [2]: - - In [2]: x = 10 - -| - -**Tab completion** Tab completion, is a convenient way to explore the -structure of any object you’re dealing with. Simply type object_name. to -view the object’s attributes. Besides Python objects and keywords, tab -completion also works on file and directory names.* - -.. ipython:: - - In [1]: x = 10 - - @verbatim - In [2]: x. - as_integer_ratio() conjugate() imag to_bytes() - bit_count() denominator numerator - bit_length() from_bytes() real - -| - -**Magic functions** -The console and the notebooks support so-called *magic* functions by prefixing a command with the -``%`` character. For example, the ``run`` and ``whos`` functions from the -previous section are magic functions. Note that, the setting ``automagic``, -which is enabled by default, allows you to omit the preceding ``%`` sign. Thus, -you can just type the magic function and it will work. - -Other useful magic functions are: - -* ``%cd`` to change the current directory. - - .. ipython:: - - In [1]: cd /tmp - /tmp - -* ``%cpaste`` allows you to paste code, especially code from websites which has - been prefixed with the standard Python prompt (e.g. 
``>>>``) or with an ipython - prompt, (e.g. ``in [3]``): - - .. ipython:: - - In [2]: %cpaste - Pasting code; enter '--' alone on the line to stop or use Ctrl-D. - :>>> for i in range(3): - :... print(i) - :-- - 0 - 1 - 2 - -* ``%timeit`` allows you to time the execution of short snippets using the - ``timeit`` module from the standard library: - - .. ipython:: - - In [3]: %timeit x = 10 - 10000000 loops, best of 3: 39 ns per loop - - .. seealso:: :ref:`Chapter on optimizing code ` - -* ``%debug`` allows you to enter post-mortem debugging. That is to say, if the - code you try to execute, raises an exception, using ``%debug`` will enter the - debugger at the point where the exception was thrown. - - .. ipython:: - :okexcept: - - In [4]: x === 10 - - @verbatim - In [5]: %debug - > /home/jarrod/.venv/lectures/lib64/python3.11/site-packages/IPython/core/compilerop.py(86)ast_parse() - 84 Arguments are exactly the same as ast.parse (in the standard library), - 85 and are passed to the built-in compile function.""" - ---> 86 return compile(source, filename, symbol, self.flags | PyCF_ONLY_AST, 1) - 87 - 88 def reset_compiler_flags(self): - ipdb> locals() - {'self': , 'source': 'x === 10\n', 'filename': '', 'symbol': 'exec'} - ipdb> - - .. seealso:: :ref:`Chapter on debugging ` - -| - -**Aliases** -Furthermore IPython ships with various *aliases* which emulate common UNIX -command line tools such as ``ls`` to list files, ``cp`` to copy files and ``rm`` to -remove files (a full list of aliases is shown when typing ``alias``). - -.. topic:: **Getting help** - - * The built-in cheat-sheet is accessible via the ``%quickref`` magic - function. - - * A list of all available magic functions is shown when typing ``%magic``. - -.. 
:vim:spell: diff --git a/intro/language/basic_types.md b/intro/language/basic_types.md new file mode 100644 index 000000000..02cadd734 --- /dev/null +++ b/intro/language/basic_types.md @@ -0,0 +1,584 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Basic types + +## Numerical types + +::: {note} +:class: dropdown + +Python supports the following numerical, scalar types: +::: + ++++ + +**Floats:** + +```{code-cell} +c = 2.1 +type(c) +``` + +**Complex:** + +```{code-cell} +a = 1.5 + 0.5j +a.real +``` + +```{code-cell} +a.imag +``` + +```{code-cell} +type(1. + 0j) +``` + +**Booleans:** + +```{code-cell} +3 > 4 +``` + +```{code-cell} +test = (3 > 4) +test +``` + +```{code-cell} +type(test) +``` + +::: {note} +:class: dropdown + +A Python shell can therefore replace your pocket calculator, with the +basic arithmetic operations `+`, `-`, `*`, `/`, `%` (modulo) +natively implemented +::: + +```{code-cell} +7 * 3. +``` + +```{code-cell} +2**10 +``` + +```{code-cell} +8 % 3 +``` + +Type conversion (casting): + +```{code-cell} +float(1) +``` + +## Containers + +::: {note} +:class: dropdown + +Python provides many efficient types of containers, in which +collections of objects can be stored. +::: + +### Lists + +::: {note} +:class: dropdown + +A list is an ordered collection of objects, that may have different +types. For example: +::: + +```{code-cell} +colors = ['red', 'blue', 'green', 'black', 'white'] +type(colors) +``` + +Indexing: accessing individual objects contained in the list: + +```{code-cell} +colors[2] +``` + +Counting from the end with negative indices: + +```{code-cell} +colors[-1] +``` + +```{code-cell} +colors[-2] +``` + +:::{warning} +**Indexing starts at 0** (as in C), not at 1 (as in Fortran or Matlab)! 
+::: + +Slicing: obtaining sublists of regularly-spaced elements: + +```{code-cell} +colors +``` + +```{code-cell} +colors[2:4] +``` + +:::{warning} +Note that `colors[start:stop]` contains the elements with indices `i` +such that `start <= i < stop` (`i` ranging from `start` to +`stop-1`). Therefore, `colors[start:stop]` has `(stop - start)` elements. +::: + +**Slicing syntax**: `colors[start:stop:stride]` + +::: {note} +:class: dropdown + +All slicing parameters are optional: + +```{code-cell} +colors +``` + +```{code-cell} +colors[3:] +``` + +```{code-cell} +colors[:3] +``` + +```{code-cell} +colors[::2] +``` + +::: + +Lists are _mutable_ objects and can be modified: + +```{code-cell} +colors[0] = 'yellow' +colors +``` + +```{code-cell} +colors[2:4] = ['gray', 'purple'] +colors +``` + +::::{note} +The elements of a list may have different types: + +```{code-cell} +colors = [3, -200, 'hello'] +colors +``` + +```{code-cell} +colors[1], colors[2] +``` + +::: {note} +:class: dropdown + +For collections of numerical data that all have the same type, it +is often **more efficient** to use the `array` type provided by +the `numpy` module. A NumPy array is a chunk of memory +containing fixed-sized items. With NumPy arrays, operations on +elements can be faster because elements are regularly spaced in +memory and more operations are performed through specialized C +functions instead of Python loops. +::: +:::: + +::: {note} +:class: dropdown + +Python offers a large panel of functions to modify lists, or query +them. 
Here are a few examples; for more details, see + +::: + +Add and remove elements: + +```{code-cell} +colors = ['red', 'blue', 'green', 'black', 'white'] +colors.append('pink') +colors +``` + +```{code-cell} +colors.pop() # removes and returns the last item +``` + +```{code-cell} +colors +``` + +```{code-cell} +colors.extend(['pink', 'purple']) # extend colors, in-place +colors +``` + +```{code-cell} +colors = colors[:-2] +colors +``` + +Reverse: + +```{code-cell} +rcolors = colors[::-1] +rcolors +``` + +```{code-cell} +rcolors2 = list(colors) # new object that is a copy of colors in a different memory area +rcolors2 +``` + +```{code-cell} +rcolors2.reverse() # in-place; reversing rcolors2 does not affect colors +rcolors2 +``` + +Concatenate and repeat lists: + +```{code-cell} +rcolors + colors +``` + +```{code-cell} +rcolors * 2 +``` + +**Sort:** + +```{code-cell} +sorted(rcolors) # new object +``` + +```{code-cell} +rcolors +``` + +```{code-cell} +rcolors.sort() # in-place +rcolors +``` + +:::{admonition} Methods and Object-Oriented Programming +The notation `rcolors.method()` (e.g. `rcolors.append(3)` and `colors.pop()`) is our +first example of object-oriented programming (OOP). Being a `list`, the +object `rcolors` owns the _method_ `function` that is called using the notation +**.**. No further knowledge of OOP than understanding the notation **.** is +necessary for going through this tutorial. +::: + +:::{admonition} Discovering methods: +Reminder: in Ipython: tab-completion (press tab) + +```python + + +rcolors. + append() count() insert() reverse() + clear() extend() pop() sort() + copy() index() remove() +``` + +::: + +### Strings + +Different string syntaxes (simple, double or triple quotes): + +```{code-cell} +s = 'Hello, how are you?' 
+s = "Hi, what's up" +s = '''Hello, + how are you''' # tripling the quotes allows the + # string to span more than one line +s = """Hi, +what's up?""" +``` + +However, if you try to run this code: + +```text +'Hi, what's up?' +``` + +— you will get a syntax error. (Try it.) (Why?) + +This syntax error can be avoided by enclosing the string in double quotes +instead of single quotes. Alternatively, one can prepend a backslash to the +second single quote. Other uses of the backslash are, e.g., the newline +character `\n` and the tab character `\t`. + +::: {note} +:class: dropdown + +Strings are collections like lists. Hence they can be indexed and +sliced, using the same syntax and rules. +::: + +Indexing: + +```{code-cell} +a = "hello" +a[0] +``` + +```{code-cell} +a[1] +``` + +```{code-cell} +a[-1] +``` + +::: {note} +:class: dropdown + +(Remember that negative indices correspond to counting from the right +end.) +::: + +Slicing: + +```{code-cell} +a = "hello, world!" +a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5 +``` + +```{code-cell} +a[2:10:2] # Syntax: a[start:stop:step] +``` + +```{code-cell} +a[::3] # every three characters, from beginning to end +``` + +::: {note} +:class: dropdown + +Accents and special characters can also be handled as in Python 3 +strings consist of Unicode characters. +::: + +A string is an **immutable object** and it is not possible to modify its +contents. One may however create new strings from the original one. + +```{code-cell} +:tags: [raises-exception] + +a = "hello, world!" +a[2] = 'z' +``` + +```{code-cell} +a.replace('l', 'z', 1) +``` + +```{code-cell} +a.replace('l', 'z') +``` + +::: {note} +:class: dropdown + +Strings have many useful methods, such as `a.replace` as seen +above. Remember the `a.` object-oriented notation and use tab +completion or `help(str)` to search for new methods. 
+::: + +:::{admonition} See also + +Python offers advanced possibilities for manipulating strings, +looking for patterns or formatting. The interested reader is referred to +<https://docs.python.org/3/library/stdtypes.html#string-methods> and +<https://docs.python.org/3/library/string.html#format-string-syntax> +::: + +String formatting: + +```{code-cell} +'An integer: %i; a float: %f; another string: %s' % (1, 0.1, 'string') # with more values use tuple after % +``` + +```{code-cell} +i = 102 +filename = 'processing_of_dataset_%d.txt' % i # no need for tuples with just one value after % +filename +``` + +### Dictionaries + +::: {note} +:class: dropdown + +A dictionary is basically an efficient table that **maps keys to +values**. +::: + +```{code-cell} +tel = {'emmanuelle': 5752, 'sebastian': 5578} +tel['francis'] = 5915 +tel +``` + +```{code-cell} +tel['sebastian'] +``` + +```{code-cell} +tel.keys() +``` + +```{code-cell} +tel.values() +``` + +```{code-cell} +'francis' in tel +``` + +::: {note} +:class: dropdown + +It can be used to conveniently store and retrieve values +associated with a name (a string for a date, a name, etc.). See +<https://docs.python.org/3/tutorial/datastructures.html#dictionaries> +for more information. + +A dictionary can have keys (resp. values) with different types: + +```{code-cell} +d = {'a':1, 'b':2, 3:'hello'} +d +``` + +::: + +### More container types + +**Tuples** + +Tuples are basically immutable lists. The elements of a tuple are written +between parentheses, or just separated by commas: + +```{code-cell} +t = 12345, 54321, 'hello!' +t[0] +``` + +```{code-cell} +t +u = (0, 2) +``` + +**Sets:** unordered, unique items: + +```{code-cell} +s = set(('a', 'b', 'c', 'a')) +s +``` + +```{code-cell} +s.difference(('a', 'b')) +``` + +## Assignment operator + +::: {note} +:class: dropdown + +[Python language reference](https://docs.python.org/3/reference/simple_stmts.html#assignment-statements) +says: + +> Assignment statements are used to (re)bind names to values and to +> modify attributes or items of mutable objects. + +In short, it works as follows (simple assignment): + +1. 
an expression on the right hand side is evaluated, the corresponding + object is created/obtained +2. a **name** on the left hand side is assigned, or bound, to the + r.h.s. object + ::: + +Things to note: + +- A single object can have several names bound to it: + +```{code-cell} +a = [1, 2, 3] +b = a +a +``` + +```{code-cell} +b +``` + +```{code-cell} +a is b +``` + +```{code-cell} +b[1] = 'hi!' +a +``` + +- to change a list _in place_, use indexing/slices: + +```{code-cell} +a = [1, 2, 3] +a +``` + +```{code-cell} +a = ['a', 'b', 'c'] # Creates another object. +a +``` + +```{code-cell} +id(a) +``` + +```{code-cell} +a[:] = [1, 2, 3] # Modifies object in place. +a +``` + +```{code-cell} +id(a) +``` + +- the key concept here is **mutable vs. immutable** + + - mutable objects can be changed in place + - immutable objects cannot be modified once created + +:::{admonition} See also + +A very good and detailed explanation of the above issues can +be found in David M. Beazley's article [Types and Objects in Python](https://www.informit.com/articles/article.aspx?p=453682). +::: diff --git a/intro/language/basic_types.rst b/intro/language/basic_types.rst deleted file mode 100644 index 8af186cb4..000000000 --- a/intro/language/basic_types.rst +++ /dev/null @@ -1,472 +0,0 @@ -Basic types -============ - -Numerical types ----------------- - -.. tip:: - - Python supports the following numerical, scalar types: - -:Integer: - - >>> 1 + 1 - 2 - >>> a = 4 - >>> type(a) - - -:Floats: - - >>> c = 2.1 - >>> type(c) - - -:Complex: - - >>> a = 1.5 + 0.5j - >>> a.real - 1.5 - >>> a.imag - 0.5 - >>> type(1. + 0j) - - -:Booleans: - - >>> 3 > 4 - False - >>> test = (3 > 4) - >>> test - False - >>> type(test) - - -.. tip:: - - A Python shell can therefore replace your pocket calculator, with the - basic arithmetic operations ``+``, ``-``, ``*``, ``/``, ``%`` (modulo) - natively implemented - -:: - - >>> 7 * 3. 
- 21.0 - >>> 2**10 - 1024 - >>> 8 % 3 - 2 - -Type conversion (casting):: - - >>> float(1) - 1.0 - - -Containers ------------- - -.. tip:: - - Python provides many efficient types of containers, in which - collections of objects can be stored. - -Lists -~~~~~ - -.. tip:: - - A list is an ordered collection of objects, that may have different - types. For example: - -:: - - >>> colors = ['red', 'blue', 'green', 'black', 'white'] - >>> type(colors) - - -Indexing: accessing individual objects contained in the list:: - - >>> colors[2] - 'green' - -Counting from the end with negative indices:: - - >>> colors[-1] - 'white' - >>> colors[-2] - 'black' - -.. warning:: - - **Indexing starts at 0** (as in C), not at 1 (as in Fortran or Matlab)! - -Slicing: obtaining sublists of regularly-spaced elements:: - - >>> colors - ['red', 'blue', 'green', 'black', 'white'] - >>> colors[2:4] - ['green', 'black'] - -.. Warning:: - - Note that ``colors[start:stop]`` contains the elements with indices ``i`` - such as ``start<= i < stop`` (``i`` ranging from ``start`` to - ``stop-1``). Therefore, ``colors[start:stop]`` has ``(stop - start)`` elements. - -**Slicing syntax**: ``colors[start:stop:stride]`` - -.. tip:: - - All slicing parameters are optional:: - - >>> colors - ['red', 'blue', 'green', 'black', 'white'] - >>> colors[3:] - ['black', 'white'] - >>> colors[:3] - ['red', 'blue', 'green'] - >>> colors[::2] - ['red', 'green', 'white'] - -Lists are *mutable* objects and can be modified:: - - >>> colors[0] = 'yellow' - >>> colors - ['yellow', 'blue', 'green', 'black', 'white'] - >>> colors[2:4] = ['gray', 'purple'] - >>> colors - ['yellow', 'blue', 'gray', 'purple', 'white'] - -.. Note:: - - The elements of a list may have different types:: - - >>> colors = [3, -200, 'hello'] - >>> colors - [3, -200, 'hello'] - >>> colors[1], colors[2] - (-200, 'hello') - - .. 
tip:: - - For collections of numerical data that all have the same type, it - is often **more efficient** to use the ``array`` type provided by - the ``numpy`` module. A NumPy array is a chunk of memory - containing fixed-sized items. With NumPy arrays, operations on - elements can be faster because elements are regularly spaced in - memory and more operations are performed through specialized C - functions instead of Python loops. - - -.. tip:: - - Python offers a large panel of functions to modify lists, or query - them. Here are a few examples; for more details, see - https://docs.python.org/3/tutorial/datastructures.html#more-on-lists - -Add and remove elements:: - - >>> colors = ['red', 'blue', 'green', 'black', 'white'] - >>> colors.append('pink') - >>> colors - ['red', 'blue', 'green', 'black', 'white', 'pink'] - >>> colors.pop() # removes and returns the last item - 'pink' - >>> colors - ['red', 'blue', 'green', 'black', 'white'] - >>> colors.extend(['pink', 'purple']) # extend colors, in-place - >>> colors - ['red', 'blue', 'green', 'black', 'white', 'pink', 'purple'] - >>> colors = colors[:-2] - >>> colors - ['red', 'blue', 'green', 'black', 'white'] - -Reverse:: - - >>> rcolors = colors[::-1] - >>> rcolors - ['white', 'black', 'green', 'blue', 'red'] - >>> rcolors2 = list(colors) # new object that is a copy of colors in a different memory area - >>> rcolors2 - ['red', 'blue', 'green', 'black', 'white'] - >>> rcolors2.reverse() # in-place; reversing rcolors2 does not affect colors - >>> rcolors2 - ['white', 'black', 'green', 'blue', 'red'] - -Concatenate and repeat lists:: - - >>> rcolors + colors - ['white', 'black', 'green', 'blue', 'red', 'red', 'blue', 'green', 'black', 'white'] - >>> rcolors * 2 - ['white', 'black', 'green', 'blue', 'red', 'white', 'black', 'green', 'blue', 'red'] - - -.. 
tip:: - - Sort:: - - >>> sorted(rcolors) # new object - ['black', 'blue', 'green', 'red', 'white'] - >>> rcolors - ['white', 'black', 'green', 'blue', 'red'] - >>> rcolors.sort() # in-place - >>> rcolors - ['black', 'blue', 'green', 'red', 'white'] - -.. topic:: **Methods and Object-Oriented Programming** - - The notation ``rcolors.method()`` (e.g. ``rcolors.append(3)`` and ``colors.pop()``) is our - first example of object-oriented programming (OOP). Being a ``list``, the - object `rcolors` owns the *method* `function` that is called using the notation - **.**. No further knowledge of OOP than understanding the notation **.** is - necessary for going through this tutorial. - - -.. topic:: **Discovering methods:** - - Reminder: in Ipython: tab-completion (press tab) - - .. ipython:: - - @verbatim - In [28]: rcolors. - append() count() insert() reverse() - clear() extend() pop() sort() - copy() index() remove() - -Strings -~~~~~~~ - -Different string syntaxes (simple, double or triple quotes):: - - s = 'Hello, how are you?' - s = "Hi, what's up" - s = '''Hello, - how are you''' # tripling the quotes allows the - # string to span more than one line - s = """Hi, - what's up?""" - -.. ipython:: - :okexcept: - - In [1]: 'Hi, what's up?' - -This syntax error can be avoided by enclosing the string in double quotes -instead of single quotes. Alternatively, one can prepend a backslash to the -second single quote. Other uses of the backslash are, e.g., the newline character -``\n`` and the tab character ``\t``. - -.. tip:: - - Strings are collections like lists. Hence they can be indexed and - sliced, using the same syntax and rules. - -Indexing:: - - >>> a = "hello" - >>> a[0] - 'h' - >>> a[1] - 'e' - >>> a[-1] - 'o' - -.. tip:: - - (Remember that negative indices correspond to counting from the right - end.) - -Slicing:: - - - >>> a = "hello, world!" 
- >>> a[3:6] # 3rd to 6th (excluded) elements: elements 3, 4, 5 - 'lo,' - >>> a[2:10:2] # Syntax: a[start:stop:step] - 'lo o' - >>> a[::3] # every three characters, from beginning to end - 'hl r!' - -.. tip:: - - Accents and special characters can also be handled as in Python 3 - strings consist of Unicode characters. - - -A string is an **immutable object** and it is not possible to modify its -contents. One may however create new strings from the original one. - -.. ipython:: - - In [53]: a = "hello, world!" - In [54]: a[2] = 'z' - --------------------------------------------------------------------------- - Traceback (most recent call last): - File "", line 1, in - TypeError: 'str' object does not support item assignment - - In [55]: a.replace('l', 'z', 1) - Out[55]: 'hezlo, world!' - In [56]: a.replace('l', 'z') - Out[56]: 'hezzo, worzd!' - -.. tip:: - - Strings have many useful methods, such as ``a.replace`` as seen - above. Remember the ``a.`` object-oriented notation and use tab - completion or ``help(str)`` to search for new methods. - -.. seealso:: - - Python offers advanced possibilities for manipulating strings, - looking for patterns or formatting. The interested reader is referred to - https://docs.python.org/3/library/stdtypes.html#string-methods and - https://docs.python.org/3/library/string.html#format-string-syntax - -String formatting:: - - >>> 'An integer: %i; a float: %f; another string: %s' % (1, 0.1, 'string') # with more values use tuple after % - 'An integer: 1; a float: 0.100000; another string: string' - - >>> i = 102 - >>> filename = 'processing_of_dataset_%d.txt' % i # no need for tuples with just one value after % - >>> filename - 'processing_of_dataset_102.txt' - -Dictionaries -~~~~~~~~~~~~~ - -.. tip:: - - A dictionary is basically an efficient table that **maps keys to - values**. 
- -:: - - >>> tel = {'emmanuelle': 5752, 'sebastian': 5578} - >>> tel['francis'] = 5915 - >>> tel - {'emmanuelle': 5752, 'sebastian': 5578, 'francis': 5915} - >>> tel['sebastian'] - 5578 - >>> tel.keys() - dict_keys(['emmanuelle', 'sebastian', 'francis']) - >>> tel.values() - dict_values([5752, 5578, 5915]) - >>> 'francis' in tel - True - -.. tip:: - - It can be used to conveniently store and retrieve values - associated with a name (a string for a date, a name, etc.). See - https://docs.python.org/3/tutorial/datastructures.html#dictionaries - for more information. - - A dictionary can have keys (resp. values) with different types:: - - >>> d = {'a':1, 'b':2, 3:'hello'} - >>> d - {'a': 1, 'b': 2, 3: 'hello'} - -More container types -~~~~~~~~~~~~~~~~~~~~ - -**Tuples** - -Tuples are basically immutable lists. The elements of a tuple are written -between parentheses, or just separated by commas:: - - >>> t = 12345, 54321, 'hello!' - >>> t[0] - 12345 - >>> t - (12345, 54321, 'hello!') - >>> u = (0, 2) - -**Sets:** unordered, unique items:: - - >>> s = set(('a', 'b', 'c', 'a')) - >>> s # doctest: +SKIP - {'a', 'b', 'c'} - >>> s.difference(('a', 'b')) - {'c'} - -Assignment operator -------------------- - -.. tip:: - - `Python library reference - `_ - says: - - Assignment statements are used to (re)bind names to values and to - modify attributes or items of mutable objects. - - In short, it works as follows (simple assignment): - - #. an expression on the right hand side is evaluated, the corresponding - object is created/obtained - #. a **name** on the left hand side is assigned, or bound, to the - r.h.s. object - -Things to note: - -* A single object can have several names bound to it: - -.. ipython:: - - In [1]: a = [1, 2, 3] - - In [2]: b = a - - In [3]: a - Out[3]: [1, 2, 3] - - In [4]: b - Out[4]: [1, 2, 3] - - In [5]: a is b - Out[5]: True - - In [6]: b[1] = 'hi!' - - In [7]: a - Out[7]: [1, 'hi!', 3] - -* to change a list *in place*, use indexing/slices: - -.. 
ipython:: - - In [1]: a = [1, 2, 3] - - In [3]: a - Out[3]: [1, 2, 3] - - In [4]: a = ['a', 'b', 'c'] # Creates another object. - - In [5]: a - Out[5]: ['a', 'b', 'c'] - - In [6]: id(a) - Out[6]: 138641676 - - In [7]: a[:] = [1, 2, 3] # Modifies object in place. - - In [8]: a - Out[8]: [1, 2, 3] - - In [9]: id(a) - Out[9]: 138641676 # Same as in Out[6], yours will differ... - -* the key concept here is **mutable vs. immutable** - - * mutable objects can be changed in place - * immutable objects cannot be modified once created - -.. seealso:: A very good and detailed explanation of the above issues can - be found in David M. Beazley's article `Types and Objects in Python - `_. diff --git a/intro/language/control_flow.md b/intro/language/control_flow.md new file mode 100644 index 000000000..646ea2a59 --- /dev/null +++ b/intro/language/control_flow.md @@ -0,0 +1,344 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Control Flow + +Controls the order in which the code is executed. + +## if/elif/else + +```{code-cell} +if 2**2 == 4: + print("Obvious!") +``` + +**Blocks are delimited by indentation** + +::: {note} +:class: dropdown + +Type the following lines in your Python interpreter, and be careful +to **respect the indentation depth**. The Jupyter / IPython shell automatically +increases the indentation depth after a colon `:` sign; to decrease the +indentation depth, go four spaces to the left with the Backspace key. Press the +Enter key twice to leave the logical block. +::: + +```{code-cell} +a = 10 +``` + +```{code-cell} +if a == 1: + print(1) +elif a == 2: + print(2) +else: + print("A lot") +``` + +Indentation is compulsory in scripts as well. 
As an exercise, re-type the +previous lines with the same indentation in a script `condition.py`, and +execute the script with `run condition.py` in IPython. + ++++ + +## for/range + +Iterating with an index: + +```{code-cell} +for i in range(4): + print(i) +``` + +But most often, it is more readable to iterate over values: + +```{code-cell} +for word in ('cool', 'powerful', 'readable'): + print('Python is %s' % word) +``` + +## while/break/continue + +Typical C-style while loop (Mandelbrot problem): + +```{code-cell} +z = 1 + 1j +while abs(z) < 100: + z = z**2 + 1 +z +``` + +**More advanced features** + +`break` out of enclosing for/while loop: + +```{code-cell} +z = 1 + 1j +``` + +```{code-cell} +while abs(z) < 100: + if z.imag == 0: + break + z = z**2 + 1 +``` + +`continue` the next iteration of a loop: + +```{code-cell} +a = [1, 0, 2, 4] +for element in a: + if element == 0: + continue + print(1. / element) +``` + +## Conditional Expressions + ++++ + +### `if <OBJECT>:` + +Evaluates to `False` for: + +- any number equal to zero (0, 0.0, 0+0j) +- an empty container (list, tuple, set, dictionary, …) +- `False`, `None` + +Evaluates to `True` for: + +- everything else + +Examples: + +```{code-cell} +a = 10 +if a: + print("Evaluated to `True`") +else: + print('Evaluated to `False`') +``` + +```{code-cell} +a = [] +if a: + print("Evaluated to `True`") +else: + print('Evaluated to `False`') +``` + +### `a == b` + +Tests equality, with logics: + +```{code-cell} +1 == 1. +``` + +### `a is b` + +Tests identity: both sides **are the same object**: + +```{code-cell} +a = 1 +b = 1. +a == b +``` + +```{code-cell} +a is b +``` + +```{code-cell} +a = 'A string' +b = a +a is b +``` + +### `a in b` + +For any collection `b`: `b` contains `a`: + +```{code-cell} +b = [1, 2, 3] +2 in b +``` + +```{code-cell} +5 in b +``` + +If `b` is a dictionary, this tests that `a` is a key of `b`. + +```{code-cell} +b = {'first': 0, 'second': 1} +# Tests for key. 
+'first' in b +``` + +```{code-cell} +# Does not test for value. +0 in b +``` + +## Advanced iteration + +**Iterate over any sequence**: + +You can iterate over any sequence (string, list, keys in a dictionary, lines in +a file, ...): + +```{code-cell} +vowels = 'aeiouy' +``` + +```{code-cell} +for i in 'powerful': + if i in vowels: + print(i) +``` + +```{code-cell} +message = "Hello how are you?" +message.split() # returns a list +``` + +```{code-cell} +for word in message.split(): + print(word) +``` + +::: {note} +:class: dropdown + +Few languages (in particular, languages for scientific computing) allow to +loop over anything but integers/indices. With Python it is possible to +loop exactly over the objects of interest without bothering with indices +you often don't care about. This feature can often be used to make +code more readable. +::: + +:::{warning} +It is not safe to modify the sequence you are iterating over. +::: + +### Keeping track of enumeration number + +Common task is to iterate over a sequence while keeping track of the +item number. + +We could use while loop with a counter as above. Or a for loop: + +```{code-cell} +words = ('cool', 'powerful', 'readable') +for i in range(0, len(words)): + print((i, words[i])) +``` + +But, Python provides a built-in function - `enumerate` - for this: + +```{code-cell} +for index, item in enumerate(words): + print((index, item)) +``` + +### Looping over a dictionary + +Use **items**: + +```{code-cell} +d = {'a': 1, 'b':1.2, 'c':1j} +``` + +```{code-cell} +for key, val in d.items(): + print('Key: %s has value: %s' % (key, val)) +``` + +## List Comprehensions + +Instead of creating a list by means of a loop, one can make use +of a list comprehension with a rather self-explaining syntax. 
+ +```{code-cell} +[i**2 for i in range(4)] +``` + +::: {exercise-start} +:label: pi-wallis-ex +:class: dropdown +::: + +Compute the decimals of Pi using the Wallis formula: + +$$ +\pi = 2 \prod_{i=1}^{\infty} \frac{4i^2}{4i^2 - 1} +$$ + +::: {exercise-end} +::: + +::: {solution-start} pi-wallis-ex +:class: dropdown +::: + +```{code-cell} +from functools import reduce + +pi = 3.14159265358979312 + +my_pi = 1.0 + +for i in range(1, 100000): + my_pi *= 4 * i**2 / (4 * i**2 - 1.0) + +my_pi *= 2 + +print(pi) +print(my_pi) +print(abs(pi - my_pi)) +``` + +```{code-cell} +num = 1 +den = 1 +for i in range(1, 100000): + tmp = 4 * i * i + num *= tmp + den *= tmp - 1 + +better_pi = 2 * (num / den) + +print(pi) +print(better_pi) +print(abs(pi - better_pi)) +print(abs(my_pi - better_pi)) +``` + +Solution in a single line using more advanced constructs (reduce, lambda, +list comprehensions): + +```{code-cell} +print( + 2 + * reduce( + lambda x, y: x * y, + [float(4 * (i**2)) / ((4 * (i**2)) - 1) for i in range(1, 100000)], + ) +) +``` + +::: {solution-end} +::: diff --git a/intro/language/control_flow.rst b/intro/language/control_flow.rst deleted file mode 100644 index 0d073100b..000000000 --- a/intro/language/control_flow.rst +++ /dev/null @@ -1,257 +0,0 @@ -Control Flow -============ - -Controls the order in which the code is executed. - -if/elif/else ------------- - -.. code-block:: pycon - - >>> if 2**2 == 4: - ... print("Obvious!") - ... - Obvious! - - -**Blocks are delimited by indentation** - -.. tip:: - - Type the following lines in your Python interpreter, and be careful - to **respect the indentation depth**. The Ipython shell automatically - increases the indentation depth after a colon ``:`` sign; to - decrease the indentation depth, go four spaces to the left with the - Backspace key. Press the Enter key twice to leave the logical block. - -.. code-block:: pycon - - >>> a = 10 - - >>> if a == 1: - ... print(1) - ... elif a == 2: - ... print(2) - ... else: - ... 
print("A lot") - ... - A lot - -Indentation is compulsory in scripts as well. As an exercise, re-type the -previous lines with the same indentation in a script ``condition.py``, and -execute the script with ``run condition.py`` in Ipython. - -for/range ----------- - -Iterating with an index:: - - >>> for i in range(4): - ... print(i) - 0 - 1 - 2 - 3 - -But most often, it is more readable to iterate over values:: - - >>> for word in ('cool', 'powerful', 'readable'): - ... print('Python is %s' % word) - Python is cool - Python is powerful - Python is readable - - -while/break/continue ---------------------- - -Typical C-style while loop (Mandelbrot problem):: - - >>> z = 1 + 1j - >>> while abs(z) < 100: - ... z = z**2 + 1 - >>> z - (-134+352j) - -**More advanced features** - -``break`` out of enclosing for/while loop:: - - >>> z = 1 + 1j - - >>> while abs(z) < 100: - ... if z.imag == 0: - ... break - ... z = z**2 + 1 - - -``continue`` the next iteration of a loop.:: - - >>> a = [1, 0, 2, 4] - >>> for element in a: - ... if element == 0: - ... continue - ... print(1. / element) - 1.0 - 0.5 - 0.25 - - - -Conditional Expressions ------------------------ - -:``if ``: - - Evaluates to False: - * any number equal to zero (0, 0.0, 0+0j) - * an empty container (list, tuple, set, dictionary, ...) - * ``False``, ``None`` - - Evaluates to True: - * everything else - -:``a == b``: - - Tests equality, with logics:: - - >>> 1 == 1. - True - -:``a is b``: - - Tests identity: both sides are the same object:: - - >>> a = 1 - >>> b = 1. - >>> a == b - True - >>> a is b - False - - >>> a = 1 - >>> b = 1 - >>> a is b - True - -:``a in b``: - - For any collection ``b``: ``b`` contains ``a`` :: - - >>> b = [1, 2, 3] - >>> 2 in b - True - >>> 5 in b - False - - - If ``b`` is a dictionary, this tests that ``a`` is a key of ``b``. 
- -Advanced iteration -------------------------- - -Iterate over any *sequence* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can iterate over any sequence (string, list, keys in a dictionary, lines in -a file, ...):: - - >>> vowels = 'aeiouy' - - >>> for i in 'powerful': - ... if i in vowels: - ... print(i) - o - e - u - -:: - - >>> message = "Hello how are you?" - >>> message.split() # returns a list - ['Hello', 'how', 'are', 'you?'] - >>> for word in message.split(): - ... print(word) - ... - Hello - how - are - you? - -.. tip:: - - Few languages (in particular, languages for scientific computing) allow to - loop over anything but integers/indices. With Python it is possible to - loop exactly over the objects of interest without bothering with indices - you often don't care about. This feature can often be used to make - code more readable. - - -.. warning:: Not safe to modify the sequence you are iterating over. - -Keeping track of enumeration number -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Common task is to iterate over a sequence while keeping track of the -item number. - -* Could use while loop with a counter as above. Or a for loop:: - - >>> words = ('cool', 'powerful', 'readable') - >>> for i in range(0, len(words)): - ... print((i, words[i])) - (0, 'cool') - (1, 'powerful') - (2, 'readable') - -* But, Python provides a built-in function - ``enumerate`` - for this:: - - >>> for index, item in enumerate(words): - ... print((index, item)) - (0, 'cool') - (1, 'powerful') - (2, 'readable') - - - -Looping over a dictionary -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Use **items**:: - - >>> d = {'a': 1, 'b':1.2, 'c':1j} - - >>> for key, val in sorted(d.items()): - ... print('Key: %s has value: %s' % (key, val)) - Key: a has value: 1 - Key: b has value: 1.2 - Key: c has value: 1j - -.. note:: - - The ordering of a dictionary is random, thus we use :func:`sorted` - which will sort on the keys. 
- -List Comprehensions -------------------- - -Instead of creating a list by means of a loop, one can make use -of a list comprehension with a rather self-explaining syntax. - -:: - - >>> [i**2 for i in range(4)] - [0, 1, 4, 9] - -_____ - - -.. topic:: Exercise - :class: green - - Compute the decimals of Pi using the Wallis formula: - - .. math:: - \pi = 2 \prod_{i=1}^{\infty} \frac{4i^2}{4i^2 - 1} - -.. :ref:`pi_wallis` diff --git a/intro/scipy/solutions/data.txt b/intro/language/data.txt similarity index 100% rename from intro/scipy/solutions/data.txt rename to intro/language/data.txt diff --git a/intro/language/exceptions.md b/intro/language/exceptions.md new file mode 100644 index 000000000..ca76e48c0 --- /dev/null +++ b/intro/language/exceptions.md @@ -0,0 +1,174 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Exception handling in Python + +It is likely that you have raised Exceptions if you have +typed all the previous commands of the tutorial. For example, you may +have raised an exception if you entered a command with a typo. + +Exceptions are raised by different kinds of errors arising when executing +Python code. In your own code, you may also catch errors, or define custom +error types. You may want to look at the descriptions of [the built-in +Exceptions](https://docs.python.org/3/library/exceptions.html) when looking +for the right exception type. 
+ +## Exceptions + +Exceptions are raised by errors in Python: + +```{code-cell} +:tags: [raises-exception] + +1/0 +``` + +```{code-cell} +:tags: [raises-exception] + +1 + 'e' +``` + +```{code-cell} +:tags: [raises-exception] + +d = {1:1, 2:2} +d[3] +``` + +```{code-cell} +:tags: [raises-exception] + +l = [1, 2, 3] +l[4] +``` + +```{code-cell} +:tags: [raises-exception] + +l.foobar +``` + +As you can see, there are **different types** of exceptions for different errors. + +## Catching exceptions + +### try/except + +```ipython +In [1]: while True: + ....: try: + ....: x = int(input('Please enter a number: ')) + ....: break + ....: except ValueError: + ....: print('That was no valid number. Try again...') + ....: +Please enter a number: a +That was no valid number. Try again... +Please enter a number: 1 + +In [2]: x +Out[9]: 1 +``` + +### try/finally + +```ipython +In [1]: try: + ....: x = int(input('Please enter a number: ')) + ....: finally: + ....: print('Thank you for your input') + ....: +Please enter a number: a +Thank you for your input +--------------------------------------------------------------------------- +ValueError Traceback (most recent call last) +Cell In[10], line 2 + 1 try: +----> 2 x = int(input('Please enter a number: ')) + 3 finally: + 4 print('Thank you for your input') +ValueError: invalid literal for int() with base 10: 'a' +``` + +Important for resource management (e.g. 
closing a file) + +### Easier to ask for forgiveness than for permission + +```{code-cell} +def print_sorted(collection): + try: + collection.sort() + except AttributeError: + pass # The pass statement does nothing + print(collection) +``` + +```{code-cell} +print_sorted([1, 3, 2]) +``` + +```{code-cell} +print_sorted(set((1, 3, 2))) +``` + +```{code-cell} +print_sorted('132') +``` + +## Raising exceptions + +### Capturing and re-raising an exception: + +```{code-cell} +def filter_name(name): + try: + name = name.encode('ascii') + except UnicodeError as e: + if name == 'Gaël': + print('OK, Gaël') + else: + raise e + return name + +filter_name('Gaël') +``` + +```{code-cell} +:tags: [raises-exception] + +filter_name('Stéfan') +``` + +### Exceptions to pass messages between parts of the code: + +```{code-cell} +def achilles_arrow(x): + if abs(x - 1) < 1e-3: + raise StopIteration + x = 1 - (1-x)/2. + return x + +x = 0 + +while True: + try: + x = achilles_arrow(x) + except StopIteration: + break + +x +``` + +Use exceptions to notify certain conditions are met (e.g. `StopIteration`) or +not (e.g. custom error raising). diff --git a/intro/language/exceptions.rst b/intro/language/exceptions.rst deleted file mode 100644 index 3c333a79f..000000000 --- a/intro/language/exceptions.rst +++ /dev/null @@ -1,161 +0,0 @@ -Exception handling in Python -============================ - -It is likely that you have raised Exceptions if you have -typed all the previous commands of the tutorial. For example, you may -have raised an exception if you entered a command with a typo. - -Exceptions are raised by different kinds of errors arising when executing -Python code. In your own code, you may also catch errors, or define custom -error types. You may want to look at the descriptions of the `the built-in -Exceptions `_ when looking -for the right exception type. - -Exceptions ------------ - -Exceptions are raised by errors in Python: - -.. 
ipython:: - :okexcept: - - In [1]: 1/0 - - In [2]: 1 + 'e' - - In [3]: d = {1:1, 2:2} - - In [4]: d[3] - - In [5]: l = [1, 2, 3] - - In [6]: l[4] - - In [7]: l.foobar - -As you can see, there are **different types** of exceptions for different errors. - -Catching exceptions --------------------- - -try/except -~~~~~~~~~~~ - -.. ipython:: - :verbatim: - - In [10]: while True: - ....: try: - ....: x = int(input('Please enter a number: ')) - ....: break - ....: except ValueError: - ....: print('That was no valid number. Try again...') - ....: - Please enter a number: a - That was no valid number. Try again... - Please enter a number: 1 - - In [9]: x - Out[9]: 1 - -try/finally -~~~~~~~~~~~~ - -.. ipython:: - :verbatim: - - In [10]: try: - ....: x = int(input('Please enter a number: ')) - ....: finally: - ....: print('Thank you for your input') - ....: - Please enter a number: a - Thank you for your input - --------------------------------------------------------------------------- - ValueError Traceback (most recent call last) - Cell In[10], line 2 - 1 try: - ----> 2 x = int(input('Please enter a number: ')) - 3 finally: - 4 print('Thank you for your input') - ValueError: invalid literal for int() with base 10: 'a' - -Important for resource management (e.g. closing a file) - -Easier to ask for forgiveness than for permission -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -.. ipython:: - - In [11]: def print_sorted(collection): - ....: try: - ....: collection.sort() - ....: except AttributeError: - ....: pass # The pass statement does nothing - ....: print(collection) - ....: - - In [12]: print_sorted([1, 3, 2]) - [1, 2, 3] - - In [13]: print_sorted(set((1, 3, 2))) - set([1, 2, 3]) - - In [14]: print_sorted('132') - 132 - - -Raising exceptions ------------------- - -* Capturing and reraising an exception: - - .. 
ipython:: - :okexcept: - - In [15]: def filter_name(name): - ....: try: - ....: name = name.encode('ascii') - ....: except UnicodeError as e: - ....: if name == 'Gaël': - ....: print('OK, Gaël') - ....: else: - ....: raise e - ....: return name - ....: - - In [16]: filter_name('Gaël') - OK, Gaël - Out[16]: 'Ga\xc3\xabl' - - In [17]: filter_name('Stéfan') - - -* Exceptions to pass messages between parts of the code: - - .. ipython:: - - In [17]: def achilles_arrow(x): - ....: if abs(x - 1) < 1e-3: - ....: raise StopIteration - ....: x = 1 - (1-x)/2. - ....: return x - ....: - - In [18]: x = 0 - - In [19]: while True: - ....: try: - ....: x = achilles_arrow(x) - ....: except StopIteration: - ....: break - ....: - ....: - - In [20]: x - Out[20]: 0.9990234375 - - -Use exceptions to notify certain conditions are met (e.g. -StopIteration) or not (e.g. custom error raising) diff --git a/intro/language/first_steps.md b/intro/language/first_steps.md new file mode 100644 index 000000000..78a9d5242 --- /dev/null +++ b/intro/language/first_steps.md @@ -0,0 +1,97 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# First steps + +Start the **Ipython** shell (an enhanced interactive Python shell): + +- by typing "ipython" from a Linux/Mac terminal, or from the Windows cmd shell, +- **or** by starting the program from a menu, e.g. the [Anaconda Navigator], + the [Python(x,y)] menu if you have installed one of these + scientific-Python suites. + +::: {note} +:class: dropdown + +If you don't have Ipython installed on your computer, other Python +shells are available, such as the plain Python shell started by +typing "python" in a terminal, or the Idle interpreter. However, we +advise to use the Ipython shell because of its enhanced features, +especially for interactive scientific computing. 
+::: + +Once you have started the interpreter, type + +```{code-cell} +print("Hello, world!") +``` + +::: {note} +:class: dropdown + +The message "Hello, world!" is then displayed. You just executed your +first Python instruction, congratulations! +::: + +To get yourself started, type the following stack of instructions + +```{code-cell} +a = 3 +b = 2*a +type(b) +``` + +```{code-cell} +print(b) +``` + +```{code-cell} +a*b +``` + +```{code-cell} +b = 'hello' +type(b) +``` + +```{code-cell} +b + b +``` + +```{code-cell} +2*b +``` + +::: {note} +:class: dropdown + +Two variables `a` and `b` have been defined above. Note that one does +not declare the type of a variable before assigning its value. In C, +conversely, one should write: + +```c +int a = 3; +``` + +In addition, the type of a variable may change, in the sense that at +one point in time it can be equal to a value of a certain type, and a +second point in time, it can be equal to a value of a different +type. `b` was first equal to an integer, but it became equal to a +string when it was assigned the value `'hello'`. Operations on +integers (`b=2*a`) are coded natively in Python, and so are some +operations on strings such as additions and multiplications, which +amount respectively to concatenation and repetition. +::: + +[anaconda navigator]: https://anaconda.org/anaconda/anaconda-navigator +[python(x,y)]: https://python-xy.github.io/ diff --git a/intro/language/first_steps.rst b/intro/language/first_steps.rst deleted file mode 100644 index 1ea1d3353..000000000 --- a/intro/language/first_steps.rst +++ /dev/null @@ -1,68 +0,0 @@ -First steps -------------- - - -Start the **Ipython** shell (an enhanced interactive Python shell): - -* by typing "ipython" from a Linux/Mac terminal, or from the Windows cmd shell, -* **or** by starting the program from a menu, e.g. the `Anaconda Navigator`_, - the `Python(x,y)`_ menu if you have installed one of these - scientific-Python suites. - -.. 
_`Python(x,y)`: https://python-xy.github.io/ -.. _`Anaconda Navigator`: https://anaconda.org/anaconda/anaconda-navigator - -.. tip:: - - If you don't have Ipython installed on your computer, other Python - shells are available, such as the plain Python shell started by - typing "python" in a terminal, or the Idle interpreter. However, we - advise to use the Ipython shell because of its enhanced features, - especially for interactive scientific computing. - -Once you have started the interpreter, type :: - - >>> print("Hello, world!") - Hello, world! - -.. tip:: - - The message "Hello, world!" is then displayed. You just executed your - first Python instruction, congratulations! - -To get yourself started, type the following stack of instructions :: - - >>> a = 3 - >>> b = 2*a - >>> type(b) - - >>> print(b) - 6 - >>> a*b - 18 - >>> b = 'hello' - >>> type(b) - - >>> b + b - 'hellohello' - >>> 2*b - 'hellohello' - -.. tip:: - - Two variables ``a`` and ``b`` have been defined above. Note that one does - not declare the type of a variable before assigning its value. In C, - conversely, one should write: - - .. sourcecode:: c - - int a = 3; - - In addition, the type of a variable may change, in the sense that at - one point in time it can be equal to a value of a certain type, and a - second point in time, it can be equal to a value of a different - type. `b` was first equal to an integer, but it became equal to a - string when it was assigned the value `'hello'`. Operations on - integers (``b=2*a``) are coded natively in Python, and so are some - operations on strings such as additions and multiplications, which - amount respectively to concatenation and repetition. 
diff --git a/intro/language/functions.md b/intro/language/functions.md new file mode 100644 index 000000000..33a6787f0 --- /dev/null +++ b/intro/language/functions.md @@ -0,0 +1,480 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Defining functions + +## Function definition + +```{code-cell} +def test(): + print('in test function') +``` + +```{code-cell} +test() +``` + +:::{warning} +Function blocks must be indented in the same way as other control-flow blocks. +::: + ++++ + +## Return statement + +Functions _always_ return values: + +```{code-cell} +def disk_area(radius): + return 3.14 * radius * radius +``` + +```{code-cell} +disk_area(1.5) +``` + +But - if you do not specify an explicit return value, functions return the +special Python value `None`. + +```{code-cell} +def another_func(a): + # Do nothing. + # Notice there is no "return" statement. + pass +``` + +```{code-cell} +result = another_func(10) +# Check whether result returned is None value. +result is None +``` + +:::{note} +Note the syntax to define a function: + +- the `def` keyword; +- is followed by the function's **name**, then +- the arguments of the function are given between parentheses followed + by a colon. +- the function body; +- and `return object` for optionally returning values. + ::: + ++++ + +## Parameters + +Mandatory parameters (positional arguments) + +```{code-cell} +def double_it(x): + return x * 2 +``` + +```{code-cell} +double_it(3) +``` + +```{code-cell} +:tags: [raises-exception] + +double_it() +``` + +Optional parameters (keyword or named arguments) + +```{code-cell} +def double_it(x=2): + return x * 2 +``` + +```{code-cell} +double_it() +``` + +```{code-cell} +double_it(3) +``` + +Keyword arguments allow you to specify _default values_. 
+ +**Warning:** default values are evaluated when the function is defined, not +when it is called. This can be problematic when using mutable types (e.g. +dictionary or list) and modifying them in the function body, since the +modifications will be persistent across invocations of the function. + +Using an immutable type in a keyword argument: + +```{code-cell} +bigx = 10 +def double_it(x=bigx): + return x * 2 +``` + +```{code-cell} +bigx = 1e9 # Now really big +double_it() +``` + +Using a mutable type in a keyword argument (and modifying it inside the +function body): + +```{code-cell} +def add_to_dict(args={'a': 1, 'b': 2}): + for i in args.keys(): + args[i] += 1 + print(args) +``` + +```{code-cell} +add_to_dict +``` + +```{code-cell} +add_to_dict() +``` + +```{code-cell} +add_to_dict() +``` + +```{code-cell} +add_to_dict() +``` + +More involved example implementing python's slicing: + +```{code-cell} +def slicer(seq, start=None, stop=None, step=None): + """Implement basic python slicing.""" + return seq[start:stop:step] +``` + +```{code-cell} +rhyme = 'one fish, two fish, red fish, blue fish'.split() +rhyme +``` + +```{code-cell} +slicer(rhyme) +``` + +```{code-cell} +slicer(rhyme, step=2) +``` + +```{code-cell} +slicer(rhyme, 1, step=2) +``` + +```{code-cell} +slicer(rhyme, start=1, stop=4, step=2) +``` + +The order of the keyword arguments does not matter: + +```{code-cell} +slicer(rhyme, step=2, start=1, stop=4) +``` + +— but it is good practice to use the same ordering as the function's +definition. + +_Keyword arguments_ are a very convenient feature for defining functions with +a variable number of arguments, especially when default values are to be used +in most calls to the function. + ++++ + +## Passing by value + +::: {note} +:class: dropdown + +Can you modify the value of a variable inside a function? Most languages (C, +Java, ...) distinguish "passing by value" and "passing by reference". 
In +Python, such a distinction is somewhat artificial, and it is a bit subtle +whether your variables are going to be modified or not. Fortunately, there +exist clear rules. + +Parameters to functions are references to objects, which are passed by +value. When you pass a variable to a function, python passes the +reference to the object to which the variable refers (the **value**). +Not the variable itself. +::: + +If the **value** passed in a function is immutable, the function does not +modify the caller's variable. If the **value** is mutable, the function +may modify the caller's variable in-place: + +```{code-cell} +def try_to_modify(x, y, z): + x = 23 + y.append(42) + z = [99] # new reference + print(x) + print(y) + print(z) +``` + +```{code-cell} +a = 77 # immutable variable +b = [99] # mutable variable +c = [28] +try_to_modify(a, b, c) +``` + +```{code-cell} +print(a) +``` + +```{code-cell} +print(b) +``` + +```{code-cell} +print(c) +``` + +Functions have a local variable table called a _local namespace_. + +The variable `x` only exists within the function `try_to_modify`. + ++++ + +## Global variables + +Variables declared outside the function can be referenced within the function: + +```{code-cell} +x = 5 +def addx(y): + return x + y +``` + +```{code-cell} +addx(10) +``` + +But these "global" variables cannot be modified within the function, unless +declared **global** in the function. 
+ +This doesn't work: + +```{code-cell} +def setx(y): + x = y + print('x is %d' % x) +``` + +```{code-cell} +setx(10) +``` + +```{code-cell} +x +``` + +This works: + +```{code-cell} +def setx(y): + global x + x = y + print('x is %d' % x) +``` + +```{code-cell} +setx(10) +``` + +```{code-cell} +x +``` + +## Variable number of parameters + +Special forms of parameters: + +- `*args`: any number of positional arguments packed into a tuple +- `**kwargs`: any number of keyword arguments packed into a dictionary + +```{code-cell} +def variable_args(*args, **kwargs): + print('args is', args) + print('kwargs is', kwargs) +``` + +```{code-cell} +variable_args('one', 'two', x=1, y=2, z=3) +``` + +## Docstrings + +Documentation about what the function does and its parameters. General +convention: + +```{code-cell} +def funcname(params): + """Concise one-line sentence describing the function. + + Extended summary which can contain multiple paragraphs. + """ + # function body + pass +``` + +```{code-cell} +# Also accessible in Jupyter / IPython with "funcname?" +help(funcname) +``` + +:::{note} +**Docstring guidelines** + +For the sake of standardization, the [Docstring +Conventions](https://peps.python.org/pep-0257) webpage documents the semantics +and conventions associated with Python docstrings. + +Also, the NumPy and SciPy modules have defined a precise standard for +documenting scientific functions, that you may want to follow for your own +functions, with a `Parameters` section, an `Examples` section, etc. See +<https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard> +::: + ++++ + +## Functions are objects + +Functions are first-class objects, which means they can be: + +- assigned to a variable +- an item in a list (or any collection) +- passed as an argument to another function. + +```{code-cell} +va = variable_args +va('three', x=1, y=2) +``` + +## Methods + +Methods are functions attached to objects. You've seen these in our examples on +_lists_, _dictionaries_, _strings_, etc... 
+ ++++ + +## Exercises + +::: {exercise-start} +:label: fibonacci-ex +:class: dropdown +::: + +Write a function that displays the `n` first terms of the Fibonacci +sequence, defined by: + +$$ +\begin{align} +U_{0} &= 0 \\ +U_{1} &= 1 \\ +U_{n+2} &= U_{n+1} + U_{n} +\end{align} +$$ + +::: {exercise-end} +::: + +::: {solution-start} fibonacci-ex +:class: dropdown +::: + +```{code-cell} +def fib(n): + """Display the n first terms of Fibonacci sequence""" + a, b = 0, 1 + i = 0 + while i < n: + print(b) + a, b = b, a+b + i +=1 +``` + +```{code-cell} +fib(10) +``` + +::: {solution-end} +::: + +::: {exercise-start} +:label: quicksort-ex +:class: dropdown +::: + +Implement the [Quicksort algorithm, as defined by +Wikipedia](https://en.wikipedia.org/wiki/Quicksort) + +``` +function quicksort(array) + var list less, greater + if length(array) < 2 + return array + select and remove a pivot value pivot from array + for each x in array + if x < pivot + 1 then append x to less + else append x to greater + return concatenate(quicksort(less), pivot, quicksort(greater)) +``` + +::: {exercise-end} +::: + +::: {solution-start} quicksort-ex +:class: dropdown +::: + +```{code-cell} +def qsort(lst): + """Quick sort: returns a sorted copy of the list.""" + if len(lst) <= 1: + return lst + pivot, rest = lst[0], lst[1:] + + # Could use list comprehension: + # less_than = [ lt for lt in rest if lt < pivot ] + + less_than = [] + for lt in rest: + if lt < pivot: + less_than.append(lt) + + # Could use list comprehension: + # greater_equal = [ ge for ge in rest if ge >= pivot ] + + greater_equal = [] + for ge in rest: + if ge >= pivot: + greater_equal.append(ge) + return qsort(less_than) + [pivot] + qsort(greater_equal) +``` + +```{code-cell} +# And now check that qsort does sort: +assert qsort(range(10)) == list(range(10)) +assert qsort(range(10)[::-1]) == list(range(10)) +assert qsort([1, 4, 2, 5, 3]) == sorted([1, 4, 2, 5, 3]) +``` + +::: {solution-end} +::: diff --git 
a/intro/language/functions.rst b/intro/language/functions.rst deleted file mode 100644 index 7894204a4..000000000 --- a/intro/language/functions.rst +++ /dev/null @@ -1,392 +0,0 @@ -Defining functions -===================== - -Function definition -------------------- - -.. ipython:: - - In [56]: def test(): - ....: print('in test function') - ....: - ....: - - In [57]: test() - in test function - -.. Warning:: - - Function blocks must be indented as other control-flow blocks. - -Return statement ----------------- - -Functions can *optionally* return values. - -.. ipython:: - - In [6]: def disk_area(radius): - ...: return 3.14 * radius * radius - ...: - - In [8]: disk_area(1.5) - Out[8]: 7.0649999999999995 - -.. Note:: By default, functions return ``None``. - -.. Note:: Note the syntax to define a function: - - * the ``def`` keyword; - - * is followed by the function's **name**, then - - * the arguments of the function are given between parentheses followed - by a colon. - - * the function body; - - * and ``return object`` for optionally returning values. - - -Parameters ----------- - -Mandatory parameters (positional arguments) - -.. ipython:: - :okexcept: - - In [81]: def double_it(x): - ....: return x * 2 - ....: - - In [82]: double_it(3) - Out[82]: 6 - - In [83]: double_it() - -Optional parameters (keyword or named arguments) - -.. ipython:: - - In [84]: def double_it(x=2): - ....: return x * 2 - ....: - - In [85]: double_it() - Out[85]: 4 - - In [86]: double_it(3) - Out[86]: 6 - -Keyword arguments allow you to specify *default values*. - -.. warning:: - - Default values are evaluated when the function is defined, not when - it is called. This can be problematic when using mutable types (e.g. - dictionary or list) and modifying them in the function body, since the - modifications will be persistent across invocations of the function. - - Using an immutable type in a keyword argument: - - .. 
ipython:: - - In [124]: bigx = 10 - - In [125]: def double_it(x=bigx): - .....: return x * 2 - .....: - - In [126]: bigx = 1e9 # Now really big - - In [128]: double_it() - Out[128]: 20 - - Using an mutable type in a keyword argument (and modifying it inside the - function body): - - .. ipython:: - - In [2]: def add_to_dict(args={'a': 1, 'b': 2}): - ...: for i in args.keys(): - ...: args[i] += 1 - ...: print(args) - ...: - - In [3]: add_to_dict - Out[3]: - - In [4]: add_to_dict() - {'a': 2, 'b': 3} - - In [5]: add_to_dict() - {'a': 3, 'b': 4} - - In [6]: add_to_dict() - {'a': 4, 'b': 5} - -.. tip:: - - More involved example implementing python's slicing: - - .. ipython:: - - In [98]: def slicer(seq, start=None, stop=None, step=None): - ....: """Implement basic python slicing.""" - ....: return seq[start:stop:step] - ....: - - In [101]: rhyme = 'one fish, two fish, red fish, blue fish'.split() - - In [102]: rhyme - Out[102]: ['one', 'fish,', 'two', 'fish,', 'red', 'fish,', 'blue', 'fish'] - - In [103]: slicer(rhyme) - Out[103]: ['one', 'fish,', 'two', 'fish,', 'red', 'fish,', 'blue', 'fish'] - - In [104]: slicer(rhyme, step=2) - Out[104]: ['one', 'two', 'red', 'blue'] - - In [105]: slicer(rhyme, 1, step=2) - Out[105]: ['fish,', 'fish,', 'fish,', 'fish'] - - In [106]: slicer(rhyme, start=1, stop=4, step=2) - Out[106]: ['fish,', 'fish,'] - - The order of the keyword arguments does not matter: - - .. ipython:: - - In [107]: slicer(rhyme, step=2, start=1, stop=4) - Out[107]: ['fish,', 'fish,'] - - but it is good practice to use the same ordering as the function's - definition. - -*Keyword arguments* are a very convenient feature for defining functions -with a variable number of arguments, especially when default values are -to be used in most calls to the function. - -Passing by value ----------------- - -.. tip:: - - Can you modify the value of a variable inside a function? Most languages - (C, Java, ...) distinguish "passing by value" and "passing by reference". 
- In Python, such a distinction is somewhat artificial, and it is a bit - subtle whether your variables are going to be modified or not. - Fortunately, there exist clear rules. - - Parameters to functions are references to objects, which are passed by - value. When you pass a variable to a function, python passes the - reference to the object to which the variable refers (the **value**). - Not the variable itself. - -If the **value** passed in a function is immutable, the function does not -modify the caller's variable. If the **value** is mutable, the function -may modify the caller's variable in-place:: - - >>> def try_to_modify(x, y, z): - ... x = 23 - ... y.append(42) - ... z = [99] # new reference - ... print(x) - ... print(y) - ... print(z) - ... - >>> a = 77 # immutable variable - >>> b = [99] # mutable variable - >>> c = [28] - >>> try_to_modify(a, b, c) - 23 - [99, 42] - [99] - >>> print(a) - 77 - >>> print(b) - [99, 42] - >>> print(c) - [28] - - - -Functions have a local variable table called a *local namespace*. - -The variable ``x`` only exists within the function ``try_to_modify``. - - -Global variables ----------------- - -Variables declared outside the function can be referenced within the -function: - -.. ipython:: - - In [114]: x = 5 - - In [115]: def addx(y): - .....: return x + y - .....: - - In [116]: addx(10) - Out[116]: 15 - -But these "global" variables cannot be modified within the function, -unless declared **global** in the function. - -This doesn't work: - -.. ipython:: - - In [117]: def setx(y): - .....: x = y - .....: print('x is %d' % x) - .....: - .....: - - In [118]: setx(10) - x is 10 - - In [120]: x - Out[120]: 5 - -This works: - -.. 
ipython:: - - In [121]: def setx(y): - .....: global x - .....: x = y - .....: print('x is %d' % x) - .....: - .....: - - In [122]: setx(10) - x is 10 - - In [123]: x - Out[123]: 10 - - -Variable number of parameters ------------------------------ -Special forms of parameters: - * ``*args``: any number of positional arguments packed into a tuple - * ``**kwargs``: any number of keyword arguments packed into a dictionary - -.. ipython:: - - In [35]: def variable_args(*args, **kwargs): - ....: print('args is', args) - ....: print('kwargs is', kwargs) - ....: - - In [36]: variable_args('one', 'two', x=1, y=2, z=3) - args is ('one', 'two') - kwargs is {'x': 1, 'y': 2, 'z': 3} - - -Docstrings ----------- - -Documentation about what the function does and its parameters. General -convention: - -.. ipython:: - - In [67]: def funcname(params): - ....: """Concise one-line sentence describing the function. - ....: - ....: Extended summary which can contain multiple paragraphs. - ....: """ - ....: # function body - ....: pass - ....: - - @verbatim - In [68]: funcname? - Signature: funcname(params) - Docstring: - Concise one-line sentence describing the function. - Extended summary which can contain multiple paragraphs. - File: ~/src/scientific-python-lectures/ - Type: function - -.. Note:: **Docstring guidelines** - - - For the sake of standardization, the `Docstring - Conventions `_ webpage - documents the semantics and conventions associated with Python - docstrings. - - Also, the NumPy and SciPy modules have defined a precise standard - for documenting scientific functions, that you may want to follow for - your own functions, with a ``Parameters`` section, an ``Examples`` - section, etc. 
See - https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard - -Functions are objects ---------------------- -Functions are first-class objects, which means they can be: - * assigned to a variable - * an item in a list (or any collection) - * passed as an argument to another function. - -.. ipython:: - - In [38]: va = variable_args - - In [39]: va('three', x=1, y=2) - args is ('three',) - kwargs is {'x': 1, 'y': 2} - - -Methods -------- - -Methods are functions attached to objects. You've seen these in our -examples on *lists*, *dictionaries*, *strings*, etc... - - -Exercises ---------- - -.. topic:: Exercise: Fibonacci sequence - :class: green - - Write a function that displays the ``n`` first terms of the Fibonacci - sequence, defined by: - - .. math:: - \left\{ - \begin{array}{ll} - U_{0} = 0 \\ - U_{1} = 1 \\ - U_{n+2} = U_{n+1} + U_{n} - \end{array} - \right. - -.. :ref:`fibonacci` - -.. topic:: Exercise: Quicksort - :class: green - - Implement the quicksort algorithm, as defined by wikipedia - -.. parsed-literal:: - - function quicksort(array) - var list less, greater - if length(array) < 2 - return array - select and remove a pivot value pivot from array - for each x in array - if x < pivot + 1 then append x to less - else append x to greater - return concatenate(quicksort(less), pivot, quicksort(greater)) - -.. :ref:`quick_sort` diff --git a/intro/language/io.md b/intro/language/io.md new file mode 100644 index 000000000..2b72e2502 --- /dev/null +++ b/intro/language/io.md @@ -0,0 +1,77 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Input and Output + +To be exhaustive, here are some information about input and output in +Python. Since we will use the NumPy methods to read and write files, +**you may skip this chapter at first reading**. 
+ +We write or read **strings** to/from files (other types must be converted to +strings). To write in a file: + +```{code-cell} +f = open('workfile', 'w') # opens the workfile file +type(f) +``` + +```{code-cell} +f.write('This is a test \nand another test') +f.close() +``` + +To read from a file + +```{code-cell} +f = open('workfile', 'r') +s = f.read() +print(s) +``` + +```{code-cell} +f.close() +``` + +:::{admonition} See also + +For more details: <https://docs.python.org/3/tutorial/inputoutput.html> +::: + +## Iterating over a file + +```{code-cell} +f = open('workfile', 'r') + +for line in f: + print(line) +``` + +```{code-cell} +f.close() +``` + +### File modes + +- Read-only: `r` + +- Write-only: `w` + + - Note: Create a new file or _overwrite_ existing file. + +- Append a file: `a` + +- Read and Write: `r+` + +- Binary mode: `b` + + - Note: Use for binary files, especially on Windows. diff --git a/intro/language/io.rst b/intro/language/io.rst deleted file mode 100644 index 17e08cbed..000000000 --- a/intro/language/io.rst +++ /dev/null @@ -1,65 +0,0 @@ -Input and Output -================ - -To be exhaustive, here are some information about input and output in -Python. Since we will use the NumPy methods to read and write files, -**you may skip this chapter at first reading**. - -We write or read **strings** to/from files (other types must be converted to -strings). To write in a file:: - - >>> f = open('workfile', 'w') # opens the workfile file - >>> type(f) - - >>> f.write('This is a test \nand another test') # doctest: +SKIP - >>> f.close() - -To read from a file - -.. ipython:: - :verbatim: - - In [1]: f = open('workfile', 'r') - - In [2]: s = f.read() - - In [3]: print(s) - This is a test - and another test - - In [4]: f.close() - - -.. seealso:: - - For more details: https://docs.python.org/3/tutorial/inputoutput.html - -Iterating over a file -~~~~~~~~~~~~~~~~~~~~~ - -.. 
ipython:: - :verbatim: - - In [6]: f = open('workfile', 'r') - - In [7]: for line in f: - ...: print(line) - ...: - This is a test - and another test - - In [8]: f.close() - -File modes ----------- - -* Read-only: ``r`` -* Write-only: ``w`` - - * Note: Create a new file or *overwrite* existing file. - -* Append a file: ``a`` -* Read and Write: ``r+`` -* Binary mode: ``b`` - - * Note: Use for binary files, especially on Windows. diff --git a/intro/language/my_file.py b/intro/language/my_file.py new file mode 100644 index 000000000..0a594bc97 --- /dev/null +++ b/intro/language/my_file.py @@ -0,0 +1,4 @@ +# Contents of my_file.py +import sys + +print(sys.argv) diff --git a/intro/language/oop.md b/intro/language/oop.md new file mode 100644 index 000000000..43eee016e --- /dev/null +++ b/intro/language/oop.md @@ -0,0 +1,76 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Object-oriented programming (OOP) + +Python supports object-oriented programming (OOP). The goals of OOP are: + +- to organize the code, and +- to reuse code in similar contexts. + +Here is a small example: we create a Student _class_, which is an object +gathering several custom functions (_methods_) and variables (_attributes_), +we will be able to use: + +```{code-cell} +class Student(object): + def __init__(self, name): + self.name = name + def set_age(self, age): + self.age = age + def set_major(self, major): + self.major = major +``` + +```{code-cell} +anna = Student('anna') +anna.set_age(21) +anna.set_major('physics') +``` + +In the previous example, the Student class has `__init__`, `set_age` and +`set_major` methods. Its attributes are `name`, `age` and `major`. We +can call these methods and attributes with the following notation: +`classinstance.method` or `classinstance.attribute`. 
The `__init__` +constructor is a special method we call with: `MyClass(init parameters if +any)`. + +Now, suppose we want to create a new class MasterStudent with the same +methods and attributes as the previous one, but with an additional +`internship` attribute. We won't copy the previous class, but +**inherit** from it: + +```{code-cell} +class MasterStudent(Student): + internship = 'mandatory, from March to June' +``` + +```{code-cell} +james = MasterStudent('james') +james.internship +``` + +```{code-cell} +james.set_age(23) +james.age +``` + +The MasterStudent class inherited from the Student attributes and methods. + +Thanks to classes and object-oriented programming, we can organize code +with different classes corresponding to different objects we encounter +(an Experiment class, an Image class, a Flow class, etc.), with their own +methods and attributes. Then we can use inheritance to consider +variations around a base class and **reuse** code. Ex : from a Flow +base class, we can create derived StokesFlow, TurbulentFlow, +PotentialFlow, etc. diff --git a/intro/language/oop.rst b/intro/language/oop.rst deleted file mode 100644 index 24274b63d..000000000 --- a/intro/language/oop.rst +++ /dev/null @@ -1,57 +0,0 @@ -Object-oriented programming (OOP) -================================= - -Python supports object-oriented programming (OOP). The goals of OOP are: - - * to organize the code, and - - * to reuse code in similar contexts. - - -Here is a small example: we create a Student *class*, which is an object -gathering several custom functions (*methods*) and variables (*attributes*), -we will be able to use:: - - >>> class Student(object): - ... def __init__(self, name): - ... self.name = name - ... def set_age(self, age): - ... self.age = age - ... def set_major(self, major): - ... self.major = major - ... 
- >>> anna = Student('anna') - >>> anna.set_age(21) - >>> anna.set_major('physics') - -In the previous example, the Student class has ``__init__``, ``set_age`` and -``set_major`` methods. Its attributes are ``name``, ``age`` and ``major``. We -can call these methods and attributes with the following notation: -``classinstance.method`` or ``classinstance.attribute``. The ``__init__`` -constructor is a special method we call with: ``MyClass(init parameters if -any)``. - -Now, suppose we want to create a new class MasterStudent with the same -methods and attributes as the previous one, but with an additional -``internship`` attribute. We won't copy the previous class, but -**inherit** from it:: - - >>> class MasterStudent(Student): - ... internship = 'mandatory, from March to June' - ... - >>> james = MasterStudent('james') - >>> james.internship - 'mandatory, from March to June' - >>> james.set_age(23) - >>> james.age - 23 - -The MasterStudent class inherited from the Student attributes and methods. - -Thanks to classes and object-oriented programming, we can organize code -with different classes corresponding to different objects we encounter -(an Experiment class, an Image class, a Flow class, etc.), with their own -methods and attributes. Then we can use inheritance to consider -variations around a base class and **reuse** code. Ex : from a Flow -base class, we can create derived StokesFlow, TurbulentFlow, -PotentialFlow, etc. diff --git a/intro/language/python_language.md b/intro/language/python_language.md new file mode 100644 index 000000000..d55de09c7 --- /dev/null +++ b/intro/language/python_language.md @@ -0,0 +1,46 @@ +(python-language-chapter)= + +# The Python language + +**Authors**: _Chris Burns, Christophe Combelles, Emmanuelle Gouillart, +Gaël Varoquaux_ + +:::{topic} Python for scientific computing +We introduce here the Python language. Only the bare minimum +necessary for getting started with NumPy and SciPy is addressed here. 
+To learn more about the language, consider going through the +excellent tutorial <https://docs.python.org/3/tutorial>. Dedicated books +are also available, such as [Dive into Python 3](https://diveintopython3.net/). +::: + +![](python-logo.png) + +:::{tip} +Python is a **programming language**, as are C, Fortran, BASIC, PHP, +etc. Some specific features of Python are as follows: + +- an _interpreted_ (as opposed to _compiled_) language. Contrary to e.g. + C or Fortran, one does not compile Python code before executing it. In + addition, Python can be used **interactively**: many Python + interpreters are available, from which commands and scripts can be + executed. +- a free software released under an **open-source** license: Python can + be used and distributed free of charge, even for building commercial + software. +- **multi-platform**: Python is available for all major operating + systems, Windows, Linux/Unix, MacOS X, most likely your mobile phone + OS, etc. +- a very readable language with clear non-verbose syntax +- a language for which a large variety of high-quality packages are + available for various applications, from web frameworks to scientific + computing. +- a language very easy to interface with other languages, in particular C + and C++. +- Some other features of the language are illustrated just below. For + example, Python is an object-oriented language, with dynamic typing + (the same variable can contain objects of different types during the + course of a program). + +See <https://www.python.org/about/> for more information about +distinguishing features of Python. +::: diff --git a/intro/language/python_language.rst b/intro/language/python_language.rst deleted file mode 100644 index f80173377..000000000 --- a/intro/language/python_language.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. _python_language_chapter: - -The Python language -===================================== - -**Authors**: *Chris Burns, Christophe Combelles, Emmanuelle Gouillart, -Gaël Varoquaux* - -.. 
topic:: Python for scientific computing - - We introduce here the Python language. Only the bare minimum - necessary for getting started with NumPy and SciPy is addressed here. - To learn more about the language, consider going through the - excellent tutorial https://docs.python.org/3/tutorial. Dedicated books - are also available, such as `Dive into Python 3 `__. - - -.. image:: python-logo.png - :align: right - -.. tip:: - - Python is a **programming language**, as are C, Fortran, BASIC, PHP, - etc. Some specific features of Python are as follows: - - * an *interpreted* (as opposed to *compiled*) language. Contrary to e.g. - C or Fortran, one does not compile Python code before executing it. In - addition, Python can be used **interactively**: many Python - interpreters are available, from which commands and scripts can be - executed. - - * a free software released under an **open-source** license: Python can - be used and distributed free of charge, even for building commercial - software. - - * **multi-platform**: Python is available for all major operating - systems, Windows, Linux/Unix, MacOS X, most likely your mobile phone - OS, etc. - - * a very readable language with clear non-verbose syntax - - * a language for which a large variety of high-quality packages are - available for various applications, from web frameworks to scientific - computing. - - * a language very easy to interface with other languages, in particular C - and C++. - - * Some other features of the language are illustrated just below. For - example, Python is an object-oriented language, with dynamic typing - (the same variable can contain objects of different types during the - course of a program). - - - See https://www.python.org/about/ for more information about - distinguishing features of Python. - -_____ - -.. 
toctree:: - :maxdepth: 2 - - first_steps.rst - basic_types.rst - control_flow.rst - functions.rst - reusing_code.rst - io.rst - standard_library.rst - exceptions.rst - oop.rst diff --git a/intro/language/reusing_code.md b/intro/language/reusing_code.md new file mode 100644 index 000000000..78bb38132 --- /dev/null +++ b/intro/language/reusing_code.md @@ -0,0 +1,500 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Reusing code: scripts and modules + +For now, we have typed all instructions in the interpreter. For longer +sets of instructions we need to change track and write the code in text +files (using a text editor), that we will call either _scripts_ or +_modules_. Use your favorite text editor (provided it offers syntax +highlighting for Python), or the editor that comes with the Scientific +Python Suite you may be using. + +## Scripts + +::: {note} +:class: dropdown + +Let us first write a _script_, that is a file with a sequence of +instructions that are executed each time the script is called. +Instructions may be e.g. copied-and-pasted from the interpreter (but +take care to respect indentation rules!). +::: + +The extension for Python files is `.py`. Write or copy-and-paste the +following lines in a file called `test.py` + +::: {literalinclude} test.py +:language: python +::: + +::: {note} +:class: dropdown + +Let us now execute the script interactively, that is inside the Jupyter or +IPython interpreter. This is maybe the most common use of scripts in +scientific computing. +::: + +In Jupyter or IPython, the syntax to execute a script is `%run script.py`. For +example: + +```{code-cell} +%run test.py +``` + +```{code-cell} +message +``` + +The script has been executed. 
Moreover the variables defined in the +script (such as `message`) are now available inside the interpreter's +namespace. + +::: {note} +:class: dropdown + +Other interpreters also offer the possibility to execute scripts +(e.g., `execfile` in the plain Python interpreter, etc.). +::: + +It is also possible to execute this script as a _standalone +program_, by executing the script inside a shell terminal (Linux/Mac +console or cmd Windows console). For example, if we are in the same +directory as the test.py file, we can execute this in a console: + +```bash +$ python test.py +Hello +how +are +you? +``` + +:::: {tip} +Standalone scripts may also take command-line arguments + +::: {literalinclude} my_file.py +:language: python +::: + +```bash +$ python my_file.py test arguments +['my_file.py', 'test', 'arguments'] +``` + +:::: + +::: {warning} + +Don't implement option parsing like this yourself. Use a dedicated module such +as {mod}`argparse`. + +::: + +## Importing objects from modules + +```{code-cell} +import os +os +``` + +```{code-cell} +os.listdir('.') +``` + +And also: + +```{code-cell} +from os import listdir +``` + +Importing shorthands: + +```{code-cell} +import numpy as np +``` + +:::{warning} + +The following code is an example of what is called the _star import_ and +please, **Do not use it** + +```{code-cell} +from os import * +``` + +- Makes the code harder to read and understand: where do symbols come + from? +- Makes it impossible to guess the functionality by the context and + the name (hint: `os.name` is the name of the OS), and to profit + usefully from tab completion. +- Restricts the variable names you can use: `os.name` might override + `name`, or vice versa. +- Creates possible name clashes between modules. +- Makes the code impossible to statically check for undefined + symbols. + +::: + +Modules are a good way to organize code in a hierarchical way. 
Actually, +all the scientific computing tools we are going to use are modules: + +```{code-cell} +import numpy as np # Module for data arrays +import scipy as sp # Module for scientific computing + +# Use Numpy +np.linspace(0, 10, 6) +``` + +## Creating modules + +::: {note} +:class: dropdown + +If we want to write larger and better organized programs (compared to +simple scripts), where some objects are defined, (variables, +functions, classes) and that we want to reuse several times, we have +to create our own _modules_. +::: + +Let us create a module `demo` contained in the file `demo.py`: + +::: {literalinclude} demo.py +:language: python +::: + +::: {note} +:class: dropdown + +In this file, we defined two functions `print_a` and `print_b`. Suppose +we want to call the `print_a` function from the interpreter. We could +execute the file as a script, but since we just want to have access to +the function `print_a`, we are rather going to **import it as a module**. +The syntax is as follows. +::: + +```{code-cell} +import demo + +demo.print_a() +``` + +```{code-cell} +demo.print_b() +``` + +Importing the module gives access to its objects, using the +`module.object` syntax. Don't forget to put the module's name before the +object's name, otherwise Python won't recognize the instruction. + +## Introspection + +```{code-cell} +help(demo) +``` + +You can get the same output (in Jupyter / IPython) from: + +```ipython +demo? +``` + +An example session: + +```ipython +In [4]: demo? +Type: module +Base Class: +String Form: +Namespace: Interactive +File: /home/varoquau/Projects/Python_talks/scipy_2009_tutorial/source/demo.py +Docstring: + A demo module. + + +In [5]: who +demo + +In [6]: whos +Variable Type Data/Info +------------------------------ +demo module + +In [7]: dir(demo) +Out[7]: +['__builtins__', +'__doc__', +'__file__', +'__name__', +'__package__', +'c', +'d', +'print_a', +'print_b'] + +In [8]: demo. 
+demo.c demo.print_a demo.py +demo.d demo.print_b demo.pyc +``` + +Importing objects from modules into the main namespace + +```ipython +In [9]: from demo import print_a, print_b + +In [10]: whos +Variable Type Data/Info +-------------------------------- +demo module +print_a function +print_b function + +In [11]: print_a() +a +``` + +:::{warning} + +**Module caching** + +Modules are cached: if you modify `demo.py` and re-import it in the +old session, you will get the old one. + +**Solution** + +```ipython +In [10]: importlib.reload(demo) +``` + +::: + +## '\_\_main\_\_' and module loading + +::: {note} +:class: dropdown + +Sometimes we want code to be executed when a module is +run directly, but not when it is imported by another module. +`if __name__ == '__main__'` allows us to check whether the +module is being run directly. +::: + +File `demo2.py`: + +::: {literalinclude} demo2.py +::: + +Importing it: + +```{code-cell} +import demo2 +``` + +Importing it again in the same session: + +```{code-cell} +import demo2 +``` + +Running it: + +```{code-cell} +%run demo2 +``` + +## Scripts or modules? How to organize your code + +:::{note} +Rule of thumb + +- Sets of instructions that are called several times should be + written inside **functions** for better code reusability. +- Functions (or other bits of code) that are called from several + scripts should be written inside a **module**, so that only the + module is imported in the different scripts (do not copy-and-paste + your functions in the different scripts!). + ::: + +### How modules are found and imported + +When the `import mymodule` statement is executed, the module `mymodule` +is searched in a given list of directories. This list includes a list +of installation-dependent default path (e.g., `/usr/lib64/python3.11`) as +well as the list of directories specified by the environment variable +`PYTHONPATH`. 
+ +The list of directories searched by Python is given by the `sys.path` +variable + +```{code-cell} +import sys +sys.path +``` + +Modules must be located in the search path, therefore you can: + +- write your own modules within directories already defined in the + search path (e.g. `$HOME/.venv/lectures/lib64/python3.11/site-packages`). + You may use symbolic links (on Linux) to keep the code somewhere else. + +- modify the environment variable `PYTHONPATH` to include the + directories containing the user-defined modules. + + :::{tip} + On Linux/Unix, add the following line to a file read by the shell at + startup (e.g. /etc/profile, .profile) + + ```bash + export PYTHONPATH=$PYTHONPATH:/home/emma/user_defined_modules + ``` + + On Windows, <https://support.microsoft.com/kb/310519> explains how to + handle environment variables. + ::: + +- or modify the `sys.path` variable itself within a Python script. + + :::{tip} + + ```python + import sys + new_path = '/home/emma/user_defined_modules' + if new_path not in sys.path: + sys.path.append(new_path) + ``` + + This method is not very robust, however, because it makes the code + less portable (user-dependent path) and because you have to add the + directory to your sys.path each time you want to import from a module + in this directory. + ::: + +:::{admonition} See also + +See <https://docs.python.org/3/tutorial/modules.html> for more information +about modules. +::: + +## Packages + +A directory that contains many modules is called a _package_. A package +is a module with submodules (which can have submodules themselves, etc.). +A special file called `__init__.py` (which may be empty) tells Python +that the directory is a Python package, from which modules can be +imported. 
+ +```bash +$ ls +_build_utils/ fft/ _lib/ odr/ spatial/ +cluster/ fftpack/ linalg/ optimize/ special/ +conftest.py __init__.py linalg.pxd optimize.pxd special.pxd +constants/ integrate/ meson.build setup.py stats/ +datasets/ interpolate/ misc/ signal/ +_distributor_init.py io/ ndimage/ sparse/ +$ cd ndimage +$ ls +_filters.py __init__.py _measurements.py morphology.py src/ +filters.py _interpolation.py measurements.py _ni_docstrings.py tests/ +_fourier.py interpolation.py meson.build _ni_support.py utils/ +fourier.py LICENSE.txt _morphology.py setup.py +``` + +From Jupyter / IPython: + +```{code-cell} +import scipy as sp + +sp.__file__ +``` + +```{code-cell} +sp.version.version +``` + +```{code-cell} +# Also available as sp.ndimage.binary_dilation? +help(sp.ndimage.binary_dilation) +``` + +## Good practices + +- Use **meaningful** object **names** + +- **Indentation: no choice!** + + :::{tip} + Indenting is compulsory in Python! Every command block following a + colon bears an additional indentation level with respect to the + previous line with a colon. One must therefore indent after + `def f():` or `while:`. At the end of such logical blocks, one + decreases the indentation depth (and re-increases it if a new block + is entered, etc.) + + Strict respect of indentation is the price to pay for getting rid of + `{` or `;` characters that delineate logical blocks in other + languages. Improper indentation leads to errors such as + + ```ipython + ------------------------------------------------------------ + IndentationError: unexpected indent (test.py, line 2) + ``` + + All this indentation business can be a bit confusing in the + beginning. However, with the clear indentation, and in the absence of + extra characters, the resulting code is very nice to read compared to + other languages. + ::: + +- **Indentation depth**: Inside your text editor, you may choose to + indent with any positive number of spaces (1, 2, 3, 4, ...). 
However, + it is considered good practice to **indent with 4 spaces**. You may + configure your editor to map the `Tab` key to a 4-space + indentation. + +- **Style guidelines** + + **Long lines**: you should not write very long lines that span over more + than (e.g.) 80 characters. Long lines can be broken with the `\` character + + ```python + long_line = "Here is a very very long line \ + that we break in two parts." + ``` + + **Spaces** + + Write well-spaced code: put whitespaces after commas, around arithmetic + operators, etc.: + + ```python + a = 1 # yes + a=1 # too cramped + ``` + + A certain number of rules + for writing "beautiful" code (and more importantly using the same + conventions as anybody else!) are given in the [Style Guide for Python + Code](https://peps.python.org/pep-0008). + +--- + +:::{admonition} Quick read +If you want to do a first quick pass through the Scientific Python Lectures +to learn the ecosystem, you can directly skip to the next chapter: +{ref}`numpy`. + +The remainder of this chapter is not necessary to follow the rest of +the intro part. But be sure to come back and finish this chapter later. +::: diff --git a/intro/language/reusing_code.rst b/intro/language/reusing_code.rst deleted file mode 100644 index 548902f74..000000000 --- a/intro/language/reusing_code.rst +++ /dev/null @@ -1,513 +0,0 @@ -Reusing code: scripts and modules -================================= - -For now, we have typed all instructions in the interpreter. For longer -sets of instructions we need to change track and write the code in text -files (using a text editor), that we will call either *scripts* or -*modules*. Use your favorite text editor (provided it offers syntax -highlighting for Python), or the editor that comes with the Scientific -Python Suite you may be using. - -Scripts -------- - -.. tip:: - - Let us first write a *script*, that is a file with a sequence of - instructions that are executed each time the script is called. 
- Instructions may be e.g. copied-and-pasted from the interpreter (but - take care to respect indentation rules!). - -The extension for Python files is ``.py``. Write or copy-and-paste the -following lines in a file called ``test.py`` :: - - message = "Hello how are you?" - for word in message.split(): - print(word) - -.. tip:: - - Let us now execute the script interactively, that is inside the - Ipython interpreter. This is maybe the most common use of scripts in - scientific computing. - -.. note:: - - in Ipython, the syntax to execute a script is ``%run script.py``. For - example, - -.. ipython:: - :verbatim: - - In [1]: %run test.py - Hello - how - are - you? - - In [2]: message - Out[2]: 'Hello how are you?' - - -The script has been executed. Moreover the variables defined in the -script (such as ``message``) are now available inside the interpreter's -namespace. - -.. tip:: - - Other interpreters also offer the possibility to execute scripts - (e.g., ``execfile`` in the plain Python interpreter, etc.). - -It is also possible In order to execute this script as a *standalone -program*, by executing the script inside a shell terminal (Linux/Mac -console or cmd Windows console). For example, if we are in the same -directory as the test.py file, we can execute this in a console: - -.. sourcecode:: bash - - $ python test.py - Hello - how - are - you? - -.. tip:: - - Standalone scripts may also take command-line arguments - - In ``file.py``:: - - import sys - print(sys.argv) - - .. sourcecode:: bash - - $ python file.py test arguments - ['file.py', 'test', 'arguments'] - - .. warning:: - - Don't implement option parsing yourself. Use a dedicated module such as - :mod:`argparse`. - - -Importing objects from modules ------------------------------- - -.. 
ipython:: - - In [1]: import os - - In [2]: os - Out[2]: - - In [3]: os.listdir('.') - Out[3]: - ['conf.py', - 'basic_types.rst', - 'control_flow.rst', - 'functions.rst', - 'python_language.rst', - 'reusing.rst', - 'file_io.rst', - 'exceptions.rst', - 'workflow.rst', - 'index.rst'] - -And also: - -.. ipython:: - - In [4]: from os import listdir - -Importing shorthands: - -.. ipython:: - - In [5]: import numpy as np - -.. warning:: - - :: - - from os import * - - This is called the *star import* and please, **Do not use it** - - * Makes the code harder to read and understand: where do symbols come - from? - - * Makes it impossible to guess the functionality by the context and - the name (hint: `os.name` is the name of the OS), and to profit - usefully from tab completion. - - * Restricts the variable names you can use: `os.name` might override - `name`, or vise-versa. - - * Creates possible name clashes between modules. - - * Makes the code impossible to statically check for undefined - symbols. - -.. tip:: - - Modules are thus a good way to organize code in a hierarchical way. Actually, - all the scientific computing tools we are going to use are modules:: - - >>> import numpy as np # data arrays - >>> np.linspace(0, 10, 6) - array([ 0., 2., 4., 6., 8., 10.]) - >>> import scipy as sp # scientific computing - - -Creating modules ------------------ - -.. tip:: - - If we want to write larger and better organized programs (compared to - simple scripts), where some objects are defined, (variables, - functions, classes) and that we want to reuse several times, we have - to create our own *modules*. - -Let us create a module ``demo`` contained in the file ``demo.py``: - - .. literalinclude:: demo.py - -.. tip:: - - In this file, we defined two functions ``print_a`` and ``print_b``. Suppose - we want to call the ``print_a`` function from the interpreter. 
We could - execute the file as a script, but since we just want to have access to - the function ``print_a``, we are rather going to **import it as a module**. - The syntax is as follows. - - -.. ipython:: - :verbatim: - - In [1]: import demo - - - In [2]: demo.print_a() - a - - In [3]: demo.print_b() - b - -Importing the module gives access to its objects, using the -``module.object`` syntax. Don't forget to put the module's name before the -object's name, otherwise Python won't recognize the instruction. - - -Introspection - -.. ipython:: - :verbatim: - - In [4]: demo? - Type: module - Base Class: - String Form: - Namespace: Interactive - File: /home/varoquau/Projects/Python_talks/scipy_2009_tutorial/source/demo.py - Docstring: - A demo module. - - - In [5]: who - demo - - In [6]: whos - Variable Type Data/Info - ------------------------------ - demo module - - In [7]: dir(demo) - Out[7]: - ['__builtins__', - '__doc__', - '__file__', - '__name__', - '__package__', - 'c', - 'd', - 'print_a', - 'print_b'] - - - In [8]: demo. - demo.c demo.print_a demo.py - demo.d demo.print_b demo.pyc - - -Importing objects from modules into the main namespace - -.. ipython:: - :verbatim: - - In [9]: from demo import print_a, print_b - - In [10]: whos - Variable Type Data/Info - -------------------------------- - demo module - print_a function - print_b function - - In [11]: print_a() - a - -.. warning:: - - **Module caching** - - Modules are cached: if you modify ``demo.py`` and re-import it in the - old session, you will get the old one. - - Solution: - - .. sourcecode :: ipython - - In [10]: importlib.reload(demo) - -'__main__' and module loading ------------------------------- - -.. tip:: - - Sometimes we want code to be executed when a module is - run directly, but not when it is imported by another module. - ``if __name__ == '__main__'`` allows us to check whether the - module is being run directly. - -File ``demo2.py``: - - .. literalinclude:: demo2.py - -Importing it: - -.. 
ipython:: - :verbatim: - - In [11]: import demo2 - b - - In [12]: import demo2 - -Running it: - -.. ipython:: - :verbatim: - - In [13]: %run demo2 - b - a - - -Scripts or modules? How to organize your code ---------------------------------------------- - -.. Note:: Rule of thumb - - * Sets of instructions that are called several times should be - written inside **functions** for better code reusability. - - * Functions (or other bits of code) that are called from several - scripts should be written inside a **module**, so that only the - module is imported in the different scripts (do not copy-and-paste - your functions in the different scripts!). - -How modules are found and imported -.................................. - - -When the ``import mymodule`` statement is executed, the module ``mymodule`` -is searched in a given list of directories. This list includes a list -of installation-dependent default path (e.g., ``/usr/lib64/python3.11``) as -well as the list of directories specified by the environment variable -``PYTHONPATH``. - -The list of directories searched by Python is given by the ``sys.path`` -variable - -.. ipython:: - - In [1]: import sys - - In [2]: sys.path - Out[2]: - ['/home/jarrod/.venv/lectures/bin', - '/usr/lib64/python311.zip', - '/usr/lib64/python3.11', - '/usr/lib64/python3.11/lib-dynload', - '', - '/home/jarrod/.venv/lectures/lib64/python3.11/site-packages', - '/home/jarrod/.venv/lectures/lib/python3.11/site-packages'] - -Modules must be located in the search path, therefore you can: - -* write your own modules within directories already defined in the - search path (e.g. ``$HOME/.venv/lectures/lib64/python3.11/site-packages``). - You may use symbolic links (on Linux) to keep the code somewhere else. - -* modify the environment variable ``PYTHONPATH`` to include the - directories containing the user-defined modules. - - .. tip:: - - On Linux/Unix, add the following line to a file read by the shell at - startup (e.g. 
/etc/profile, .profile) - - :: - - export PYTHONPATH=$PYTHONPATH:/home/emma/user_defined_modules - - On Windows, https://support.microsoft.com/kb/310519 explains how to - handle environment variables. - -* or modify the ``sys.path`` variable itself within a Python script. - - .. tip:: - - :: - - import sys - new_path = '/home/emma/user_defined_modules' - if new_path not in sys.path: - sys.path.append(new_path) - - This method is not very robust, however, because it makes the code - less portable (user-dependent path) and because you have to add the - directory to your sys.path each time you want to import from a module - in this directory. - -.. seealso:: - - See https://docs.python.org/3/tutorial/modules.html for more information - about modules. - -Packages --------- - -A directory that contains many modules is called a *package*. A package -is a module with submodules (which can have submodules themselves, etc.). -A special file called ``__init__.py`` (which may be empty) tells Python -that the directory is a Python package, from which modules can be -imported. - -.. sourcecode:: bash - - $ ls - _build_utils/ fft/ _lib/ odr/ spatial/ - cluster/ fftpack/ linalg/ optimize/ special/ - conftest.py __init__.py linalg.pxd optimize.pxd special.pxd - constants/ integrate/ meson.build setup.py stats/ - datasets/ interpolate/ misc/ signal/ - _distributor_init.py io/ ndimage/ sparse/ - $ cd ndimage - $ ls - _filters.py __init__.py _measurements.py morphology.py src/ - filters.py _interpolation.py measurements.py _ni_docstrings.py tests/ - _fourier.py interpolation.py meson.build _ni_support.py utils/ - fourier.py LICENSE.txt _morphology.py setup.py - - -From Ipython: - -.. ipython:: - - In [1]: import scipy as sp - - In [2]: sp.__file__ - - In [3]: sp.version.version - - @verbatim - In [4]: sp.ndimage.morphology.binary_dilation? 
- Signature: - sp.ndimage.morphology.binary_dilation( - input, - structure=None, - iterations=1, - mask=None, - output=None, - border_value=0, - origin=0, - brute_force=False, - ) - Docstring: - Multidimensional binary dilation with the given structuring element. - ... - - -Good practices --------------- - -* Use **meaningful** object **names** - -* **Indentation: no choice!** - - .. tip:: - - Indenting is compulsory in Python! Every command block following a - colon bears an additional indentation level with respect to the - previous line with a colon. One must therefore indent after - ``def f():`` or ``while:``. At the end of such logical blocks, one - decreases the indentation depth (and re-increases it if a new block - is entered, etc.) - - Strict respect of indentation is the price to pay for getting rid of - ``{`` or ``;`` characters that delineate logical blocks in other - languages. Improper indentation leads to errors such as - - .. code-block:: ipython - - ------------------------------------------------------------ - IndentationError: unexpected indent (test.py, line 2) - - All this indentation business can be a bit confusing in the - beginning. However, with the clear indentation, and in the absence of - extra characters, the resulting code is very nice to read compared to - other languages. - -* **Indentation depth**: Inside your text editor, you may choose to - indent with any positive number of spaces (1, 2, 3, 4, ...). However, - it is considered good practice to **indent with 4 spaces**. You may - configure your editor to map the ``Tab`` key to a 4-space - indentation. - -* **Style guidelines** - - **Long lines**: you should not write very long lines that span over more - than (e.g.) 80 characters. Long lines can be broken with the ``\`` - character :: - - >>> long_line = "Here is a very very long line \ - ... that we break in two parts." 
- - **Spaces** - - Write well-spaced code: put whitespaces after commas, around arithmetic - operators, etc.:: - - >>> a = 1 # yes - >>> a=1 # too cramped - - A certain number of rules - for writing "beautiful" code (and more importantly using the same - conventions as anybody else!) are given in the `Style Guide for Python - Code `_. - - -____ - - -.. topic:: **Quick read** - - If you want to do a first quick pass through the Scientific Python Lectures - to learn the ecosystem, you can directly skip to the next chapter: - :ref:`numpy`. - - The remainder of this chapter is not necessary to follow the rest of - the intro part. But be sure to come back and finish this chapter later. diff --git a/intro/scipy/solutions/dir_sort.py b/intro/language/solutions/dir_sort.py similarity index 100% rename from intro/scipy/solutions/dir_sort.py rename to intro/language/solutions/dir_sort.py diff --git a/intro/scipy/solutions/path_site.py b/intro/language/solutions/path_site.py similarity index 100% rename from intro/scipy/solutions/path_site.py rename to intro/language/solutions/path_site.py diff --git a/intro/scipy/solutions/test_dir_sort.py b/intro/language/solutions/test_dir_sort.py similarity index 100% rename from intro/scipy/solutions/test_dir_sort.py rename to intro/language/solutions/test_dir_sort.py diff --git a/intro/language/standard_library.md b/intro/language/standard_library.md new file mode 100644 index 000000000..6244e9c8b --- /dev/null +++ b/intro/language/standard_library.md @@ -0,0 +1,341 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Standard Library + +:::{note} +Reference document for this section: + +- The Python Standard Library documentation: + +- Python Essential Reference, David Beazley, Addison-Wesley Professional + ::: + +## `os` module: operating system functionality + 
+_"A portable way of using operating system dependent functionality."_ + +### Directory and file manipulation + +Current directory: + +```{code-cell} +import os +os.getcwd() +``` + +List a directory: + +```{code-cell} +os.listdir(os.curdir) +``` + +Make a directory: + +```{code-cell} +os.mkdir('junkdir') +'junkdir' in os.listdir(os.curdir) +``` + +Rename the directory: + +```{code-cell} +os.rename('junkdir', 'foodir') +'junkdir' in os.listdir(os.curdir) +``` + +```{code-cell} +'foodir' in os.listdir(os.curdir) +``` + +```{code-cell} +os.rmdir('foodir') +'foodir' in os.listdir(os.curdir) +``` + +Delete a file: + +```{code-cell} +fp = open('junk.txt', 'w') +fp.close() +'junk.txt' in os.listdir(os.curdir) +``` + +```{code-cell} +os.remove('junk.txt') +'junk.txt' in os.listdir(os.curdir) +``` + +### `os.path`: path manipulations + +`os.path` provides common operations on pathnames. + +```{code-cell} +fp = open('junk.txt', 'w') +fp.close() +a = os.path.abspath('junk.txt') +a +``` + +```{code-cell} +os.path.split(a) +``` + +```{code-cell} +os.path.dirname(a) +``` + +```{code-cell} +os.path.basename(a) +``` + +```{code-cell} +os.path.splitext(os.path.basename(a)) +``` + +```{code-cell} +os.path.exists('junk.txt') +``` + +```{code-cell} +os.path.isfile('junk.txt') +``` + +```{code-cell} +os.path.isdir('junk.txt') +``` + +```{code-cell} +os.path.expanduser('~/local') +``` + +```{code-cell} +os.path.join(os.path.expanduser('~'), 'local', 'bin') +``` + +### Running an external command + +```{code-cell} +return_code = os.system('ls') +``` + +:::{note} +Alternative to `os.system` + +A noteworthy alternative to `os.system` is the [sh +module](https://amoffat.github.com/sh/). Which provides much more convenient +ways to obtain the output, error stream and exit code of the external command. 
+ +```python +import sh +com = sh.ls() + +print(com) +basic_types.md exceptions.md oop.md standard_library.md +control_flow.md first_steps.md python_language.md +demo2.py functions.md python-logo.png +demo.py io.md reusing_code.md + +type(com) +Out[33]: str +``` + +::: + +### Walking a directory + +`os.walk` generates the file names in a directory tree. + +```{code-cell} +for dirpath, dirnames, filenames in os.walk(os.curdir): + for fp in filenames: + print(os.path.abspath(fp)) +``` + +### Environment variables: + +```ipython +In [2]: os.environ.keys() +Out[2]: KeysView(environ({'SHELL': '/bin/bash', 'PWD': '/home/mb312', 'LOGNAME': 'mb312', 'HOME': '/home/mb312', 'TERM': 'xterm', 'USER': 'mb312', 'SHLVL': '1', 'PATH': '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin', 'MAIL': '/var/mail/mb312', '_': '/usr/bin/python3', 'LC_CTYPE': 'C.UTF-8'})) + +In [3]: os.environ['SHELL'] +Out[3]: '/bin/bash' +``` + ++++ + +## `shutil`: high-level file operations + +The `shutil` module provides useful file operations: + +- `shutil.rmtree`: Recursively delete a directory tree. +- `shutil.move`: Recursively move a file or directory to another location. +- `shutil.copy`: Copy a file (use `shutil.copytree` for directories). + +## `glob`: Pattern matching on files + +The `glob` module provides convenient file pattern matching. + +Find all files ending in `.txt`: + +```{code-cell} +import glob +glob.glob('*.txt') +``` + +## `sys` module: system-specific information + +System-specific information related to the Python interpreter. + +**Which version of Python** are you running and where is it installed: + +```{code-cell} +import sys +sys.platform +``` + +```{code-cell} +sys.version +``` + +```{code-cell} +sys.prefix +``` + +`sys.argv` gives you a **list of command line arguments** passed to a Python +script. It is useful when you call a script with e.g. `python my_script.py some arguments`. 
Inside the `my_script.py` script, you can get the passed arguments (here ['some', 'arguments']) with `sys.argv`. + +`sys.path` is a list of strings that specifies the search path for +modules. Initialized from `PYTHONPATH`: + +```{code-cell} +sys.path +``` + +## `pickle`: easy persistence + +Useful to store arbitrary objects to a file. Not safe or fast! + +```{code-cell} +import pickle +l = [1, None, 'Stan'] +with open('test.pkl', 'wb') as file: + pickle.dump(l, file) +``` + +```{code-cell} +with open('test.pkl', 'rb') as file: + out = pickle.load(file) +``` + +```{code-cell} +out +``` + +## Exercises + +::: {exercise-start} +:label: data-file-ex +:class: dropdown +::: + +Write a function that will load the column of numbers in `data.txt` and +calculate the min, max and sum values. Use no modules except those in the +standard library; specifically, do not use Numpy. + +{download}`data.txt`: + +::: {literalinclude} data.txt + +::: + +::: {exercise-end} +::: + +::: {solution-start} data-file-ex +:class: dropdown +::: + +```{code-cell} +def load_data(filename): + fp = open(filename) + data_string = fp.read() + fp.close() + + data = [] + for x in data_string.split(): + # Data is read in as a string. We need to convert it to floats + data.append(float(x)) + + # Could instead use the following one line with list comprehensions! + # data = [float(x) for x in data_string.split()] + return data +``` + +```{code-cell} +data = load_data("data.txt") +# Python provides these basic math functions. +print(f"min: {min(data):f}") +print(f"max: {max(data):f}") +print(f"sum: {sum(data):f}") +``` + +::: {solution-end} +::: + +::: {exercise-start} +:label: dir-sort-ex +:class: dropdown +::: + +Implement a _script_ that takes a directory name as argument, and +returns the list of '.py' files, sorted by name length. 
+ +**Hint:** try to understand the docstring of list.sort + +::: {exercise-end} +::: + +::: {solution-start} dir-sort-ex +:class: dropdown +::: + +::: {literalinclude} solutions/dir_sort.py + +::: + +::: {solution-end} +::: + ++++ + +::: {exercise-start} +:label: path-site-ex +:class: dropdown +::: + +Write a program to search your `PYTHONPATH` for the module `site.py`. + +::: {exercise-end} +::: + +::: {solution-start} path-site-ex +:class: dropdown +::: + +::: {literalinclude} solutions/path_site.py + +::: + +::: {solution-end} +::: diff --git a/intro/language/standard_library.rst b/intro/language/standard_library.rst deleted file mode 100644 index 12d5d4f97..000000000 --- a/intro/language/standard_library.rst +++ /dev/null @@ -1,276 +0,0 @@ -Standard Library -================ - -.. note:: Reference document for this section: - - * The Python Standard Library documentation: - https://docs.python.org/3/library/index.html - - * Python Essential Reference, David Beazley, Addison-Wesley Professional - -``os`` module: operating system functionality ------------------------------------------------ - -*"A portable way of using operating system dependent functionality."* - -Directory and file manipulation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Current directory: - -.. ipython:: - - In [1]: import os - - In [2]: os.getcwd() - Out[2]: '/home/jarrod/src/scientific-python-lectures/intro' - -List a directory: - -.. ipython:: - - In [3]: os.listdir(os.curdir) - Out[3]: ['intro.rst', 'scipy', 'language', 'matplotlib', 'index.rst', 'numpy', 'help'] - -Make a directory: - -.. ipython:: - - In [4]: os.mkdir('junkdir') - - In [5]: 'junkdir' in os.listdir(os.curdir) - Out[5]: True - -Rename the directory: - -.. 
ipython:: - - In [6]: os.rename('junkdir', 'foodir') - - In [7]: 'junkdir' in os.listdir(os.curdir) - Out[7]: False - - In [8]: 'foodir' in os.listdir(os.curdir) - Out[8]: True - - In [9]: os.rmdir('foodir') - - In [10]: 'foodir' in os.listdir(os.curdir) - Out[10]: False - -Delete a file: - -.. ipython:: - - In [11]: fp = open('junk.txt', 'w') - - In [12]: fp.close() - - In [13]: 'junk.txt' in os.listdir(os.curdir) - Out[13]: True - - In [14]: os.remove('junk.txt') - - In [15]: 'junk.txt' in os.listdir(os.curdir) - Out[15]: False - -``os.path``: path manipulations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``os.path`` provides common operations on pathnames. - -.. ipython:: - - In [16]: fp = open('junk.txt', 'w') - - In [17]: fp.close() - - In [18]: a = os.path.abspath('junk.txt') - - In [19]: a - Out[19]: '/home/jarrod/src/scientific-python-lectures/intro/junk.txt' - - In [20]: os.path.split(a) - Out[20]: ('/home/jarrod/src/scientific-python-lectures/intro', 'junk.txt') - - In [21]: os.path.dirname(a) - Out[21]: '/home/jarrod/src/scientific-python-lectures/intro' - - In [22]: os.path.basename(a) - Out[22]: 'junk.txt' - - In [23]: os.path.splitext(os.path.basename(a)) - Out[23]: ('junk', '.txt') - - In [24]: os.path.exists('junk.txt') - Out[24]: True - - In [25]: os.path.isfile('junk.txt') - Out[25]: True - - In [26]: os.path.isdir('junk.txt') - Out[26]: False - - In [27]: os.path.expanduser('~/local') - Out[27]: '/home/jarrod/local' - - In [28]: os.path.join(os.path.expanduser('~'), 'local', 'bin') - Out[28]: '/home/jarrod/local/bin' - -Running an external command -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. ipython:: - - In [29]: os.system('ls') - help index.rst intro.rst junk.txt language matplotlib numpy scipy - Out[29]: 0 - -.. note:: Alternative to ``os.system`` - - A noteworthy alternative to ``os.system`` is the `sh module - `_. Which provides much more convenient ways to - obtain the output, error stream and exit code of the external command. - - .. 
ipython:: - :verbatim: - - In [30]: import sh - In [31]: com = sh.ls() - - In [32]: print(com) - basic_types.rst exceptions.rst oop.rst standard_library.rst - control_flow.rst first_steps.rst python_language.rst - demo2.py functions.rst python-logo.png - demo.py io.rst reusing_code.rst - - In [33]: type(com) - Out[33]: str - -Walking a directory -~~~~~~~~~~~~~~~~~~~~ - -``os.path.walk`` generates a list of filenames in a directory tree. - -.. ipython:: - - In [10]: for dirpath, dirnames, filenames in os.walk(os.curdir): - ....: for fp in filenames: - ....: print(os.path.abspath(fp)) - ....: - ....: - /home/jarrod/src/scientific-python-lectures/intro/language/basic_types.rst - /home/jarrod/src/scientific-python-lectures/intro/language/control_flow.rst - /home/jarrod/src/scientific-python-lectures/intro/language/python_language.rst - /home/jarrod/src/scientific-python-lectures/intro/language/reusing_code.rst - /home/jarrod/src/scientific-python-lectures/intro/language/standard_library.rst - ... - -Environment variables: -~~~~~~~~~~~~~~~~~~~~~~ - -.. ipython:: - :verbatim: - - In [32]: os.environ.keys() - Out[32]: KeysView(environ({'SHELL': '/bin/bash', 'COLORTERM': 'truecolor', ...})) - - - In [34]: os.environ['SHELL'] - Out[34]: '/bin/bash' - - -``shutil``: high-level file operations ---------------------------------------- - -The ``shutil`` provides useful file operations: - - * ``shutil.rmtree``: Recursively delete a directory tree. - * ``shutil.move``: Recursively move a file or directory to another location. - * ``shutil.copy``: Copy files or directories. - -``glob``: Pattern matching on files -------------------------------------- - -The ``glob`` module provides convenient file pattern matching. - -Find all files ending in ``.txt``: - -.. 
ipython:: - - In [36]: import glob - - In [37]: glob.glob('*.txt') - Out[37]: ['junk.txt'] - -``sys`` module: system-specific information --------------------------------------------- - -System-specific information related to the Python interpreter. - -* Which version of python are you running and where is it installed: - -.. ipython:: - - - In [39]: import sys - - In [40]: sys.platform - Out[40]: 'linux' - - In [41]: sys.version - Out[41]: '3.11.8 (main, Feb 28 2024, 00:00:00) [GCC 13.2.1 20231011 (Red Hat 13.2.1-4)]' - - In [42]: sys.prefix - Out[42]: '/home/jarrod/.venv/nx' - -* List of command line arguments passed to a Python script: - -.. ipython:: - - In [43]: sys.argv - Out[43]: ['/home/jarrod/.venv/nx/bin/ipython'] - -``sys.path`` is a list of strings that specifies the search path for -modules. Initialized from PYTHONPATH: - -.. ipython:: - - In [44]: sys.path - Out[44]: - ['/home/jarrod/.venv/nx/bin', - '/usr/lib64/python311.zip', - '/usr/lib64/python3.11', - '/usr/lib64/python3.11/lib-dynload', - '', - '/home/jarrod/.venv/nx/lib64/python3.11/site-packages', - '/home/jarrod/.venv/nx/lib/python3.11/site-packages'] - -``pickle``: easy persistence -------------------------------- - -Useful to store arbitrary objects to a file. Not safe or fast! - -.. ipython:: - - In [45]: import pickle - - In [46]: l = [1, None, 'Stan'] - - In [3]: with open('test.pkl', 'wb') as file: - ...: pickle.dump(l, file) - ...: - - In [4]: with open('test.pkl', 'rb') as file: - ...: out = pickle.load(file) - ...: - - In [49]: out - Out[49]: [1, None, 'Stan'] - - -.. topic:: Exercise - - Write a program to search your ``PYTHONPATH`` for the module ``site.py``. - -:ref:`path_site` diff --git a/intro/language/test.py b/intro/language/test.py new file mode 100644 index 000000000..a8940d753 --- /dev/null +++ b/intro/language/test.py @@ -0,0 +1,4 @@ +# Contents of test.py +message = "Hello how are you?" 
+for word in message.split(): + print(word) diff --git a/intro/matplotlib/examples/plot_plot.py b/intro/matplotlib/examples/plot_plot.py deleted file mode 100644 index 2932069ac..000000000 --- a/intro/matplotlib/examples/plot_plot.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Plot and filled plots -===================== - -Simple example of plots and filling between them with matplotlib. -""" - -import numpy as np -import matplotlib.pyplot as plt - -n = 256 -X = np.linspace(-np.pi, np.pi, n) -Y = np.sin(2 * X) - -plt.axes((0.025, 0.025, 0.95, 0.95)) - -plt.plot(X, Y + 1, color="blue", alpha=1.00) -plt.fill_between(X, 1, Y + 1, color="blue", alpha=0.25) - -plt.plot(X, Y - 1, color="blue", alpha=1.00) -plt.fill_between(X, -1, Y - 1, (Y - 1) > -1, color="blue", alpha=0.25) -plt.fill_between(X, -1, Y - 1, (Y - 1) < -1, color="red", alpha=0.25) - -plt.xlim(-np.pi, np.pi) -plt.xticks([]) -plt.ylim(-2.5, 2.5) -plt.yticks([]) - -plt.show() diff --git a/intro/matplotlib/index.md b/intro/matplotlib/index.md new file mode 100644 index 000000000..3bb680a99 --- /dev/null +++ b/intro/matplotlib/index.md @@ -0,0 +1,1696 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(matplotlib)= + ++++ + +# Matplotlib: plotting + +:::{sidebar} Thanks + +Many thanks to **Bill Wing** and **Christoph Deil** for review and +corrections. + +::: + +**Authors**: _Nicolas Rougier, Mike Müller, Gaël Varoquaux_ + +## Introduction + +[Matplotlib](https://matplotlib.org/) is probably the most +used Python package for 2D-graphics. It provides both a quick +way to visualize data from Python and publication-quality figures in +many formats. We are going to explore matplotlib in interactive mode +covering most common cases. 
+ +### IPython, Jupyter, and matplotlib modes + +The [Jupyter](https://jupyter.org) notebook and the +[IPython](https://ipython.org/) enhanced interactive Python, are +tuned for the scientific-computing workflow in Python, +in combination with Matplotlib: + +For interactive matplotlib sessions, turn on the **matplotlib mode**. + +### IPython sessions + +To make plots open interactively in an IPython console session use the +following [magic +command](https://ipython.readthedocs.io/en/stable/interactive/magics.html): + +```{code-cell} +%matplotlib +``` + +### Jupyter notebook + +The Jupyter Notebook uses Matplotlib mode by default; that is, it inserts the figures into the notebook, as you run Matplotlib commands. + ++++ + +### pyplot + +_pyplot_ provides a procedural interface to the matplotlib object-oriented +plotting library. It is modeled closely after Matlab™. Therefore, the +majority of plotting commands in pyplot have Matlab™ analogs with similar +arguments. Important commands are explained with interactive examples. + +```{code-cell} +import matplotlib.pyplot as plt +``` + +## Simple plot + +In this section, we want to draw the cosine and sine functions on the same +plot. Starting from the default settings, we'll enrich the figure step by +step to make it nicer. + +First step is to get the data for the sine and cosine functions: + +```{code-cell} +import numpy as np + +X = np.linspace(-np.pi, np.pi, 256) +C, S = np.cos(X), np.sin(X) +``` + +`X` is now a numpy array with 256 values ranging from $-\pi$ to $+\pi$ +(included). `C` is the cosine (256 values) and `S` is the sine (256 +values). + +To run the code, you can execute it in a Jupyter notebook or type it in an +IPython interactive session: + +```bash +$ ipython --matplotlib +``` + +This brings us to the IPython prompt: + +```text +IPython 0.13 -- An enhanced Interactive Python. +? -> Introduction to IPython's features. +%magic -> Information about IPython's 'magic' % functions. 
+help -> Python's own help system. +object? -> Details about 'object'. ?object also works, ?? prints more. +``` + +### Plotting with default settings + +:::{hint} + +Documentation + +- [plot tutorial](https://matplotlib.org/users/pyplot_tutorial.html) +- {func}`~plot()` command + +::: + +::: {note} +:class: dropdown + +Matplotlib comes with a set of default settings that allow +customizing all kinds of properties. You can control the defaults of +almost every property in matplotlib: figure size and dpi, line width, +color and style, axes, axis and grid properties, text and font +properties and so on. + +::: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt + +X = np.linspace(-np.pi, np.pi, 256) +C, S = np.cos(X), np.sin(X) + +plt.plot(X, C) +plt.plot(X, S); +``` + +::: {note} + +You will notice that we used a semicolon (`;`) to end the last line in the +cell above. This is to prevent Jupyter or IPython echoing the return value of +this final expression back to us in the notebook or console session. It has no other effect; it does not affect the execution of the code. + +::: + +### Instantiating defaults + +:::{hint} +Documentation + +- [Customizing matplotlib](https://matplotlib.org/users/customizing.html) + ::: + +In the plotting code below, you will see that we've instantiated (and +commented) all the figure settings that influence the appearance of the plot. + +::: {note} +:class: dropdown + +The settings have been explicitly set to their default values, but +now you can interactively play with the values to explore their +effect (see [Line properties](mpl-line-properties) and [Line styles](mpl-line-styles) below). 
+ +::: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt + +# Create a figure of size 8x6 inches, 80 dots per inch +plt.figure(figsize=(8, 6), dpi=80) + +# Create a new subplot from a grid of 1x1 +plt.subplot(1, 1, 1) + +X = np.linspace(-np.pi, np.pi, 256) +C, S = np.cos(X), np.sin(X) + +# Plot cosine with a blue continuous line of width 1 (pixels) +plt.plot(X, C, color="blue", linewidth=1.0, linestyle="-") + +# Plot sine with a green continuous line of width 1 (pixels) +plt.plot(X, S, color="green", linewidth=1.0, linestyle="-") + +# Set x limits +plt.xlim(-4.0, 4.0) + +# Set x ticks +plt.xticks(np.linspace(-4, 4, 9)) + +# Set y limits +plt.ylim(-1.0, 1.0) + +# Set y ticks +plt.yticks(np.linspace(-1, 1, 5)); + +# You could also save this figure using 72 dots per inch with: +# plt.savefig("exercise_2.png", dpi=72) +``` + +### Changing colors and line widths + +:::{hint} +Documentation + +- [Controlling line properties](https://matplotlib.org/users/pyplot_tutorial.html#controlling-line-properties) +- {class}`~matplotlib.lines.Line2D` API + ::: + +::: {note} +:class: dropdown + +First step, we want to have the cosine in blue and the sine in red and a +slightly thicker line for both of them. We'll also slightly alter the figure +size to make it more horizontal. +::: + +```{code-cell} +# Generate the plot. +plt.figure(figsize=(10, 6), dpi=80) +plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-") +plt.plot(X, S, color="red", linewidth=2.5, linestyle="-"); + +# Get the current figure (gcf) into a variable for later use. +fig_to_update = plt.gcf() +``` + +::: {note} +:class: dropdown + +The final line `fig_to_update = plt.gcf()` uses `plt.gcf()` to Get the Current Figure — the figure we've just built in the cell. We then store that figure in the `fig_to_update` variable, so we can restore it, and update it, in the cells below. This is not a very common pattern in general, we are using it here to show you how to build up a figure in steps. 
+ +::: + +### Setting limits + +:::{hint} +Documentation + +- {func}`xlim()` command +- {func}`ylim()` command + ::: + +::: {note} +:class: dropdown + +Current limits of the figure are a bit too tight and we want to make +some space in order to clearly see all data points. +::: + +::: {note} +:class: dropdown + +Following on from the note above, for the purposes of the tutorial, we first +restore the figure we stored above (with `plt.figure(fig_to_update)`), then we +add the limits to the figure, and finally, we prompt Jupyter to display the +figure by putting the figure variable as an expression in the last line of the +cell. + +Again, this pattern of restore, update, redisplay is not a very common one in ordinary use of Matplotlib; we use it here to allow us to separate the various steps in the process of updating the figure. +::: + +```{code-cell} +# Restore previous figure, ready to update below. +plt.figure(fig_to_update) + +# Setting the axis limits. +plt.xlim(X.min() * 1.2, X.max() * 1.2) +plt.ylim(C.min() * 1.2, C.max() * 1.2) + +# Make Jupyter display updated figure. +fig_to_update +``` + +### Setting ticks + +:::{hint} +Documentation + +- {func}`xticks()` command +- {func}`yticks()` command +- [Tick container](https://matplotlib.org/users/artists.html#axis-container) +- [Tick locating and formatting](https://matplotlib.org/api/ticker_api.html) + ::: + +::: {note} +:class: dropdown + +Current ticks are not ideal because they do not show the interesting values +($\pm \pi$, $\pm \frac{\pi}{2}$) for sine and cosine. We'll change them such +that they show only these values. +::: + +```{code-cell} +# Restore figure we are working on. +plt.figure(fig_to_update) + +# Set x and y ticks. +plt.xticks([-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi]) +plt.yticks([-1, 0, +1]) + +# Make Jupyter display updated figure. 
+fig_to_update +``` + +### Setting tick labels + +:::{hint} +Documentation + +- [Working with text](https://matplotlib.org/users/index_text.html) +- {func}`~xticks()` command +- {func}`~yticks()` command +- {meth}`~matplotlib.axes.Axes.set_xticklabels()` +- {meth}`~matplotlib.axes.Axes.set_yticklabels()` + ::: + +::: {note} +:class: dropdown + +Ticks are now properly placed but their label is not very explicit. +We could guess that 3.142 is $\pi$ but it would be better to make it +explicit. When we set tick values, we can also provide a +corresponding label in the second argument list. Note that we'll use +latex to allow for nice rendering of the label. +::: + +{{ clear_floats }} + +```{code-cell} +# Restore figure +plt.figure(fig_to_update) + +# Update tick labels. +plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$']) + +plt.yticks([-1, 0, +1], + [r'$-1$', r'$0$', r'$+1$']) + +# Force display of updated figure. +fig_to_update +``` + +### Moving spines + ++++ + +:::{hint} +Documentation + +- {mod}`~matplotlib.spines` API +- [Axis container](https://matplotlib.org/users/artists.html#axis-container) +- [Transformations tutorial](https://matplotlib.org/users/transforms_tutorial.html) + ::: + +::: {note} +:class: dropdown + +Spines are the lines connecting the axis tick marks and noting the +boundaries of the data area. They can be placed at arbitrary +positions and until now, they were on the border of the axis. We'll +change that since we want to have them in the middle. Since there are +four of them (top/bottom/left/right), we'll discard the top and right +by setting their color to none and we'll move the bottom and left +ones to coordinate 0 in data space coordinates. +::: + +{{ clear_floats }} + +```{code-cell} +# Restore figure +plt.figure(fig_to_update) + +# Update spines. 
+ax = plt.gca() # gca stands for 'get current axis' +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') +ax.xaxis.set_ticks_position('bottom') +ax.spines['bottom'].set_position(('data',0)) +ax.yaxis.set_ticks_position('left') +ax.spines['left'].set_position(('data',0)) + +# Force display of updated figure. +fig_to_update +``` + +### Adding a legend + ++++ + +:::{hint} +Documentation + +- [Legend guide](https://matplotlib.org/users/legend_guide.html) +- {func}`legend()` command +- {mod}`~matplotlib.legend` API + ::: + +::: {note} +:class: dropdown + +Let's add a legend in the upper left corner. This only requires +adding the keyword argument label (that will be used in the legend +box) to the plot commands. + +::: + +{{ clear_floats }} + +```{code-cell} +# Restore figure +plt.figure(fig_to_update) + +# Add legend. +plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-", label="cosine") +plt.plot(X, S, color="red", linewidth=2.5, linestyle="-", label="sine") + +plt.legend(loc='upper left') + +# Force display of updated figure. +fig_to_update +``` + +### Annotate some points + ++++ + +:::{hint} +Documentation + +- [Annotating axis](https://matplotlib.org/users/annotations_guide.html) +- {func}`annotate()` command + ::: + +::: {note} +:class: dropdown + +Let's annotate some interesting points using the annotate command. We +chose the $2\pi / 3$ value and we want to annotate both the sine and the +cosine. We'll first draw a marker on the curve as well as a straight +dotted line. Then, we'll use the annotate command to display some +text with an arrow. +::: + +{{ clear_floats }} + +```{code-cell} +# Restore figure +plt.figure(fig_to_update) + +# Annotate points. 
+t = 2 * np.pi / 3 +plt.plot([t, t], [0, np.cos(t)], color='blue', linewidth=2.5, linestyle="--") +plt.scatter([t, ], [np.cos(t), ], 50, color='blue') + +plt.annotate(r'$cos(\frac{2\pi}{3})=-\frac{1}{2}$', + xy=(t, np.cos(t)), xycoords='data', + xytext=(-90, -50), textcoords='offset points', fontsize=16, + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) + +plt.plot([t, t],[0, np.sin(t)], color='red', linewidth=2.5, linestyle="--") +plt.scatter([t, ],[np.sin(t), ], 50, color='red') + +plt.annotate(r'$sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$', + xy=(t, np.sin(t)), xycoords='data', + xytext=(+10, +30), textcoords='offset points', fontsize=16, + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) + +# Force display of updated figure. +fig_to_update +``` + +### Devil is in the details + ++++ + +:::{hint} +Documentation + +- {mod}`~matplotlib.artist` API +- {meth}`~matplotlib.text.Text.set_bbox()` method + ::: + +::: {note} +:class: dropdown + +The tick labels are now hardly visible because of the blue and red +lines. We can make them bigger and we can also adjust their +properties such that they'll be rendered on a semi-transparent white +background. This will allow us to see both the data and the labels. +::: + +{{ clear_floats }} + +```{code-cell} +# Restore figure +plt.figure(fig_to_update) + +# Set properties of tick labels. +for label in ax.get_xticklabels() + ax.get_yticklabels(): + label.set_fontsize(16) + label.set_bbox(dict(facecolor='white', edgecolor='None', alpha=0.65)) + +# Force display of updated figure. +fig_to_update +``` + +## Figures, Subplots, Axes and Ticks + +A **"figure"** in matplotlib means the whole window in the user interface. +Within this figure there can be **"subplots"**. + +::: {note} +:class: dropdown + +So far we have used implicit figure and axes creation. This is handy for +fast plots. We can have more control over the display using figure, +subplot, and axes explicitly. 
While subplot positions the plots in a +regular grid, axes allows free placement within the figure. Both can be +useful depending on your intention. We've already worked with figures and +subplots without explicitly calling them. When we call plot, matplotlib +calls {func}`gca` to get the current axes and gca in turn calls {func}`gcf` to +get the current figure. If there is none it calls {func}`figure` to make one, +strictly speaking, to make a `subplot(111)`. Let's look at the details. +::: + +### Figures + +::: {note} +:class: dropdown + +A figure is a window in the GUI that has "Figure #" as title. Figures are +numbered starting from 1 as opposed to the normal Python way starting from 0. +This is clearly MATLAB-style. There are several parameters that determine what +the figure looks like: +::: + +| Argument | Default | Description | +| ----------- | ------------------ | ------------------------------------------- | +| `num` | `1` | number of figure | +| `figsize` | `figure.figsize` | figure size in inches (width, height) | +| `dpi` | `figure.dpi` | resolution in dots per inch | +| `facecolor` | `figure.facecolor` | color of the drawing background | +| `edgecolor` | `figure.edgecolor` | color of edge around the drawing background | +| `frameon` | `True` | draw figure frame or not | + +::: {note} +:class: dropdown + +The defaults can be specified in the resource file and will be used most of +the time. Only the number of the figure is frequently changed. + +As with other objects, you can set figure properties with the `plt.setp` +function, or with the `set_`(something) methods. + +When you work with the GUI, rather than in a notebook, you can close a figure +by clicking on the x in the upper right corner. But you can close a figure +programmatically by calling close. Depending on the argument it closes (1) the +current figure (no argument), (2) a specific figure (figure number or figure +instance as argument), or (3) all figures (`"all"` as argument). 
+::: + +```{code-cell} +# Useful working in a GUI outside the notebook. +plt.close(1) # Closes figure 1 +``` + +### Subplots + +::: {note} +:class: dropdown + +With subplot you can arrange plots in a regular grid. You need to specify +the number of rows and columns and the number of the plot. Note that the +[gridspec](https://matplotlib.org/users/gridspec.html) command +is a more powerful alternative. +::: + +{{ clear_floats }} + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.subplot(2, 1, 1) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(2,1,1)", ha="center", va="center", size=24, alpha=0.5) + +plt.subplot(2, 1, 2) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(2,1,2)", ha="center", va="center", size=24, alpha=0.5) +# Title for whole figure (rather than current subplot). +plt.suptitle('Horizontal subplots') + +plt.tight_layout() +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.subplot(1, 2, 1) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(1,2,1)", ha="center", va="center", size=24, alpha=0.5) + +plt.subplot(1, 2, 2) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(1,2,2)", ha="center", va="center", size=24, alpha=0.5) +plt.suptitle('Vertical subplots') + +plt.tight_layout() +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.subplot(2, 2, 1) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(2,2,1)", ha="center", va="center", size=20, alpha=0.5) + +plt.subplot(2, 2, 2) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(2,2,2)", ha="center", va="center", size=20, alpha=0.5) + +plt.subplot(2, 2, 3) +plt.xticks([]) +plt.yticks([]) + +plt.text(0.5, 0.5, "subplot(2,2,3)", ha="center", va="center", size=20, alpha=0.5) + +plt.subplot(2, 2, 4) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "subplot(2,2,4)", ha="center", va="center", size=20, alpha=0.5) +plt.suptitle('Subplot grid') + +plt.tight_layout() +``` + 
+```{code-cell} +:tags: [hide-input] + +from matplotlib import gridspec + +plt.figure(figsize=(6, 4)) +G = gridspec.GridSpec(3, 3) + +axes_1 = plt.subplot(G[0, :]) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "Axes 1", ha="center", va="center", size=24, alpha=0.5) + +axes_2 = plt.subplot(G[1, :-1]) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "Axes 2", ha="center", va="center", size=24, alpha=0.5) + +axes_3 = plt.subplot(G[1:, -1]) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "Axes 3", ha="center", va="center", size=24, alpha=0.5) + +axes_4 = plt.subplot(G[-1, 0]) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "Axes 4", ha="center", va="center", size=24, alpha=0.5) + +axes_5 = plt.subplot(G[-1, -2]) +plt.xticks([]) +plt.yticks([]) +plt.text(0.5, 0.5, "Axes 5", ha="center", va="center", size=24, alpha=0.5) +plt.suptitle('Subplot with gridspec') + +plt.tight_layout() +``` + +### Axes + +Axes are very similar to subplots but allow placement of plots at any location +in the figure. So if we want to put a smaller plot inside a bigger one we do +so with axes. + +```{code-cell} +:tags: [hide-input] + +plt.axes((0.1, 0.1, 0.8, 0.8)) +plt.xticks([]) +plt.yticks([]) +plt.text( + 0.6, 0.6, "axes([0.1, 0.1, 0.8, 0.8])", ha="center", va="center", size=20, alpha=0.5 +); +``` + +```{code-cell} +:tags: [hide-input] + +plt.axes((0.2, 0.2, 0.3, 0.3)) +plt.xticks([]) +plt.yticks([]) +plt.text( + 0.5, 0.5, "axes([0.2, 0.2, 0.3, 0.3])", ha="center", va="center", size=16, alpha=0.5 +); +``` + +### Ticks + +Well formatted ticks are an important part of publishing-ready +figures. Matplotlib provides a totally configurable system for ticks. There are +tick locators to specify where ticks should appear and tick formatters to give +ticks the appearance you want. Major and minor ticks can be located and +formatted independently from each other. Per default minor ticks are not shown, +i.e. 
there is only an empty list for them because the minor tick locator is a `NullLocator` (see +below). + ++++ + +#### Tick Locators + +Tick locators control the positions of the ticks. They are set as +follows: + +```python +ax = plt.gca() +ax.xaxis.set_major_locator(eval(locator)) +``` + +There are several locators for different kinds of requirements: + +```{code-cell} +:tags: [hide-input] + +from matplotlib import ticker + +def tickline(): + plt.xlim(0, 10), plt.ylim(-1, 1), plt.yticks([]) + ax = plt.gca() + ax.spines["right"].set_color("none") + ax.spines["left"].set_color("none") + ax.spines["top"].set_color("none") + ax.xaxis.set_ticks_position("bottom") + ax.spines["bottom"].set_position(("data", 0)) + ax.yaxis.set_ticks_position("none") + ax.xaxis.set_minor_locator(ticker.MultipleLocator(0.1)) + ax.plot(np.arange(11), np.zeros(11)) + return ax + +locators = [ + "ticker.NullLocator()", + "ticker.MultipleLocator(1.0)", + "ticker.FixedLocator([0, 2, 8, 9, 10])", + "ticker.IndexLocator(3, 1)", + "ticker.LinearLocator(5)", + "ticker.LogLocator(2, [1.0])", + "ticker.AutoLocator()", +] + +n_locators = len(locators) + +size = 512, 40 * n_locators +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) + +for i, locator in enumerate(locators): + plt.subplot(n_locators, 1, i + 1) + ax = tickline() + ax.xaxis.set_major_locator(eval(locator)) + plt.text(5, 0.3, locator[7:], ha="center") + +plt.subplots_adjust(bottom=0.01, top=0.99, left=0.01, right=0.99) +``` + +All of these "locators" (see code above) derive from the base class +{class}`matplotlib.ticker.Locator`. You can make your own locator deriving +from it. Handling dates as ticks can be especially tricky. Therefore, +matplotlib provides special locators in matplotlib.dates. 
+ ++++ + +## Other Types of Plots: examples and exercises + +### Regular Plots + +```{code-cell} +:tags: [hide-input] + +n = 256 +X = np.linspace(-np.pi, np.pi, n) +Y = np.sin(2 * X) + +plt.axes((0.025, 0.025, 0.95, 0.95)) + +plt.plot(X, Y + 1, color="blue", alpha=1.00) +plt.fill_between(X, 1, Y + 1, color="blue", alpha=0.25) + +plt.plot(X, Y - 1, color="blue", alpha=1.00) +plt.fill_between(X, -1, Y - 1, (Y - 1) > -1, color="blue", alpha=0.25) +plt.fill_between(X, -1, Y - 1, (Y - 1) < -1, color="red", alpha=0.25) + +plt.xlim(-np.pi, np.pi) +plt.xticks([]) +plt.ylim(-2.5, 2.5) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-fill-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of filled areas: + +:::{hint} +You need to use the {func}`fill_between()` command. +::: + +```{code-cell} +:tags: [hide-output] + +n = 256 +X = np.linspace(-np.pi, np.pi, n) +Y = np.sin(2 * X) + +plt.plot(X, Y + 1, color='blue', alpha=1.00) +plt.plot(X, Y - 1, color='blue', alpha=1.00) +``` + +::: {exercise-end} +::: + ++++ + +Click on the hidden code for the figure above for solution. + ++++ + +### Scatter Plots + +```{code-cell} +:tags: [hide-input] + +n = 1024 +rng = np.random.default_rng() +X = rng.normal(0, 1, n) +Y = rng.normal(0, 1, n) +T = np.arctan2(Y, X) + +plt.axes((0.025, 0.025, 0.95, 0.95)) +plt.scatter(X, Y, s=75, c=T, alpha=0.5) + +plt.xlim(-1.5, 1.5) +plt.xticks([]) +plt.ylim(-1.5, 1.5) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-scatter-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of marker size, color and transparency. + +:::{hint} +Color is given by angle of (X,Y). +::: + +```{code-cell} +:tags: [hide-output] + +n = 1024 +rng = np.random.default_rng() +X = rng.normal(0,1,n) +Y = rng.normal(0,1,n) + +plt.scatter(X,Y) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. 
+ +### Bar Plots + +```{code-cell} +:tags: [hide-input] + +n = 12 +X = np.arange(n) +rng = np.random.default_rng() +Y1 = (1 - X / n) * rng.uniform(0.5, 1.0, n) +Y2 = (1 - X / n) * rng.uniform(0.5, 1.0, n) + +plt.axes((0.025, 0.025, 0.95, 0.95)) +plt.bar(X, +Y1, facecolor="#9999ff", edgecolor="white") +plt.bar(X, -Y2, facecolor="#ff9999", edgecolor="white") + +for x, y in zip(X, Y1): + plt.text(x, y + 0.05, f"{y:.2f}", ha="center", va="bottom") + +for x, y in zip(X, Y2): + plt.text(x, -y - 0.05, f"{y:.2f}", ha="center", va="top") + +plt.xlim(-0.5, n) +plt.xticks([]) +plt.ylim(-1.25, 1.25) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-bar-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic by +adding labels for red bars. + +:::{hint} +You need to take care of text alignment. +::: + +```{code-cell} +:tags: [hide-output] + +n = 12 +X = np.arange(n) +rng = np.random.default_rng() +Y1 = (1 - X / float(n)) * rng.uniform(0.5, 1.0, n) +Y2 = (1 - X / float(n)) * rng.uniform(0.5, 1.0, n) + +plt.bar(X, +Y1, facecolor='#9999ff', edgecolor='white') +plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white') + +for x, y in zip(X, Y1): + plt.text(x + 0.4, y + 0.05, '%.2f' % y, ha='center', va='bottom') + +plt.ylim(-1.25, +1.25) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. 
+ ++++ + +### Contour Plots + +```{code-cell} +:tags: [hide-input] + +def f(x, y): + return (1 - x / 2 + x**5 + y**3) * np.exp(-(x**2) - y**2) + +n = 256 +x = np.linspace(-3, 3, n) +y = np.linspace(-3, 3, n) +X, Y = np.meshgrid(x, y) + +plt.axes((0.025, 0.025, 0.95, 0.95)) + +plt.contourf(X, Y, f(X, Y), 8, alpha=0.75, cmap="hot") +C = plt.contour(X, Y, f(X, Y), 8, colors="black", linewidths=0.5) +plt.clabel(C, inline=1, fontsize=10) + +plt.xticks([]) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-countour-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of the colormap (see [Colormaps] below). + +:::{hint} +You need to use the {func}`clabel()` command. +::: + +```{code-cell} +:tags: [hide-output] + +def f(x, y): + return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 -y ** 2) + +n = 256 +x = np.linspace(-3, 3, n) +y = np.linspace(-3, 3, n) +X, Y = np.meshgrid(x, y) + +plt.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='jet') +C = plt.contour(X, Y, f(X, Y), 8, colors='black', linewidth=.5) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Imshow + +```{code-cell} +:tags: [hide-input] + +def f(x, y): + return (1 - x / 2 + x**5 + y**3) * np.exp(-(x**2) - y**2) + +n = 10 +x = np.linspace(-3, 3, int(3.5 * n)) +y = np.linspace(-3, 3, int(3.0 * n)) +X, Y = np.meshgrid(x, y) +Z = f(X, Y) + +plt.imshow(Z, interpolation="nearest", cmap="bone", origin="lower") +plt.axes((0.025, 0.025, 0.95, 0.95)) +plt.colorbar(shrink=0.92) + +plt.xticks([]) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-imshow-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of colormap, image interpolation and origin. 
+ +:::{hint} +You need to take care of the `origin` of the image in the `imshow` command and +use a {func}`colorbar()` +::: + +```{code-cell} +:tags: [hide-output] + +def f(x, y): + return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2) + +n = 10 +x = np.linspace(-3, 3, 4 * n) +y = np.linspace(-3, 3, 3 * n) +X, Y = np.meshgrid(x, y) +plt.imshow(f(X, Y)) +``` + ++++ + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Pie Charts + +```{code-cell} +:tags: [hide-input] + +n = 20 +Z = np.ones(n) +Z[-1] *= 2 + +plt.axes((0.025, 0.025, 0.95, 0.95)) + +plt.pie(Z, explode=Z * 0.05, colors=[f"{i / float(n):f}" for i in range(n)]) +plt.axis("equal") +plt.xticks([]) +plt.yticks(); +``` + +::: {exercise-start} +:label: plot-pie-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of colors and slices size. + +:::{hint} +You need to modify `Z`. +::: + +```{code-cell} +:tags: [hide-output] + +rng = np.random.default_rng() +Z = rng.uniform(0, 1, 20) +plt.pie(Z); +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Quiver Plots + +```{code-cell} +:tags: [hide-input] + +n = 8 +X, Y = np.mgrid[0:n, 0:n] +T = np.arctan2(Y - n / 2.0, X - n / 2.0) +R = 10 + np.sqrt((Y - n / 2.0) ** 2 + (X - n / 2.0) ** 2) +U, V = R * np.cos(T), R * np.sin(T) + +plt.axes((0.025, 0.025, 0.95, 0.95)) +plt.quiver(X, Y, U, V, R, alpha=0.5) +plt.quiver(X, Y, U, V, edgecolor="k", facecolor="None", linewidth=0.5) + +plt.xlim(-1, n) +plt.xticks([]) +plt.ylim(-1, n) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-quiver-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of colors and orientations. + +:::{hint} +You need to draw arrows twice. 
+::: + +```{code-cell} +:tags: [hide-output] + +n = 8 +X, Y = np.mgrid[0:n, 0:n] +plt.quiver(X, Y) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Grids + +```{code-cell} +:tags: [hide-input] + +from matplotlib import ticker + +ax = plt.axes((0.025, 0.025, 0.95, 0.95)) + +ax.set_xlim(0, 4) +ax.set_ylim(0, 3) +ax.xaxis.set_major_locator(ticker.MultipleLocator(1.0)) +ax.xaxis.set_minor_locator(ticker.MultipleLocator(0.1)) +ax.yaxis.set_major_locator(ticker.MultipleLocator(1.0)) +ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.1)) +ax.grid(which="major", axis="x", linewidth=0.75, linestyle="-", color="0.75") +ax.grid(which="minor", axis="x", linewidth=0.25, linestyle="-", color="0.75") +ax.grid(which="major", axis="y", linewidth=0.75, linestyle="-", color="0.75") +ax.grid(which="minor", axis="y", linewidth=0.25, linestyle="-", color="0.75") +ax.set_xticklabels([]) +ax.set_yticklabels([]); +``` + +::: {exercise-start} +:label: plot-grid-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic taking +care of line styles. + +```{code-cell} +:tags: [hide-output] + +axes = plt.gca() +axes.set_xlim(0, 4) +axes.set_ylim(0, 3) +axes.set_xticklabels([]) +axes.set_yticklabels([]) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Multi Plots + +```{code-cell} +:tags: [hide-input] + +fig = plt.figure() +fig.subplots_adjust(bottom=0.025, left=0.025, top=0.975, right=0.975) + +plt.subplot(2, 1, 1) +plt.xticks([]), plt.yticks([]) + +plt.subplot(2, 3, 4) +plt.xticks([]) +plt.yticks([]) + +plt.subplot(2, 3, 5) +plt.xticks([]) +plt.yticks([]) + +plt.subplot(2, 3, 6) +plt.xticks([]) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-multiplot-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic. + +:::{hint} +You can use several subplots with different partition. 
+::: + +```{code-cell} +:tags: [hide-output] + +plt.subplot(2, 2, 1) +plt.subplot(2, 2, 3) +plt.subplot(2, 2, 4) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Polar Axis + +```{code-cell} +:tags: [hide-input] + +import matplotlib + +jet = matplotlib.colormaps["jet"] + +ax = plt.axes((0.025, 0.025, 0.95, 0.95), polar=True) + +N = 20 +theta = np.arange(0.0, 2 * np.pi, 2 * np.pi / N) +rng = np.random.default_rng() +radii = 10 * rng.random(N) +width = np.pi / 4 * rng.random(N) +bars = plt.bar(theta, radii, width=width, bottom=0.0) + +for r, bar in zip(radii, bars, strict=True): + bar.set_facecolor(jet(r / 10.0)) + bar.set_alpha(0.5) + +ax.set_xticklabels([]) +ax.set_yticklabels([]); +``` + +::: {exercise-start} +:label: plot-polar-ex +:class: dropdown +::: + +:::{hint} +You only need to modify the `axes` line +::: + +Starting from the code below, try to reproduce the graphic. + +```{code-cell} +:tags: [hide-output] + +plt.axes([0, 0, 1, 1]) + +N = 20 +theta = np.arange(0., 2 * np.pi, 2 * np.pi / N) +rng = np.random.default_rng() +radii = 10 * rng.random(N) +width = np.pi / 4 * rng.random(N) +bars = plt.bar(theta, radii, width=width, bottom=0.0) + +for r, bar in zip(radii, bars): + bar.set_facecolor(plt.cm.jet(r / 10.)) + bar.set_alpha(0.5) +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### 3D Plots + +```{code-cell} +:tags: [hide-input] + +from mpl_toolkits.mplot3d import Axes3D + +ax: Axes3D = plt.figure().add_subplot(projection="3d") +x = np.arange(-4, 4, 0.25) +y = np.arange(-4, 4, 0.25) +X, Y = np.meshgrid(x, y) +R = np.sqrt(X**2 + Y**2) +Z = np.sin(R) + +ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap="hot") +ax.contourf(X, Y, Z, zdir="z", offset=-2, cmap="hot") +ax.set_zlim(-2, 2); +``` + +::: {exercise-start} +:label: plot-3d-ex +:class: dropdown +::: + +Starting from the code below, try to reproduce the graphic. 
+ +:::{hint} +You need to use {func}`contourf()` +::: + +```{code-cell} +:tags: [hide-output] + +from mpl_toolkits.mplot3d import Axes3D + +fig = plt.figure() +ax = Axes3D(fig) +X = np.arange(-4, 4, 0.25) +Y = np.arange(-4, 4, 0.25) +X, Y = np.meshgrid(X, Y) +R = np.sqrt(X**2 + Y**2) +Z = np.sin(R) + +ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='hot') +``` + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. + ++++ + +### Text + +```{code-cell} +:tags: [hide-input] + +eqs = [] +eqs.append( + r"$W^{3\beta}_{\delta_1 \rho_1 \sigma_2} = U^{3\beta}_{\delta_1 \rho_1} + \frac{1}{8 \pi 2} \int^{\alpha_2}_{\alpha_2} d \alpha^\prime_2 \left[\frac{ U^{2\beta}_{\delta_1 \rho_1} - \alpha^\prime_2U^{1\beta}_{\rho_1 \sigma_2} }{U^{0\beta}_{\rho_1 \sigma_2}}\right]$" +) +eqs.append( + r"$\frac{d\rho}{d t} + \rho \vec{v}\cdot\nabla\vec{v} = -\nabla p + \mu\nabla^2 \vec{v} + \rho \vec{g}$" +) +eqs.append(r"$\int_{-\infty}^\infty e^{-x^2}dx=\sqrt{\pi}$") +eqs.append(r"$E = mc^2 = \sqrt{{m_0}^2c^4 + p^2c^2}$") +eqs.append(r"$F_G = G\frac{m_1m_2}{r^2}$") + +plt.axes((0.025, 0.025, 0.95, 0.95)) + +rng = np.random.default_rng() + +for i in range(24): + index = rng.integers(0, len(eqs)) + eq = eqs[index] + size = np.random.uniform(12, 32) + x, y = np.random.uniform(0, 1, 2) + alpha = np.random.uniform(0.25, 0.75) + plt.text( + x, + y, + eq, + ha="center", + va="center", + color="#11557c", + alpha=alpha, + transform=plt.gca().transAxes, + fontsize=size, + clip_on=True, + ) +plt.xticks([]) +plt.yticks([]); +``` + +::: {exercise-start} +:label: plot-text-ex +:class: dropdown +::: + +Try to do the same from scratch ! + +:::{hint} +Have a look at the [matplotlib logo](https://matplotlib.org/examples/api/logo2.html). +::: + +::: {exercise-end} +::: + +Click on the hidden code for the figure above for solution. 
+ +--- + ++++ + +:::{admonition} Quick read + +If you want to do a first quick pass through the Scientific Python Lectures +to learn the ecosystem, you can directly skip to the next chapter: +{ref}`scipy`. + +The remainder of this chapter is not necessary to follow the rest of +the intro part. But be sure to come back and finish this chapter later. +::: + ++++ + +## Beyond this tutorial + +Matplotlib benefits from extensive documentation as well as a large +community of users and developers. Here are some links of interest: + ++++ + +### Tutorials + +- [Pyplot tutorial](https://matplotlib.org/users/pyplot_tutorial.html) + + - Introduction + - Controlling line properties + - Working with multiple figures and axes + - Working with text + +- [Image tutorial](https://matplotlib.org/users/image_tutorial.html) + + - Startup commands + - Importing image data into NumPy arrays + - Plotting NumPy arrays as images + +- [Text tutorial](https://matplotlib.org/users/index_text.html) + + - Text introduction + - Basic text commands + - Text properties and layout + - Writing mathematical expressions + - Text rendering With LaTeX + - Annotating text + +- [Artist tutorial](https://matplotlib.org/users/artists.html) + + - Introduction + - Customizing your objects + - Object containers + - Figure container + - Axes container + - Axis containers + - Tick containers + +- [Path tutorial](https://matplotlib.org/users/path_tutorial.html) + + - Introduction + - Bézier example + - Compound paths + +- [Transforms tutorial](https://matplotlib.org/users/transforms_tutorial.html) + + - Introduction + - Data coordinates + - Axes coordinates + - Blended transformations + - Using offset transforms to create a shadow effect + - The transformation pipeline + ++++ + +### Matplotlib documentation + +- [User guide](https://matplotlib.org/users/index.html) + +- [FAQ](https://matplotlib.org/faq/index.html) + + - Installation + - Usage + - How-To + - Troubleshooting + - Environment Variables + +- 
[Screenshots](https://matplotlib.org/users/screenshots.html) + ++++ + +### Code documentation + +The code is well documented and you can quickly access a specific command +from within a Python session: + +```{code-cell} +import matplotlib.pyplot as plt +help(plt.plot) +``` + +### Galleries + +The [matplotlib gallery](https://matplotlib.org/gallery.html) is +also incredibly useful when you search how to render a given graphic. Each +example comes with its source. + ++++ + +### Mailing lists + +Finally, there is a [user mailing +list](https://mail.python.org/mailman/listinfo/matplotlib-users) where you can +ask for help and a [developers mailing +list](https://mail.python.org/mailman/listinfo/matplotlib-devel) that is more +technical. + ++++ + +## Quick reference + +Here is a set of tables that show the main properties and styles. + +(mpl-line-properties)= + +### Line properties + +::: {list-table} +:header-rows: 1 +:widths: 20 30 50 + +- - Property + - Description + - Appearance + +- - alpha (or a) + - alpha transparency on 0-1 scale + - ::: {glue} plot_alpha + :doc: quick_reference_figures.md + ::: + +- - anti-aliased + - True or False - use anti-aliased rendering + - ::: {glue} plot_aliased + :doc: quick_reference_figures.md + ::: + ::: {glue} plot_antialiased + :doc: quick_reference_figures.md + ::: + +- - color (or c) + - matplotlib color arg + - ::: {glue} plot_color + :doc: quick_reference_figures.md + ::: + +- - linestyle (or ls) + - see [Line styles](mpl-line-styles) + - + +- - linewidth (or lw) + - float, the line width in points + - ::: {glue} plot_linewidth + :doc: quick_reference_figures.md + ::: + +- - solid_capstyle + - Cap style for solid lines + - ::: {glue} plot_solid_capstyle + :doc: quick_reference_figures.md + ::: + +- - solid_joinstyle + - Join style for solid lines + - ::: {glue} plot_solid_joinstyle + :doc: quick_reference_figures.md + ::: + +- - dash_capstyle + - Cap style for dashes + - ::: {glue} plot_dash_capstyle + :doc:
quick_reference_figures.md + ::: + +- - dash_joinstyle + - Join style for dashes + - ::: {glue} plot_dash_joinstyle + :doc: quick_reference_figures.md + ::: + +- - marker + - see [Markers](mpl-markers) + - + +- - markeredgewidth (mew) + - line width around the marker symbol + - ::: {glue} plot_mew + :doc: quick_reference_figures.md + ::: + +- - markeredgecolor (mec) + - edge color if a marker is used + - ::: {glue} plot_mec + :doc: quick_reference_figures.md + ::: + +- - markerfacecolor (mfc) + - face color if a marker is used + - ::: {glue} plot_mfc + :doc: quick_reference_figures.md + ::: + +- - markersize (ms) + - size of the marker in points + - ::: {glue} plot_ms + :doc: quick_reference_figures.md + ::: + +::: + +See the [Line property figures](mpl-line-property-figures) for code to +generate graphics for the table above. + ++++ + +(mpl-line-styles)= + +### Line styles + +::: {glue} line_styles_fig +:doc: quick_reference_figures.md +::: + +See [Line style figure](mpl-line-style-figure) for code. + ++++ + +(mpl-markers)= + +### Markers + +::: {glue} marker_styles_fig +:doc: quick_reference_figures.md +::: + +See [Marker style figure](mpl-marker-style-figure) for code. + +### Colormaps + +All colormaps can be reversed by appending `_r`. For instance, `gray_r` is +the reverse of `gray`. + +If you want to know more about colormaps, check the [documentation on Colormaps in matplotlib](https://matplotlib.org/tutorials/colors/colormaps.html). + +::: {glue} colormap_fig +:doc: quick_reference_figures.md +::: + +See [Colormap figure](mpl-colormap-figure) for code. diff --git a/intro/matplotlib/index.rst b/intro/matplotlib/index.rst deleted file mode 100644 index b07e4fa92..000000000 --- a/intro/matplotlib/index.rst +++ /dev/null @@ -1,1262 +0,0 @@ - -.. _matplotlib: - -.. currentmodule:: matplotlib.pyplot - -==================== -Matplotlib: plotting -==================== - -.. 
sidebar:: **Thanks** - - Many thanks to **Bill Wing** and **Christoph Deil** for review and - corrections. - -**Authors**: *Nicolas Rougier, Mike Müller, Gaël Varoquaux* - -.. contents:: Chapter contents - :local: - :depth: 1 - -Introduction -============ - -.. tip:: - - `Matplotlib `__ is probably the most - used Python package for 2D-graphics. It provides both a quick - way to visualize data from Python and publication-quality figures in - many formats. We are going to explore matplotlib in interactive mode - covering most common cases. - -IPython, Jupyter, and matplotlib modes ---------------------------------------- - -.. tip:: - - The `Jupyter `_ notebook and the - `IPython `_ enhanced interactive Python, are - tuned for the scientific-computing workflow in Python, - in combination with Matplotlib: - -For interactive matplotlib sessions, turn on the **matplotlib mode** - -:IPython console: - - When using the IPython console, use:: - - In [1]: %matplotlib - -:Jupyter notebook: - - In the notebook, insert, **at the beginning of the - notebook** the following `magic - `_:: - - %matplotlib inline - -pyplot ------- - -.. tip:: - - *pyplot* provides a procedural interface to the matplotlib object-oriented - plotting library. It is modeled closely after Matlab™. Therefore, the - majority of plotting commands in pyplot have Matlab™ analogs with similar - arguments. Important commands are explained with interactive examples. - -:: - - import matplotlib.pyplot as plt - -Simple plot -=========== - -.. tip:: - - In this section, we want to draw the cosine and sine functions on the same - plot. Starting from the default settings, we'll enrich the figure step by - step to make it nicer. - - First step is to get the data for the sine and cosine functions: - -:: - - import numpy as np - - X = np.linspace(-np.pi, np.pi, 256) - C, S = np.cos(X), np.sin(X) - - -``X`` is now a numpy array with 256 values ranging from :math:`-\pi` to :math:`+\pi` -(included). 
``C`` is the cosine (256 values) and ``S`` is the sine (256 -values). - -To run the example, you can type them in an IPython interactive session:: - - $ ipython --matplotlib - -This brings us to the IPython prompt: :: - - IPython 0.13 -- An enhanced Interactive Python. - ? -> Introduction to IPython's features. - %magic -> Information about IPython's 'magic' % functions. - help -> Python's own help system. - object? -> Details about 'object'. ?object also works, ?? prints more. - -.. tip:: - - You can also download each of the examples and run it using regular - python, but you will lose interactive data manipulation:: - - $ python plot_exercise_1.py - - You can get source for each step by clicking on the corresponding figure. - - -Plotting with default settings -------------------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_1_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_1.html - -.. hint:: Documentation - - * `plot tutorial `_ - * :func:`~plot()` command - -.. tip:: - - Matplotlib comes with a set of default settings that allow - customizing all kinds of properties. You can control the defaults of - almost every property in matplotlib: figure size and dpi, line width, - color and style, axes, axis and grid properties, text and font - properties and so on. - -|clear-floats| - -:: - - import numpy as np - import matplotlib.pyplot as plt - - X = np.linspace(-np.pi, np.pi, 256) - C, S = np.cos(X), np.sin(X) - - plt.plot(X, C) - plt.plot(X, S) - - plt.show() - - -Instantiating defaults ----------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_2_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_2.html - -.. hint:: Documentation - - * `Customizing matplotlib `_ - -In the script below, we've instantiated (and commented) all the figure settings -that influence the appearance of the plot. - -.. 
tip:: - - The settings have been explicitly set to their default values, but - now you can interactively play with the values to explore their - affect (see `Line properties`_ and `Line styles`_ below). - -|clear-floats| - -:: - - import numpy as np - import matplotlib.pyplot as plt - - # Create a figure of size 8x6 inches, 80 dots per inch - plt.figure(figsize=(8, 6), dpi=80) - - # Create a new subplot from a grid of 1x1 - plt.subplot(1, 1, 1) - - X = np.linspace(-np.pi, np.pi, 256) - C, S = np.cos(X), np.sin(X) - - # Plot cosine with a blue continuous line of width 1 (pixels) - plt.plot(X, C, color="blue", linewidth=1.0, linestyle="-") - - # Plot sine with a green continuous line of width 1 (pixels) - plt.plot(X, S, color="green", linewidth=1.0, linestyle="-") - - # Set x limits - plt.xlim(-4.0, 4.0) - - # Set x ticks - plt.xticks(np.linspace(-4, 4, 9)) - - # Set y limits - plt.ylim(-1.0, 1.0) - - # Set y ticks - plt.yticks(np.linspace(-1, 1, 5)) - - # Save figure using 72 dots per inch - # plt.savefig("exercise_2.png", dpi=72) - - # Show result on screen - plt.show() - - -Changing colors and line widths --------------------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_3_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_3.html - -.. hint:: Documentation - - * `Controlling line properties `_ - * :class:`~matplotlib.lines.Line2D` API - -.. tip:: - - First step, we want to have the cosine in blue and the sine in red and a - slightly thicker line for both of them. We'll also slightly alter the figure - size to make it more horizontal. - -|clear-floats| - -:: - - ... - plt.figure(figsize=(10, 6), dpi=80) - plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-") - plt.plot(X, S, color="red", linewidth=2.5, linestyle="-") - ... - - -Setting limits --------------- - -.. 
image:: auto_examples/exercises/images/sphx_glr_plot_exercise_4_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_4.html - -.. hint:: Documentation - - * :func:`xlim()` command - * :func:`ylim()` command - -.. tip:: - - Current limits of the figure are a bit too tight and we want to make - some space in order to clearly see all data points. - -|clear-floats| - -:: - - ... - plt.xlim(X.min() * 1.1, X.max() * 1.1) - plt.ylim(C.min() * 1.1, C.max() * 1.1) - ... - - - -Setting ticks -------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_5_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_5.html - -.. hint:: Documentation - - * :func:`xticks()` command - * :func:`yticks()` command - * `Tick container `_ - * `Tick locating and formatting `_ - -.. tip:: - - Current ticks are not ideal because they do not show the interesting values - (:math:`\pm \pi`,:math:`\pm \pi`/2) for sine and cosine. We'll change them such that they show - only these values. - -|clear-floats| - -:: - - ... - plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi]) - plt.yticks([-1, 0, +1]) - ... - - - -Setting tick labels -------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_6_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_6.html - - -.. hint:: Documentation - - * `Working with text `_ - * :func:`~xticks()` command - * :func:`~yticks()` command - * :meth:`~matplotlib.axes.Axes.set_xticklabels()` - * :meth:`~matplotlib.axes.Axes.set_yticklabels()` - - -.. tip:: - - Ticks are now properly placed but their label is not very explicit. - We could guess that 3.142 is :math:`\pi` but it would be better to make it - explicit. When we set tick values, we can also provide a - corresponding label in the second argument list. Note that we'll use - latex to allow for nice rendering of the label. - -|clear-floats| - -:: - - ... 
- plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], - [r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$']) - - plt.yticks([-1, 0, +1], - [r'$-1$', r'$0$', r'$+1$']) - ... - - - -Moving spines -------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_7_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_7.html - - -.. hint:: Documentation - - * :mod:`~matplotlib.spines` API - * `Axis container `_ - * `Transformations tutorial `_ - -.. tip:: - - Spines are the lines connecting the axis tick marks and noting the - boundaries of the data area. They can be placed at arbitrary - positions and until now, they were on the border of the axis. We'll - change that since we want to have them in the middle. Since there are - four of them (top/bottom/left/right), we'll discard the top and right - by setting their color to none and we'll move the bottom and left - ones to coordinate 0 in data space coordinates. - -|clear-floats| - -:: - - ... - ax = plt.gca() # gca stands for 'get current axis' - ax.spines['right'].set_color('none') - ax.spines['top'].set_color('none') - ax.xaxis.set_ticks_position('bottom') - ax.spines['bottom'].set_position(('data',0)) - ax.yaxis.set_ticks_position('left') - ax.spines['left'].set_position(('data',0)) - ... - - - -Adding a legend ---------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_8_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_8.html - - -.. hint:: Documentation - - * `Legend guide `_ - * :func:`legend()` command - * :mod:`~matplotlib.legend` API - -.. tip:: - - Let's add a legend in the upper left corner. This only requires - adding the keyword argument label (that will be used in the legend - box) to the plot commands. - -|clear-floats| - -:: - - ... 
- plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-", label="cosine") - plt.plot(X, S, color="red", linewidth=2.5, linestyle="-", label="sine") - - plt.legend(loc='upper left') - ... - - - -Annotate some points --------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_9_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_9.html - - -.. hint:: Documentation - - * `Annotating axis `_ - * :func:`annotate()` command - -.. tip:: - - Let's annotate some interesting points using the annotate command. We - chose the :math:`2\pi / 3` value and we want to annotate both the sine and the - cosine. We'll first draw a marker on the curve as well as a straight - dotted line. Then, we'll use the annotate command to display some - text with an arrow. - -|clear-floats| - -:: - - ... - - t = 2 * np.pi / 3 - plt.plot([t, t], [0, np.cos(t)], color='blue', linewidth=2.5, linestyle="--") - plt.scatter([t, ], [np.cos(t), ], 50, color='blue') - - plt.annotate(r'$cos(\frac{2\pi}{3})=-\frac{1}{2}$', - xy=(t, np.cos(t)), xycoords='data', - xytext=(-90, -50), textcoords='offset points', fontsize=16, - arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) - - plt.plot([t, t],[0, np.sin(t)], color='red', linewidth=2.5, linestyle="--") - plt.scatter([t, ],[np.sin(t), ], 50, color='red') - - plt.annotate(r'$sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$', - xy=(t, np.sin(t)), xycoords='data', - xytext=(+10, +30), textcoords='offset points', fontsize=16, - arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) - ... - - - -Devil is in the details ------------------------- - -.. image:: auto_examples/exercises/images/sphx_glr_plot_exercise_10_001.png - :align: right - :scale: 35 - :target: auto_examples/exercises/plot_exercise_10.html - -.. hint:: Documentation - - * :mod:`~matplotlib.artist` API - * :meth:`~matplotlib.text.Text.set_bbox()` method - -.. 
tip:: - - The tick labels are now hardly visible because of the blue and red - lines. We can make them bigger and we can also adjust their - properties such that they'll be rendered on a semi-transparent white - background. This will allow us to see both the data and the labels. - -|clear-floats| - -:: - - ... - for label in ax.get_xticklabels() + ax.get_yticklabels(): - label.set_fontsize(16) - label.set_bbox(dict(facecolor='white', edgecolor='None', alpha=0.65)) - ... - - - - -Figures, Subplots, Axes and Ticks -================================= - -A **"figure"** in matplotlib means the whole window in the user interface. -Within this figure there can be **"subplots"**. - -.. tip:: - - So far we have used implicit figure and axes creation. This is handy for - fast plots. We can have more control over the display using figure, - subplot, and axes explicitly. While subplot positions the plots in a - regular grid, axes allows free placement within the figure. Both can be - useful depending on your intention. We've already worked with figures and - subplots without explicitly calling them. When we call plot, matplotlib - calls :func:`gca` to get the current axes and gca in turn calls :func:`gcf` to - get the current figure. If there is none it calls :func:`figure` to make one, - strictly speaking, to make a ``subplot(111)``. Let's look at the details. - -Figures -------- - -.. tip:: - - A figure is the windows in the GUI that has "Figure #" as title. Figures - are numbered starting from 1 as opposed to the normal Python way starting - from 0. This is clearly MATLAB-style. 
There are several parameters that - determine what the figure looks like: - -============== ======================= ============================================ -Argument Default Description -============== ======================= ============================================ -``num`` ``1`` number of figure -``figsize`` ``figure.figsize`` figure size in inches (width, height) -``dpi`` ``figure.dpi`` resolution in dots per inch -``facecolor`` ``figure.facecolor`` color of the drawing background -``edgecolor`` ``figure.edgecolor`` color of edge around the drawing background -``frameon`` ``True`` draw figure frame or not -============== ======================= ============================================ - -.. tip:: - - The defaults can be specified in the resource file and will be used most of - the time. Only the number of the figure is frequently changed. - - As with other objects, you can set figure properties also setp or with the - set_something methods. - - When you work with the GUI you can close a figure by clicking on the x in - the upper right corner. But you can close a figure programmatically by - calling close. Depending on the argument it closes (1) the current figure - (no argument), (2) a specific figure (figure number or figure instance as - argument), or (3) all figures (``"all"`` as argument). - -:: - - plt.close(1) # Closes figure 1 - - -Subplots --------- - -.. tip:: - - With subplot you can arrange plots in a regular grid. You need to specify - the number of rows and columns and the number of the plot. Note that the - `gridspec `_ command - is a more powerful alternative. - -.. avoid an ugly interplay between 'tip' and the images below: we want a - line-return - -|clear-floats| - -.. image:: auto_examples/images/sphx_glr_plot_subplot-horizontal_001.png - :scale: 25 - :target: auto_examples/plot_subplot-horizontal.html -.. 
image:: auto_examples/images/sphx_glr_plot_subplot-vertical_001.png - :scale: 25 - :target: auto_examples/plot_subplot-vertical.html -.. image:: auto_examples/images/sphx_glr_plot_subplot-grid_001.png - :scale: 25 - :target: auto_examples/plot_subplot-grid.html -.. image:: auto_examples/images/sphx_glr_plot_gridspec_001.png - :scale: 25 - :target: auto_examples/plot_gridspec.html - - -Axes ----- - -Axes are very similar to subplots but allow placement of plots at any location -in the figure. So if we want to put a smaller plot inside a bigger one we do -so with axes. - -.. image:: auto_examples/images/sphx_glr_plot_axes_001.png - :scale: 35 - :target: auto_examples/plot_axes.html -.. image:: auto_examples/images/sphx_glr_plot_axes-2_001.png - :scale: 35 - :target: auto_examples/plot_axes-2.html - - -Ticks ------ - -Well formatted ticks are an important part of publishing-ready -figures. Matplotlib provides a totally configurable system for ticks. There are -tick locators to specify where ticks should appear and tick formatters to give -ticks the appearance you want. Major and minor ticks can be located and -formatted independently from each other. Per default minor ticks are not shown, -i.e. there is only an empty list for them because it is as ``NullLocator`` (see -below). - -Tick Locators -............. - -Tick locators control the positions of the ticks. They are set as -follows:: - - ax = plt.gca() - ax.xaxis.set_major_locator(eval(locator)) - -There are several locators for different kind of requirements: - -.. raw:: latex - - ~ - -.. image:: auto_examples/options/images/sphx_glr_plot_ticks_001.png - :scale: 60 - :target: auto_examples/options/plot_ticks.html - -.. raw:: latex - - ~ - -All of these locators derive from the base class :class:`matplotlib.ticker.Locator`. -You can make your own locator deriving from it. Handling dates as ticks can be -especially tricky. Therefore, matplotlib provides special locators in -matplotlib.dates. 
- - -Other Types of Plots: examples and exercises -============================================= - -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_plot_ext_001.png - :scale: 39 - :target: `Regular Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_scatter_ext_001.png - :scale: 39 - :target: `Scatter Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_bar_ext_001.png - :scale: 39 - :target: `Bar Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_contour_ext_001.png - :scale: 39 - :target: `Contour Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_imshow_ext_001.png - :scale: 39 - :target: `Imshow`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_quiver_ext_001.png - :scale: 39 - :target: `Quiver Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_pie_ext_001.png - :scale: 39 - :target: `Pie Charts`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_grid_ext_001.png - :scale: 39 - :target: `Grids`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_multiplot_ext_001.png - :scale: 39 - :target: `Multi Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_polar_ext_001.png - :scale: 39 - :target: `Polar Axis`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_plot3d_ext_001.png - :scale: 39 - :target: `3D Plots`_ -.. image:: auto_examples/pretty_plots/images/sphx_glr_plot_text_ext_001.png - :scale: 39 - :target: `Text`_ - - -Regular Plots -------------- - -.. image:: auto_examples/images/sphx_glr_plot_plot_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_plot.html - -Starting from the code below, try to reproduce the graphic taking -care of filled areas: - -.. hint:: - - You need to use the :func:`fill_between()` command. 
- - -:: - - n = 256 - X = np.linspace(-np.pi, np.pi, n) - Y = np.sin(2 * X) - - plt.plot(X, Y + 1, color='blue', alpha=1.00) - plt.plot(X, Y - 1, color='blue', alpha=1.00) - -Click on the figure for solution. - - -Scatter Plots -------------- - -.. image:: auto_examples/images/sphx_glr_plot_scatter_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_scatter.html - -Starting from the code below, try to reproduce the graphic taking -care of marker size, color and transparency. - -.. hint:: - - Color is given by angle of (X,Y). - - -:: - - n = 1024 - rng = np.random.default_rng() - X = rng.normal(0,1,n) - Y = rng.normal(0,1,n) - - plt.scatter(X,Y) - -Click on figure for solution. - - -Bar Plots ---------- - -.. image:: auto_examples/images/sphx_glr_plot_bar_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_bar.html - -Starting from the code below, try to reproduce the graphic by -adding labels for red bars. - -.. hint:: - - You need to take care of text alignment. - -|clear-floats| - -:: - - n = 12 - X = np.arange(n) - rng = np.random.default_rng() - Y1 = (1 - X / float(n)) * rng.uniform(0.5, 1.0, n) - Y2 = (1 - X / float(n)) * rng.uniform(0.5, 1.0, n) - - plt.bar(X, +Y1, facecolor='#9999ff', edgecolor='white') - plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white') - - for x, y in zip(X, Y1): - plt.text(x + 0.4, y + 0.05, '%.2f' % y, ha='center', va='bottom') - - plt.ylim(-1.25, +1.25) - -Click on figure for solution. - - -Contour Plots -------------- - -.. image:: auto_examples/images/sphx_glr_plot_contour_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_contour.html - - -Starting from the code below, try to reproduce the graphic taking -care of the colormap (see `Colormaps`_ below). - -.. hint:: - - You need to use the :func:`clabel()` command. 
- -:: - - def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 -y ** 2) - - n = 256 - x = np.linspace(-3, 3, n) - y = np.linspace(-3, 3, n) - X, Y = np.meshgrid(x, y) - - plt.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='jet') - C = plt.contour(X, Y, f(X, Y), 8, colors='black', linewidth=.5) - -Click on figure for solution. - - - -Imshow ------- - -.. image:: auto_examples/images/sphx_glr_plot_imshow_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_imshow.html - - -Starting from the code below, try to reproduce the graphic taking -care of colormap, image interpolation and origin. - -.. hint:: - - You need to take care of the ``origin`` of the image in the imshow command and - use a :func:`colorbar()` - - -:: - - def f(x, y): - return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2) - - n = 10 - x = np.linspace(-3, 3, 4 * n) - y = np.linspace(-3, 3, 3 * n) - X, Y = np.meshgrid(x, y) - plt.imshow(f(X, Y)) - -Click on the figure for the solution. - - -Pie Charts ----------- - -.. image:: auto_examples/images/sphx_glr_plot_pie_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_pie.html - - -Starting from the code below, try to reproduce the graphic taking -care of colors and slices size. - -.. hint:: - - You need to modify Z. - -:: - - rng = np.random.default_rng() - Z = rng.uniform(0, 1, 20) - plt.pie(Z) - -Click on the figure for the solution. - - - -Quiver Plots ------------- - -.. image:: auto_examples/images/sphx_glr_plot_quiver_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_quiver.html - - -Starting from the code below, try to reproduce the graphic taking -care of colors and orientations. - -.. hint:: - - You need to draw arrows twice. - -:: - - n = 8 - X, Y = np.mgrid[0:n, 0:n] - plt.quiver(X, Y) - -Click on figure for solution. - - -Grids ------ - -.. 
image:: auto_examples/images/sphx_glr_plot_grid_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_grid.html - - -Starting from the code below, try to reproduce the graphic taking -care of line styles. - -:: - - axes = plt.gca() - axes.set_xlim(0, 4) - axes.set_ylim(0, 3) - axes.set_xticklabels([]) - axes.set_yticklabels([]) - - -Click on figure for solution. - - -Multi Plots ------------ - -.. image:: auto_examples/images/sphx_glr_plot_multiplot_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_multiplot.html - -Starting from the code below, try to reproduce the graphic. - -.. hint:: - - You can use several subplots with different partition. - - -:: - - plt.subplot(2, 2, 1) - plt.subplot(2, 2, 3) - plt.subplot(2, 2, 4) - -Click on figure for solution. - - -Polar Axis ----------- - -.. image:: auto_examples/images/sphx_glr_plot_polar_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_polar.html - - -.. hint:: - - You only need to modify the ``axes`` line - -Starting from the code below, try to reproduce the graphic. - - -:: - - plt.axes([0, 0, 1, 1]) - - N = 20 - theta = np.arange(0., 2 * np.pi, 2 * np.pi / N) - rng = np.random.default_rng() - radii = 10 * rng.random(N) - width = np.pi / 4 * rng.random(N) - bars = plt.bar(theta, radii, width=width, bottom=0.0) - - for r, bar in zip(radii, bars): - bar.set_facecolor(plt.cm.jet(r / 10.)) - bar.set_alpha(0.5) - -Click on figure for solution. - - -3D Plots --------- - -.. image:: auto_examples/images/sphx_glr_plot_plot3d_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_plot3d.html - -Starting from the code below, try to reproduce the graphic. - -.. 
hint:: - - You need to use :func:`contourf()` - - -:: - - from mpl_toolkits.mplot3d import Axes3D - - fig = plt.figure() - ax = Axes3D(fig) - X = np.arange(-4, 4, 0.25) - Y = np.arange(-4, 4, 0.25) - X, Y = np.meshgrid(X, Y) - R = np.sqrt(X**2 + Y**2) - Z = np.sin(R) - - ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='hot') - -Click on figure for solution. - -Text ----- - - -.. image:: auto_examples/images/sphx_glr_plot_text_001.png - :align: right - :scale: 35 - :target: auto_examples/plot_text.html - - -Try to do the same from scratch ! - -.. hint:: - - Have a look at the `matplotlib logo - `_. - - -Click on figure for solution. - -| - -____ - -| - -.. topic:: **Quick read** - - If you want to do a first quick pass through the Scientific Python Lectures - to learn the ecosystem, you can directly skip to the next chapter: - :ref:`scipy`. - - The remainder of this chapter is not necessary to follow the rest of - the intro part. But be sure to come back and finish this chapter later. - -Beyond this tutorial -==================== - -Matplotlib benefits from extensive documentation as well as a large -community of users and developers. Here are some links of interest: - -Tutorials ---------- - -.. 
hlist:: - - * `Pyplot tutorial `_ - - - Introduction - - Controlling line properties - - Working with multiple figures and axes - - Working with text - - * `Image tutorial `_ - - - Startup commands - - Importing image data into NumPy arrays - - Plotting NumPy arrays as images - - * `Text tutorial `_ - - - Text introduction - - Basic text commands - - Text properties and layout - - Writing mathematical expressions - - Text rendering With LaTeX - - Annotating text - - * `Artist tutorial `_ - - - Introduction - - Customizing your objects - - Object containers - - Figure container - - Axes container - - Axis containers - - Tick containers - - * `Path tutorial `_ - - - Introduction - - Bézier example - - Compound paths - - * `Transforms tutorial `_ - - - Introduction - - Data coordinates - - Axes coordinates - - Blended transformations - - Using offset transforms to create a shadow effect - - The transformation pipeline - - - -Matplotlib documentation ------------------------- - -.. hlist:: - - * `User guide `_ - - * `FAQ `_ - - - Installation - - Usage - - How-To - - Troubleshooting - - Environment Variables - - * `Screenshots `_ - - -Code documentation ------------------- - -The code is well documented and you can quickly access a specific command -from within a python session: - -:: - - >>> import matplotlib.pyplot as plt - >>> help(plt.plot) # doctest: +SKIP - Help on function plot in module matplotlib.pyplot: - - plot(*args: ...) -> 'list[Line2D]' - Plot y versus x as lines and/or markers. - - Call signatures:: - - plot([x], y, [fmt], *, data=None, **kwargs) - plot([x], y, [fmt], [x2], y2, [fmt2], ..., **kwargs) - ... - - -Galleries ---------- - -The `matplotlib gallery `_ is -also incredibly useful when you search how to render a given graphic. Each -example comes with its source. - - -Mailing lists --------------- - -Finally, there is a `user mailing list -`_ where you can -ask for help and a `developers mailing list -`_ that is more -technical. 
- - -Quick references -================ - -Here is a set of tables that show main properties and styles. - -Line properties ----------------- - -.. list-table:: - :widths: 20 30 50 - :header-rows: 1 - - * - Property - - Description - - Appearance - - * - alpha (or a) - - alpha transparency on 0-1 scale - - .. image:: auto_examples/options/images/sphx_glr_plot_alpha_001.png - - * - antialiased - - True or False - use antialised rendering - - .. image:: auto_examples/options/images/sphx_glr_plot_aliased_001.png - .. image:: auto_examples/options/images/sphx_glr_plot_antialiased_001.png - - * - color (or c) - - matplotlib color arg - - .. image:: auto_examples/options/images/sphx_glr_plot_color_001.png - - * - linestyle (or ls) - - see `Line properties`_ - - - - * - linewidth (or lw) - - float, the line width in points - - .. image:: auto_examples/options/images/sphx_glr_plot_linewidth_001.png - - * - solid_capstyle - - Cap style for solid lines - - .. image:: auto_examples/options/images/sphx_glr_plot_solid_capstyle_001.png - - * - solid_joinstyle - - Join style for solid lines - - .. image:: auto_examples/options/images/sphx_glr_plot_solid_joinstyle_001.png - - * - dash_capstyle - - Cap style for dashes - - .. image:: auto_examples/options/images/sphx_glr_plot_dash_capstyle_001.png - - * - dash_joinstyle - - Join style for dashes - - .. image:: auto_examples/options/images/sphx_glr_plot_dash_joinstyle_001.png - - * - marker - - see `Markers`_ - - - - * - markeredgewidth (mew) - - line width around the marker symbol - - .. image:: auto_examples/options/images/sphx_glr_plot_mew_001.png - - * - markeredgecolor (mec) - - edge color if a marker is used - - .. image:: auto_examples/options/images/sphx_glr_plot_mec_001.png - - * - markerfacecolor (mfc) - - face color if a marker is used - - .. image:: auto_examples/options/images/sphx_glr_plot_mfc_001.png - - * - markersize (ms) - - size of the marker in points - - .. 
image:: auto_examples/options/images/sphx_glr_plot_ms_001.png - - - -Line styles ------------ - -.. image:: auto_examples/options/images/sphx_glr_plot_linestyles_001.png - -Markers -------- - -.. image:: auto_examples/options/images/sphx_glr_plot_markers_001.png - :scale: 90 - -Colormaps ---------- - -All colormaps can be reversed by appending ``_r``. For instance, ``gray_r`` is -the reverse of ``gray``. - -If you want to know more about colormaps, check the `documentation on Colormaps in matplotlib `_. - -.. image:: auto_examples/options/images/sphx_glr_plot_colormaps_001.png - :scale: 80 - - -Full code examples -================== - -.. include:: auto_examples/index.rst - :start-line: 1 diff --git a/intro/matplotlib/quick_reference_figures.md b/intro/matplotlib/quick_reference_figures.md new file mode 100644 index 000000000..5d90eef10 --- /dev/null +++ b/intro/matplotlib/quick_reference_figures.md @@ -0,0 +1,604 @@ +--- +jupytext: + notebook_metadata_filter: all,-language_info + split_at_heading: true + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(mpl-reference-figures)= + ++++ + +# Generate figures for quick reference tables + +This final section contains the code for figures used in the [line +properties](mpl-line-properties) table in the [Matplotlib](matplotlib) page. + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +```{code-cell} +# Machinery to store outputs for later use. +# This is for rendering in the Jupyter Book version of these pages. 
+from myst_nb import glue +``` + +(mpl-line-property-figures)= + ++++ + +## Line property figures + +This example demonstrates using alpha for transparency: + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0.1, 1, 0.8), frameon=False) + +for i in range(1, 11): + plt.axvline(i, linewidth=1, color="blue", alpha=0.25 + 0.75 * i / 10.0) + +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. +glue("plot_alpha", fig, display=False) +``` + +This example demonstrates aliased versus anti-aliased text. + +First, aliased text (`antialiased=False`): + +```{code-cell} +size = 128, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) + +plt.axes((0, 0, 1, 1), frameon=False) + +plt.rcParams["text.antialiased"] = False +plt.text(0.5, 0.5, "Aliased", ha="center", va="center") + +plt.xlim(0, 1) +plt.ylim(0, 1) +plt.xticks([]) +plt.yticks([]) + +# Reset rcParams back to defaults +plt.rcdefaults() + +# Store figure for use in reference table. +glue("plot_aliased", fig, display=False) +``` + +Next, `antialiased=True`. + +```{code-cell} +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +plt.rcParams["text.antialiased"] = True +plt.text(0.5, 0.5, "Anti-aliased", ha="center", va="center") + +plt.xlim(0, 1) +plt.ylim(0, 1) +plt.xticks([]) +plt.yticks([]) + +# Reset rcParams back to defaults +plt.rcdefaults() + +# Store figure for use in reference table. +glue("plot_antialiased", fig, display=False) +``` + +An example demoing the various colors taken by Matplotlib's plot. 
+ +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0.1, 1, 0.8), frameon=False) + +for i in range(1, 11): + plt.plot([i, i], [0, 1], lw=1.5) + +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]); + +# Store figure for use in reference table. +glue("plot_color", fig, display=False) +``` + +Plot various linewidths with Matplotlib. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0.1, 1, 0.8), frameon=False) + +for i in range(1, 11): + plt.plot([i, i], [0, 1], color="b", lw=i / 2.0) + +plt.xlim(0, 11) +plt.ylim(0, 1) +plt.xticks([]) +plt.yticks([]); + +# Store figure for use in reference table. +glue("plot_linewidth", fig, display=False) +``` + +An example demoing the solid cap style in Matplotlib. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +plt.plot(np.arange(4), np.ones(4), color="blue", linewidth=8, solid_capstyle="butt") + +plt.plot( + 5 + np.arange(4), np.ones(4), color="blue", linewidth=8, solid_capstyle="round" +) + +plt.plot( + 10 + np.arange(4), + np.ones(4), + color="blue", + linewidth=8, + solid_capstyle="projecting", +) + +plt.xlim(0, 14) +plt.xticks([]) +plt.yticks([]); + +# Store figure for use in reference table. +glue("plot_solid_capstyle", fig, display=False) +``` + +An example showing the different solid joint styles in Matplotlib. 
+ +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +plt.plot(np.arange(3), [0, 1, 0], color="blue", linewidth=8, solid_joinstyle="miter") +plt.plot( + 4 + np.arange(3), [0, 1, 0], color="blue", linewidth=8, solid_joinstyle="bevel" +) +plt.plot( + 8 + np.arange(3), [0, 1, 0], color="blue", linewidth=8, solid_joinstyle="round" +) + +plt.xlim(0, 12) +plt.ylim(-1, 2) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. +glue("plot_solid_joinstyle", fig, display=False) +``` + +An example demoing the dash capstyle. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +plt.plot( + np.arange(4), + np.ones(4), + color="blue", + dashes=[15, 15], + linewidth=8, + dash_capstyle="butt", +) + +plt.plot( + 5 + np.arange(4), + np.ones(4), + color="blue", + dashes=[15, 15], + linewidth=8, + dash_capstyle="round", +) + +plt.plot( + 10 + np.arange(4), + np.ones(4), + color="blue", + dashes=[15, 15], + linewidth=8, + dash_capstyle="projecting", +) + +plt.xlim(0, 14) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. +glue("plot_dash_capstyle", fig, display=False) +``` + +Example demoing the dash join style. 
+ +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +plt.plot( + np.arange(3), + [0, 1, 0], + color="blue", + dashes=[12, 5], + linewidth=8, + dash_joinstyle="miter", +) +plt.plot( + 4 + np.arange(3), + [0, 1, 0], + color="blue", + dashes=[12, 5], + linewidth=8, + dash_joinstyle="bevel", +) +plt.plot( + 8 + np.arange(3), + [0, 1, 0], + color="blue", + dashes=[12, 5], + linewidth=8, + dash_joinstyle="round", +) + +plt.xlim(0, 12) +plt.ylim(-1, 2) +plt.xticks([]) +plt.yticks([]); + +# Store figure for use in reference table. +glue("plot_dash_joinstyle", fig, display=False) +``` + +Demo the marker edge widths of Matplotlib's markers. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +for i in range(1, 11): + plt.plot( + [ + i, + ], + [ + 1, + ], + "s", + markersize=5, + markeredgewidth=1 + i / 10.0, + markeredgecolor="k", + markerfacecolor="w", + ) +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. +glue("plot_mew", fig, display=False) +``` + +Demo the marker edge color of Matplotlib's markers. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +rng = np.random.default_rng() + +for i in range(1, 11): + r, g, b = rng.uniform(0, 1, 3) + plt.plot( + [ + i, + ], + [ + 1, + ], + "s", + markersize=5, + markerfacecolor="w", + markeredgewidth=1.5, + markeredgecolor=(r, g, b, 1), + ) + +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. 
+glue("plot_mec", fig, display=False) +``` + +Demo the marker face color of Matplotlib's markers. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +rng = np.random.default_rng() + +for i in range(1, 11): + r, g, b = rng.uniform(0, 1, 3) + plt.plot( + [ + i, + ], + [ + 1, + ], + "s", + markersize=8, + markerfacecolor=(r, g, b, 1), + markeredgewidth=0.1, + markeredgecolor=(0, 0, 0, 0.5), + ) +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. +glue("plot_mfc", fig, display=False) +``` + +Demo the marker size control in Matplotlib. + +```{code-cell} +size = 256, 16 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +fig.patch.set_alpha(0) +plt.axes((0, 0, 1, 1), frameon=False) + +for i in range(1, 11): + plt.plot( + [ + i, + ], + [ + 1, + ], + "s", + markersize=i, + markerfacecolor="w", + markeredgewidth=0.5, + markeredgecolor="k", + ) + +plt.xlim(0, 11) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference table. 
+glue("plot_ms", fig, display=False) +``` + +(mpl-line-style-figure)= + ++++ + +## Line styles figure + +```{code-cell} +def linestyle(ls, i): + X = i * 0.5 * np.ones(11) + Y = np.arange(11) + plt.plot( + X, + Y, + ls, + color=(0.0, 0.0, 1, 1), + lw=3, + ms=8, + mfc=(0.75, 0.75, 1, 1), + mec=(0, 0, 1, 1), + ) + plt.text(0.5 * i, 10.25, ls, rotation=90, fontsize=15, va="bottom") + +linestyles = [ + "-", + "--", + ":", + "-.", + ".", + ",", + "o", + "^", + "v", + "<", + ">", + "s", + "+", + "x", + "d", + "1", + "2", + "3", + "4", + "h", + "p", + "|", + "_", + "D", + "H", +] +n_lines = len(linestyles) + +size = 20 * n_lines, 300 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +plt.axes((0, 0.01, 1, 0.9), frameon=False) + +for i, ls in enumerate(linestyles): + linestyle(ls, i) + +plt.xlim(-0.2, 0.2 + 0.5 * n_lines) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference sections. +glue("line_styles_fig", fig, display=False) +``` + +(mpl-marker-style-figure)= + ++++ + +## Marker style figure + +```{code-cell} +def marker(m, i): + X = i * 0.5 * np.ones(11) + Y = np.arange(11) + + plt.plot(X, Y, lw=1, marker=m, ms=10, mfc=(0.75, 0.75, 1, 1), mec=(0, 0, 1, 1)) + plt.text(0.5 * i, 10.25, repr(m), rotation=90, fontsize=15, va="bottom") + +markers = [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + "o", + "h", + "_", + "1", + "2", + "3", + "4", + "8", + "p", + "^", + "v", + "<", + ">", + "|", + "d", + ",", + "+", + "s", + "*", + "|", + "x", + "D", + "H", + ".", +] + +n_markers = len(markers) + +size = 20 * n_markers, 300 +dpi = 72.0 +figsize = size[0] / float(dpi), size[1] / float(dpi) +fig = plt.figure(figsize=figsize, dpi=dpi) +plt.axes((0, 0.01, 1, 0.9), frameon=False) + +for i, m in enumerate(markers): + marker(m, i) + +plt.xlim(-0.2, 0.2 + 0.5 * n_markers) +plt.xticks([]) +plt.yticks([]) + +# Store figure for use in reference sections. 
+glue("marker_styles_fig", fig, display=False) +``` + +(mpl-colormap-figure)= + ++++ + +## Colormap figure + +```{code-cell} +plt.rc("text", usetex=False) +a = np.outer(np.arange(0, 1, 0.01), np.ones(10)) + +fig = plt.figure(figsize=(10, 5)) +plt.subplots_adjust(top=0.8, bottom=0.05, left=0.01, right=0.99) +maps = [m for m in plt.colormaps if not m.endswith("_r")] +maps.sort() +l = len(maps) + 1 + +for i, m in enumerate(maps): + plt.subplot(1, l, i + 1) + plt.axis("off") + plt.imshow(a, aspect="auto", cmap=plt.get_cmap(m), origin="lower") + plt.title(m, rotation=90, fontsize=10, va="bottom") + +# Restore Matplotlib defaults. +plt.rcdefaults() + +# Store figure for use in reference sections. +glue("colormap_fig", fig, display=False) +``` diff --git a/intro/numpy/advanced_operations.md b/intro/numpy/advanced_operations.md new file mode 100644 index 000000000..84ddcf781 --- /dev/null +++ b/intro/numpy/advanced_operations.md @@ -0,0 +1,250 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Advanced operations + +## Polynomials + +NumPy also contains polynomials in different bases: + +For example, $3x^2 + 2x - 1$: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +```{code-cell} +p = np.poly1d([3, 2, -1]) +p(0) +``` + +```{code-cell} +p.roots +``` + +```{code-cell} +p.order +``` + +```{code-cell} +x = np.linspace(0, 1, 20) +rng = np.random.default_rng() +y = np.cos(x) + 0.3*rng.random(20) +p = np.poly1d(np.polyfit(x, y, 3)) + +t = np.linspace(0, 1, 200) # use a larger number of points for smoother plotting +plt.plot(x, y, 'o', t, p(t), '-'); +``` + +See <https://numpy.org/doc/stable/reference/routines.polynomials.poly1d.html> +for more. + +### More polynomials (with more bases) + +NumPy also has a more sophisticated polynomial interface, which supports +e.g. the Chebyshev basis. 
+ +$3x^2 + 2x - 1$: + +```{code-cell} +p = np.polynomial.Polynomial([-1, 2, 3]) # coefs in different order! +p(0) +``` + +```{code-cell} +p.roots() +``` + +```{code-cell} +p.degree() # In general polynomials do not always expose 'order' +``` + +Example using polynomials in Chebyshev basis, for polynomials in +range `[-1, 1]`: + +```{code-cell} +x = np.linspace(-1, 1, 2000) +rng = np.random.default_rng() +y = np.cos(x) + 0.3*rng.random(2000) +p = np.polynomial.Chebyshev.fit(x, y, 90) +``` + +```{code-cell} +plt.plot(x, y, 'r.') +plt.plot(x, p(x), 'k-', lw=3) +``` + +The Chebyshev polynomials have some advantages in interpolation. + ++++ + +## Loading data files + +### Text files + +Example: {download}`populations.txt `. + +```{code-cell} +data = np.loadtxt('data/populations.txt') +data +``` + +```{code-cell} +np.savetxt('pop2.txt', data) +data2 = np.loadtxt('pop2.txt') +``` + +:::{note} +If you have a complicated text file, what you can try are: + +- `np.genfromtxt` +- Using Python's I/O functions and e.g. regexps for parsing + (Python is quite well suited for this) + ::: + +### Reminder: Navigating the filesystem with Jupyter and IPython + +Show current directory: + +```{code-cell} +pwd +``` + +Change to `data` subdirectory: + +```{code-cell} +cd data +``` + +Show filesystem listing for current directory: + +```{code-cell} +ls +``` + +Change back to containing directory. + +```{code-cell} +cd .. +``` + +### Images + +Using Matplotlib: + +```{code-cell} +img = plt.imread('data/elephant.png') +img.shape, img.dtype +``` + +```{code-cell} +# Plot and save the original figure +plt.imshow(img) +plt.savefig('plot.png') +``` + +```{code-cell} +# Plot and save the red channel of the image. 
+plt.imsave('red_elephant.png', img[:,:,0], cmap=plt.cm.gray) +``` + +This saved only one channel (of RGB): + +```{code-cell} +plt.imshow(plt.imread('red_elephant.png')) +``` + +Other libraries: + +```{code-cell} +import imageio.v3 as iio + +# Lower resolution (every sixth pixel in each dimension). +iio.imwrite('tiny_elephant.png', (img[::6,::6] * 255).astype(np.uint8)) +plt.imshow(plt.imread('tiny_elephant.png'), interpolation='nearest') +``` + +### NumPy's own format + +NumPy has its own binary format, not portable but with efficient I/O: + +```{code-cell} +data = np.ones((3, 3)) +np.save('pop.npy', data) +data3 = np.load('pop.npy') +``` + +### Well-known (& more obscure) file formats + +- HDF5: [h5py](https://www.h5py.org/), [PyTables](https://www.pytables.org) +- NetCDF: `scipy.io.netcdf_file`, [netcdf4-python](https://code.google.com/archive/p/netcdf4-python), ... +- Matlab: `scipy.io.loadmat`, `scipy.io.savemat` +- MatrixMarket: `scipy.io.mmread`, `scipy.io.mmwrite` +- IDL: `scipy.io.readsav` + +... if somebody uses it, there's probably also a Python library for it. + +::: {exercise-start} +:label: npa-load-proc-ex +:class: dropdown +::: + +Write code that loads data from {download}`populations.txt +`: and drops the last column and the first 5 rows. Save +the smaller dataset to `pop2.txt`. + +::: {exercise-end} +::: + +::: {solution-start} npa-load-proc-ex +:class: dropdown +::: + +```{code-cell} +data = np.loadtxt("data/populations.txt") +reduced_data = data[5:, :-1] +np.savetxt("pop2.txt", reduced_data) +``` + +::: {solution-end} +::: + + + + + + + + + +:::{admonition} NumPy internals +If you are interested in the NumPy internals, there is a good discussion in +{ref}`advanced-numpy`. +::: diff --git a/intro/numpy/advanced_operations.rst b/intro/numpy/advanced_operations.rst deleted file mode 100644 index 3263a94eb..000000000 --- a/intro/numpy/advanced_operations.rst +++ /dev/null @@ -1,220 +0,0 @@ -.. 
For doctests - >>> import numpy as np - >>> # For doctest on headless environments - >>> import matplotlib - >>> matplotlib.use('Agg') - >>> import matplotlib.pyplot as plt - - - -.. currentmodule:: numpy - -Advanced operations -=================== - -.. contents:: Section contents - :local: - :depth: 1 - -Polynomials ------------ - -NumPy also contains polynomials in different bases: - -For example, :math:`3x^2 + 2x - 1`:: - - >>> p = np.poly1d([3, 2, -1]) - >>> p(0) - np.int64(-1) - >>> p.roots - array([-1. , 0.33333333]) - >>> p.order - 2 - -:: - - >>> x = np.linspace(0, 1, 20) - >>> rng = np.random.default_rng() - >>> y = np.cos(x) + 0.3*rng.random(20) - >>> p = np.poly1d(np.polyfit(x, y, 3)) - - >>> t = np.linspace(0, 1, 200) # use a larger number of points for smoother plotting - >>> plt.plot(x, y, 'o', t, p(t), '-') - [, ] - -.. image:: auto_examples/images/sphx_glr_plot_polyfit_001.png - :width: 50% - :target: auto_examples/plot_polyfit.html - :align: center - -See https://numpy.org/doc/stable/reference/routines.polynomials.poly1d.html -for more. - -More polynomials (with more bases) -................................... - -NumPy also has a more sophisticated polynomial interface, which supports -e.g. the Chebyshev basis. - -:math:`3x^2 + 2x - 1`:: - - >>> p = np.polynomial.Polynomial([-1, 2, 3]) # coefs in different order! - >>> p(0) - np.float64(-1.0) - >>> p.roots() - array([-1. , 0.33333333]) - >>> p.degree() # In general polynomials do not always expose 'order' - 2 - -Example using polynomials in Chebyshev basis, for polynomials in -range ``[-1, 1]``:: - - >>> x = np.linspace(-1, 1, 2000) - >>> rng = np.random.default_rng() - >>> y = np.cos(x) + 0.3*rng.random(2000) - >>> p = np.polynomial.Chebyshev.fit(x, y, 90) - - >>> plt.plot(x, y, 'r.') - [] - >>> plt.plot(x, p(x), 'k-', lw=3) - [] - -.. 
image:: auto_examples/images/sphx_glr_plot_chebyfit_001.png - :width: 50% - :target: auto_examples/plot_chebyfit.html - :align: center - -The Chebyshev polynomials have some advantages in interpolation. - -Loading data files -------------------- - -Text files -........... - -Example: :download:`populations.txt <../../data/populations.txt>`: - -.. include:: ../../data/populations.txt - :end-line: 5 - :literal: - -:: - - >>> data = np.loadtxt('data/populations.txt') - >>> data - array([[ 1900., 30000., 4000., 48300.], - [ 1901., 47200., 6100., 48200.], - [ 1902., 70200., 9800., 41500.], - ... - -:: - - >>> np.savetxt('pop2.txt', data) - >>> data2 = np.loadtxt('pop2.txt') - -.. note:: If you have a complicated text file, what you can try are: - - - ``np.genfromtxt`` - - - Using Python's I/O functions and e.g. regexps for parsing - (Python is quite well suited for this) - -.. topic:: Reminder: Navigating the filesystem with IPython - - .. ipython:: - - In [1]: pwd # show current directory - '/home/user/stuff/2011-numpy-tutorial' - In [2]: cd ex - '/home/user/stuff/2011-numpy-tutorial/ex' - In [3]: ls - populations.txt species.txt - -Images -....... - -Using Matplotlib:: - - >>> img = plt.imread('data/elephant.png') - >>> img.shape, img.dtype - ((200, 300, 3), dtype('float32')) - >>> plt.imshow(img) - - >>> plt.savefig('plot.png') - - >>> plt.imsave('red_elephant.png', img[:,:,0], cmap=plt.cm.gray) - -.. image:: auto_examples/images/sphx_glr_plot_elephant_001.png - :width: 50% - :target: auto_examples/plot_elephant.html - :align: center - -This saved only one channel (of RGB):: - - >>> plt.imshow(plt.imread('red_elephant.png')) - - -.. 
image:: auto_examples/images/sphx_glr_plot_elephant_002.png - :width: 50% - :target: auto_examples/plot_elephant.html - :align: center - -Other libraries:: - - >>> import imageio.v3 as iio - >>> iio.imwrite('tiny_elephant.png', (img[::6,::6] * 255).astype(np.uint8)) - >>> plt.imshow(plt.imread('tiny_elephant.png'), interpolation='nearest') - - -.. image:: auto_examples/images/sphx_glr_plot_elephant_003.png - :width: 50% - :target: auto_examples/plot_elephant.html - :align: center - - -NumPy's own format -................... - -NumPy has its own binary format, not portable but with efficient I/O:: - - >>> data = np.ones((3, 3)) - >>> np.save('pop.npy', data) - >>> data3 = np.load('pop.npy') - -Well-known (& more obscure) file formats -......................................... - -* HDF5: `h5py `__, `PyTables `__ -* NetCDF: ``scipy.io.netcdf_file``, `netcdf4-python `__, ... -* Matlab: ``scipy.io.loadmat``, ``scipy.io.savemat`` -* MatrixMarket: ``scipy.io.mmread``, ``scipy.io.mmwrite`` -* IDL: ``scipy.io.readsav`` - -... if somebody uses it, there's probably also a Python library for it. - - -.. topic:: Exercise: Text data files - :class: green - - Write a Python script that loads data from :download:`populations.txt - <../../data/populations.txt>`:: and drop the last column and the first - 5 rows. Save the smaller dataset to ``pop2.txt``. - - -.. loadtxt, savez, load, fromfile, tofile - -.. real life: point to HDF5, NetCDF, etc. - -.. EXE: use loadtxt to load a data file -.. EXE: use savez and load to save data in binary format -.. EXE: use tofile and fromfile to put and get binary data bytes in/from a file - follow-up: .view() -.. EXE: parsing text files -- Python can do this reasonably well natively! - throw in the mix some random text file to be parsed (eg. PPM) -.. EXE: advanced: read the data in a PPM file - - -.. topic:: NumPy internals - - If you are interested in the NumPy internals, there is a good discussion in - :ref:`advanced_numpy`. 
diff --git a/intro/numpy/array_object.md b/intro/numpy/array_object.md new file mode 100644 index 000000000..6898c204f --- /dev/null +++ b/intro/numpy/array_object.md @@ -0,0 +1,906 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# The NumPy array object + +```{code-cell} +:tags: [hide-input] + +# Our usual import. +import numpy as np +``` + +## What are NumPy and NumPy arrays? + +### NumPy arrays + ++++ + +**NumPy** provides: + +- An extension package to Python for multi-dimensional arrays. +- An implementation that is closer to hardware (efficiency). +- A package designed for scientific computation (convenience). +- An implementation of _array oriented computing_. + +```{code-cell} +import numpy as np + +a = np.array([0, 1, 2, 3]) +a +``` + +::: {note} +:class: dropdown + +For example, an array containing: + +- values of an experiment/simulation at discrete time steps + +- signal recorded by a measurement device, e.g. sound wave + +- pixels of an image, grey-level or colour + +- 3-D data measured at different X-Y-Z positions, e.g. MRI scan + +- ... + ::: + +**Why it is useful:** Memory-efficient container that provides fast numerical +operations. + +```{code-cell} +L = range(1000) +%timeit [i**2 for i in L] +``` + +```{code-cell} +a = np.arange(1000) +%timeit a**2 +``` + + + + + + + + +### NumPy Reference documentation + +**On the web**: + +<https://numpy.org/doc/> + +**Interactive help:** + +```ipython +In [5]: np.array? +String Form: +Docstring: +array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0, ... +``` + +You can also use the Python builtin `help` command to show the docstring for a function: + +```{code-cell} +help(np.array) +``` + +#### Looking for something: + +```ipython +In [6]: np.con*? 
+np.concatenate +np.conj +np.conjugate +np.convolve +``` + +### Import conventions + +The recommended convention to import NumPy is: + +```{code-cell} +import numpy as np +``` + +## Creating arrays + +### Manual construction of arrays + +- **1-D**: + +```{code-cell} +a = np.array([0, 1, 2, 3]) +a +``` + +```{code-cell} +a.ndim +``` + +```{code-cell} +a.shape +``` + +```{code-cell} +len(a) +``` + +- **2-D, 3-D, ...**: + +```{code-cell} +b = np.array([[0, 1, 2], [3, 4, 5]]) # 2 x 3 array +b +``` + +```{code-cell} +b.ndim +``` + +```{code-cell} +b.shape +``` + +```{code-cell} +len(b) # returns the size of the first dimension +``` + +```{code-cell} +c = np.array([[[1], [2]], [[3], [4]]]) +c +``` + +```{code-cell} +c.shape +``` + +::: {exercise-start} +:label: np-ao-first-ex +:class: dropdown +::: + +- Create a simple two dimensional array. First, redo the examples + from above. And then create your own: how about odd numbers + counting backwards on the first row, and even numbers on the second? +- Use the functions {func}`len`, {func}`numpy.shape` on these arrays. + How do they relate to each other? And to the `ndim` attribute of + the arrays? + +::: {exercise-end} +::: + ++++ + +### Functions for creating arrays + +::: {note} +:class: dropdown + +In practice, we rarely enter items one by one... +::: + +**Evenly spaced**: + +```{code-cell} +a = np.arange(10) # 0 .. n-1 (!) 
+a +``` + +```{code-cell} +b = np.arange(1, 9, 2) # start, end (exclusive), step +b +``` + +— or **by number of points** + +```{code-cell} +c = np.linspace(0, 1, 6) # start, end, num-points +c +``` + +```{code-cell} +d = np.linspace(0, 1, 5, endpoint=False) +d +``` + +**Common arrays** + +```{code-cell} +a = np.ones((3, 3)) # reminder: (3, 3) is a tuple +a +``` + +```{code-cell} +b = np.zeros((2, 2)) +b +``` + +```{code-cell} +c = np.eye(3) +c +``` + +```{code-cell} +d = np.diag(np.array([1, 2, 3, 4])) +d +``` + +- {mod}`numpy.random`: random numbers (the default generator uses the PCG64 PRNG): + +```{code-cell} +rng = np.random.default_rng(27446968) +a = rng.random(4) # uniform in [0, 1) +a +``` + +```{code-cell} +b = rng.standard_normal(4) # Gaussian +b +``` + +::: {exercise-start} +:label: np-ao-func1-ex +:class: dropdown +::: + +- Experiment with `arange`, `linspace`, `ones`, `zeros`, `eye` and + `diag`. +- Create different kinds of arrays with random numbers. +- Try setting the seed before creating an array with random values. +- Look at the function `np.empty`. What does it do? When might this be + useful? + +::: {exercise-end} +::: + +::: {exercise-start} +:label: np-ao-func2-ex +:class: dropdown +::: + +- Construct an array containing: 1 2 3 4 5 +- Construct an array containing: -5, -4, -3, -2, -1 +- Construct: 2 4 6 8 +- Construct 15 equispaced numbers in range [0, 10] + +::: {exercise-end} +::: + +::: {solution-start} np-ao-func2-ex +:class: dropdown +::: + +```{code-cell} +np.arange(1, 6) +``` + +```{code-cell} +np.arange(-5, 0) +``` + +```{code-cell} +np.arange(2, 10, 2) +``` + +```{code-cell} +np.linspace(0, 10, 15) +``` + +::: {solution-end} +::: + ++++ + +## Basic data types + +You may have noticed that, in some instances, array elements are displayed with +a trailing dot (e.g. `2.` vs `2`). 
This is due to a difference in the +data-type used: + +```{code-cell} +a = np.array([1, 2, 3]) +a.dtype +``` + +```{code-cell} +b = np.array([1., 2., 3.]) +b.dtype +``` + +::: {note} +:class: dropdown + +Different data-types allow us to store data more compactly in memory, +but most of the time we simply work with floating point numbers. +Note that, in the example above, NumPy auto-detects the data-type +from the input. +::: + +You can explicitly specify which data-type you want: + +```{code-cell} +c = np.array([1, 2, 3], dtype=float) +c.dtype +``` + +The **default** data type is floating point: + +```{code-cell} +a = np.ones((3, 3)) +a.dtype +``` + +There are also other types: + ++++ + +## Bool + +```{code-cell} +e = np.array([True, False, False, True]) +e.dtype +``` + +## Strings + +```{code-cell} +f = np.array(['Bonjour', 'Hello', 'Hallo']) +f.dtype # <--- strings containing max. 7 letters +``` + +## Much more: + +- `int32` +- `int64` +- `uint32` +- `uint64` +- ... + + + +## Basic visualization + +Now that we have our first data arrays, we are going to visualize them. + +Start by launching IPython: + +```bash +$ ipython # or ipython3 depending on your install +``` + +Or the notebook: + +```bash +$ jupyter notebook +``` + +If you are using IPython enable interactive plots with: + +```{code-cell} +%matplotlib +``` + +Interactive plots are enabled automatically in the Jupyter Notebook. + +_Matplotlib_ is a 2D plotting package. 
We can import its functions as below: + +```{code-cell} +import matplotlib.pyplot as plt # the tidy way +``` + +And then use (note that you have to use `show` explicitly if you have not enabled interactive plots with `%matplotlib`): + +```{code-cell} +# Example data +x = np.linspace(0, 2 * np.pi) +y = np.cos(x) + +plt.plot(x, y) # line plot +plt.show() # <-- shows the plot (not needed with interactive plots) +``` + +Or, if you have enabled interactive plots with `%matplotlib`: + +```{code-cell} +plt.plot(x, y) # line plot +``` + +- **1D plotting**: + +```{code-cell} +x = np.linspace(0, 3, 20) +y = np.linspace(0, 9, 20) +plt.plot(x, y) # line plot +``` + +```{code-cell} +plt.plot(x, y, 'o') # dot plot +``` + +- **2D arrays** (such as images): + +```{code-cell} +rng = np.random.default_rng(27446968) +image = rng.random((30, 30)) +plt.imshow(image, cmap=plt.cm.hot) +plt.colorbar() +``` + +:::{admonition} See also + +More in the: {ref}`matplotlib chapter <matplotlib>` +::: + +::: {exercise-start} +:label: np-ao-viz-ex +:class: dropdown +::: + +- Plot some simple arrays: a cosine as a function of time and a 2D + matrix. +- Try using the `gray` colormap on the 2D matrix. + +::: {exercise-end} +::: + ++++ + +## Indexing and slicing + +The items of an array can be accessed and assigned to the same way as +other Python sequences (e.g. lists): + +```{code-cell} +a = np.arange(10) +a +``` + +```{code-cell} +a[0], a[2], a[-1] +``` + +:::{warning} +Indices begin at 0, like other Python sequences (and C/C++). +In contrast, in Fortran or Matlab, indices begin at 1. 
+::: + +The usual Python idiom for reversing a sequence is supported: + +```{code-cell} +a[::-1] +``` + +For multidimensional arrays, indices are tuples of integers: + +```{code-cell} +a = np.diag(np.arange(3)) +a +``` + +```{code-cell} +a[1, 1] +``` + +```{code-cell} +a[2, 1] = 10 # third row, second column +a +``` + +```{code-cell} +a[1] +``` + +:::{note} + +- In 2D, the first dimension corresponds to **rows**, the second + to **columns**. +- For multidimensional `a`, `a[0]` is interpreted by + taking all elements in the unspecified dimensions. + ::: + +**Slicing**: Arrays, like other Python sequences, can also be sliced: + +```{code-cell} +a = np.arange(10) +a +``` + +```{code-cell} +a[2:9:3] # [start:end:step] +``` + +Note that the last index is not included: + +```{code-cell} +a[:4] +``` + +None of the three slice components is required: by default, `start` is 0, +`end` is the length of the array and `step` is 1: + +```{code-cell} +a[1:3] +``` + +```{code-cell} +a[::2] +``` + +```{code-cell} +a[3:] +``` + +A small illustrated summary of NumPy indexing and slicing... + +![](../../pyximages/numpy_indexing.png) + +You can also combine assignment and slicing: + +```{code-cell} +a = np.arange(10) +a[5:] = 10 +a +``` + +```{code-cell} +b = np.arange(5) +a[5:] = b[::-1] +a +``` + +::: {exercise-start} +:label: np-ao-slicing-ex +:class: dropdown +::: + +- Try the different flavours of slicing, using `start`, `end` and + `step`: starting from a linspace, try to obtain odd numbers + counting backwards, and even numbers counting forwards. +- Reproduce the slices in the diagram above. You may + use the following expression to create the array: + +```python +np.arange(6) + np.arange(0, 51, 10)[:, np.newaxis] +``` + +::: {exercise-end} +::: + ++++ + +::: {exercise-start} +:label: np-ao-creation-ex +:class: dropdown +::: + +An exercise on array creation. 
+ +Create the following arrays (with correct data types): + +```python +[[1, 1, 1, 1], + [1, 1, 1, 1], + [1, 1, 1, 2], + [1, 6, 1, 1]] + +[[0., 0., 0., 0., 0.], + [2., 0., 0., 0., 0.], + [0., 3., 0., 0., 0.], + [0., 0., 4., 0., 0.], + [0., 0., 0., 5., 0.], + [0., 0., 0., 0., 6.]] +``` + +Par on course: 3 statements for each. + +_Hint_: Individual array elements can be accessed similarly to a list, +e.g. `a[1]` or `a[1, 2]`. + +_Hint_: Examine the docstring for `diag`. + +::: {exercise-end} +::: + +::: {solution-start} np-ao-creation-ex +:class: dropdown +::: + +```{code-cell} +a = np.ones((4, 4), dtype=int) +a[3, 1] = 6 +a[2, 3] = 2 +a +``` + +```{code-cell} +b = np.zeros((6, 5)) +b[1:] = np.diag(np.arange(2, 7)) +b +``` + +::: {solution-end} +::: + +::: {exercise-start} +:label: np-ao-tiling-ex +:class: dropdown +::: + +Exercise on tiling for array creation. + +Skim through the documentation for `np.tile`, and use this function +to construct the array: + +```python +[[4, 3, 4, 3, 4, 3], + [2, 1, 2, 1, 2, 1], + [4, 3, 4, 3, 4, 3], + [2, 1, 2, 1, 2, 1]] +``` + +::: {exercise-end} +::: + +::: {solution-start} np-ao-tiling-ex +:class: dropdown +::: + +```{code-cell} +block = np.array([[4, 3], [2, 1]]) +a = np.tile(block, (2, 3)) +a +``` + +::: {solution-end} +::: + ++++ + +## Copies and views + +A slicing operation creates a **view** on the original array, which is +just a way of accessing array data. Thus the original array is not +copied in memory. You can use `np.may_share_memory()` to check if two arrays +share the same memory block. Note however, that this uses heuristics and may +give you false positives. + +**When modifying the view, the original array is modified as well**: + +```{code-cell} +a = np.arange(10) +a +``` + +```{code-cell} +b = a[::2] +b +``` + +```{code-cell} +np.may_share_memory(a, b) +``` + +```{code-cell} +b[0] = 12 +b +``` + +```{code-cell} +a # (!) 
+``` + +```{code-cell} +a = np.arange(10) +c = a[::2].copy() # force a copy +c[0] = 12 +a +``` + +```{code-cell} +np.may_share_memory(a, c) +``` + +This behavior can be surprising at first sight... but it saves both +memory and time. + + + + + + + + + + + + +### Worked example: Prime number sieve + +![](images/prime-sieve.png) + +Compute prime numbers in 0--99, with a sieve. + +First — construct a shape (100,) boolean array `is_prime`, initially filled +with `True`: + +```{code-cell} +is_prime = np.ones((100,), dtype=bool) +``` + +Next, cross out 0 and 1, which are not primes: + +```{code-cell} +is_prime[:2] = 0 +``` + +For each integer `j` starting from 2, cross out its higher multiples: + +```{code-cell} +N_max = int(np.sqrt(len(is_prime) - 1)) +for j in range(2, N_max + 1): + is_prime[2*j::j] = False +``` + +Skim through `help(np.nonzero)`, and print the prime numbers. + +- Follow-up: + + - Move the above code into a script file named `prime_sieve.py` + - Run it to check it works + - Use the optimization suggested in [the sieve of Eratosthenes](https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes): + + > 1. Skip `j` which are already known to not be primes + > 2. The first number to cross out is $j^2$ + ++++ + +## Fancy indexing + +::: {note} +:class: dropdown + +NumPy arrays can be indexed with slices, but also with boolean or +integer arrays (**masks**). This method is called _fancy indexing_. +It creates **copies, not views**. 
+::: + +### Using boolean masks + +```{code-cell} +rng = np.random.default_rng(27446968) +a = rng.integers(0, 21, 15) +a +``` + +```{code-cell} +(a % 3 == 0) +``` + +```{code-cell} +mask = (a % 3 == 0) +extract_from_a = a[mask] # or, a[a%3==0] +extract_from_a # extract a sub-array with the mask +``` + +Indexing with a mask can be very useful to assign a new value to a sub-array: + +```{code-cell} +a[a % 3 == 0] = -1 +a +``` + +### Indexing with an array of integers + +```{code-cell} +a = np.arange(0, 100, 10) +a +``` + +Indexing can be done with an array of integers, in which the same index may be +repeated several times: + +```{code-cell} +a[[2, 3, 2, 4, 2]] # note: [2, 3, 2, 4, 2] is a Python list +``` + +New values can be assigned with this kind of indexing: + +```{code-cell} +a[[9, 7]] = -100 +a +``` + +**Tip** + +When a new array is created by indexing with an array of integers, the +new array has the same shape as the array of integers: + +```{code-cell} +a = np.arange(10) +idx = np.array([[3, 4], [9, 7]]) +idx.shape +``` + +```{code-cell} +a[idx] +``` + +--- + +The image below illustrates various fancy indexing applications. + +![](../../pyximages/numpy_fancy_indexing.png) + +::: {exercise-start} +:label: np-ao-fancy-ex +:class: dropdown +::: + +- Again, reproduce the fancy indexing shown in the diagram above. +- Use fancy indexing on the left and array creation on the right to assign + values into an array, for instance by setting parts of the array in + the diagram above to zero. + +::: {exercise-end} +::: + +We can even use fancy indexing and {ref}`broadcasting <broadcasting>` at +the same time: + +```{code-cell} +a = np.arange(12).reshape(3,4) +a +``` + +```{code-cell} +i = np.array([[0, 1], [1, 2]]) +a[i, 2] # same as a[i, 2 * np.ones((2, 2), dtype=int)] +``` diff --git a/intro/numpy/array_object.rst b/intro/numpy/array_object.rst deleted file mode 100644 index b9cdafabd..000000000 --- a/intro/numpy/array_object.rst +++ /dev/null @@ -1,814 +0,0 @@ -.. 
- >>> import numpy as np - >>> import matplotlib.pyplot as plt - - -.. currentmodule:: numpy - -The NumPy array object -====================== - -.. contents:: Section contents - :local: - :depth: 1 - -What are NumPy and NumPy arrays? --------------------------------- - -NumPy arrays -............ - -:**Python** objects: - - - high-level number objects: integers, floating point - - - containers: lists (costless insertion and append), dictionaries - (fast lookup) - -:**NumPy** provides: - - - extension package to Python for multi-dimensional arrays - - - closer to hardware (efficiency) - - - designed for scientific computation (convenience) - - - Also known as *array oriented computing* - -| - -.. sourcecode:: pycon - - >>> import numpy as np - >>> a = np.array([0, 1, 2, 3]) - >>> a - array([0, 1, 2, 3]) - -.. tip:: - - For example, An array containing: - - * values of an experiment/simulation at discrete time steps - - * signal recorded by a measurement device, e.g. sound wave - - * pixels of an image, grey-level or colour - - * 3-D data measured at different X-Y-Z positions, e.g. MRI scan - - * ... - -**Why it is useful:** Memory-efficient container that provides fast numerical -operations. - -.. ipython:: - - In [1]: L = range(1000) - - In [2]: %timeit [i**2 for i in L] - 1000 loops, best of 3: 403 us per loop - - In [3]: a = np.arange(1000) - - In [4]: %timeit a**2 - 100000 loops, best of 3: 12.7 us per loop - - -.. extension package to Python to support multidimensional arrays - -.. diagram, import conventions - -.. scope of this tutorial: drill in features of array manipulation in - Python, and try to give some indication on how to get things done - in good style - -.. a fixed number of elements (cf. certain exceptions) -.. each element of same size and type -.. efficiency vs. Python lists - -NumPy Reference documentation -.............................. - -- On the web: https://numpy.org/doc/ - -- Interactive help: - - .. ipython:: - - In [5]: np.array? 
- String Form: - Docstring: - array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0, ... - - .. tip:: - - .. sourcecode:: pycon - - >>> help(np.array) - Help on built-in function array in module numpy: - - array(...) - array(object, dtype=None, ... - - -- Looking for something: - - .. ipython:: - - In [6]: np.con*? - np.concatenate - np.conj - np.conjugate - np.convolve - -Import conventions -.................. - -The recommended convention to import NumPy is: - -.. sourcecode:: pycon - - >>> import numpy as np - - -Creating arrays ---------------- - -Manual construction of arrays -.............................. - -* **1-D**: - - .. sourcecode:: pycon - - >>> a = np.array([0, 1, 2, 3]) - >>> a - array([0, 1, 2, 3]) - >>> a.ndim - 1 - >>> a.shape - (4,) - >>> len(a) - 4 - -* **2-D, 3-D, ...**: - - .. sourcecode:: pycon - - >>> b = np.array([[0, 1, 2], [3, 4, 5]]) # 2 x 3 array - >>> b - array([[0, 1, 2], - [3, 4, 5]]) - >>> b.ndim - 2 - >>> b.shape - (2, 3) - >>> len(b) # returns the size of the first dimension - 2 - - >>> c = np.array([[[1], [2]], [[3], [4]]]) - >>> c - array([[[1], - [2]], - - [[3], - [4]]]) - >>> c.shape - (2, 2, 1) - -.. topic:: **Exercise: Simple arrays** - :class: green - - * Create a simple two dimensional array. First, redo the examples - from above. And then create your own: how about odd numbers - counting backwards on the first row, and even numbers on the second? - * Use the functions :func:`len`, :func:`numpy.shape` on these arrays. - How do they relate to each other? And to the ``ndim`` attribute of - the arrays? - -Functions for creating arrays -.............................. - -.. tip:: - - In practice, we rarely enter items one by one... - -* Evenly spaced: - - .. sourcecode:: pycon - - >>> a = np.arange(10) # 0 .. n-1 (!) - >>> a - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - >>> b = np.arange(1, 9, 2) # start, end (exclusive), step - >>> b - array([1, 3, 5, 7]) - -* or by number of points: - - .. 
sourcecode:: pycon - - >>> c = np.linspace(0, 1, 6) # start, end, num-points - >>> c - array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]) - >>> d = np.linspace(0, 1, 5, endpoint=False) - >>> d - array([0. , 0.2, 0.4, 0.6, 0.8]) - -* Common arrays: - - .. sourcecode:: pycon - - >>> a = np.ones((3, 3)) # reminder: (3, 3) is a tuple - >>> a - array([[1., 1., 1.], - [1., 1., 1.], - [1., 1., 1.]]) - >>> b = np.zeros((2, 2)) - >>> b - array([[0., 0.], - [0., 0.]]) - >>> c = np.eye(3) - >>> c - array([[1., 0., 0.], - [0., 1., 0.], - [0., 0., 1.]]) - >>> d = np.diag(np.array([1, 2, 3, 4])) - >>> d - array([[1, 0, 0, 0], - [0, 2, 0, 0], - [0, 0, 3, 0], - [0, 0, 0, 4]]) - -* :mod:`np.random`: random numbers (Mersenne Twister PRNG): - - .. sourcecode:: pycon - - >>> rng = np.random.default_rng(27446968) - >>> a = rng.random(4) # uniform in [0, 1] - >>> a - array([0.64613018, 0.48984931, 0.50851229, 0.22563948]) - - >>> b = rng.standard_normal(4) # Gaussian - >>> b - array([-0.38250769, -0.61536465, 0.98131732, 0.59353096]) - -.. topic:: **Exercise: Creating arrays using functions** - :class: green - - * Experiment with ``arange``, ``linspace``, ``ones``, ``zeros``, ``eye`` and - ``diag``. - * Create different kinds of arrays with random numbers. - * Try setting the seed before creating an array with random values. - * Look at the function ``np.empty``. What does it do? When might this be - useful? - -.. EXE: construct 1 2 3 4 5 -.. EXE: construct -5, -4, -3, -2, -1 -.. EXE: construct 2 4 6 8 -.. EXE: look what is in an empty() array -.. EXE: construct 15 equispaced numbers in range [0, 10] - -Basic data types ----------------- - -You may have noticed that, in some instances, array elements are displayed with -a trailing dot (e.g. ``2.`` vs ``2``). This is due to a difference in the -data-type used: - -.. sourcecode:: pycon - - >>> a = np.array([1, 2, 3]) - >>> a.dtype - dtype('int64') - - >>> b = np.array([1., 2., 3.]) - >>> b.dtype - dtype('float64') - -.. 
tip:: - - Different data-types allow us to store data more compactly in memory, - but most of the time we simply work with floating point numbers. - Note that, in the example above, NumPy auto-detects the data-type - from the input. - ------------------------------ - -You can explicitly specify which data-type you want: - -.. sourcecode:: pycon - - >>> c = np.array([1, 2, 3], dtype=float) - >>> c.dtype - dtype('float64') - - -The **default** data type is floating point: - -.. sourcecode:: pycon - - >>> a = np.ones((3, 3)) - >>> a.dtype - dtype('float64') - -There are also other types: - -:Complex: - - .. sourcecode:: pycon - - >>> d = np.array([1+2j, 3+4j, 5+6*1j]) - >>> d.dtype - dtype('complex128') - -:Bool: - - .. sourcecode:: pycon - - >>> e = np.array([True, False, False, True]) - >>> e.dtype - dtype('bool') - -:Strings: - - .. sourcecode:: pycon - - >>> f = np.array(['Bonjour', 'Hello', 'Hallo']) - >>> f.dtype # <--- strings containing max. 7 letters - dtype('>> %matplotlib # doctest: +SKIP - -Or, from the notebook, enable plots in the notebook: - -.. sourcecode:: pycon - - >>> %matplotlib inline # doctest: +SKIP - -The ``inline`` is important for the notebook, so that plots are displayed in -the notebook and not in a new window. - -*Matplotlib* is a 2D plotting package. We can import its functions as below: - -.. sourcecode:: pycon - - >>> import matplotlib.pyplot as plt # the tidy way - -And then use (note that you have to use ``show`` explicitly if you have not enabled interactive plots with ``%matplotlib``): - -.. sourcecode:: pycon - - >>> plt.plot(x, y) # line plot # doctest: +SKIP - >>> plt.show() # <-- shows the plot (not needed with interactive plots) # doctest: +SKIP - -Or, if you have enabled interactive plots with ``%matplotlib``: - -.. sourcecode:: pycon - - >>> plt.plot(x, y) # line plot # doctest: +SKIP - -* **1D plotting**: - -.. 
sourcecode:: pycon - - >>> x = np.linspace(0, 3, 20) - >>> y = np.linspace(0, 9, 20) - >>> plt.plot(x, y) # line plot - [] - >>> plt.plot(x, y, 'o') # dot plot - [] - -.. image:: auto_examples/images/sphx_glr_plot_basic1dplot_001.png - :width: 40% - :target: auto_examples/plot_basic1dplot.html - :align: center - -* **2D arrays** (such as images): - -.. sourcecode:: pycon - - >>> rng = np.random.default_rng(27446968) - >>> image = rng.random((30, 30)) - >>> plt.imshow(image, cmap=plt.cm.hot) - - >>> plt.colorbar() - - -.. image:: auto_examples/images/sphx_glr_plot_basic2dplot_001.png - :width: 50% - :target: auto_examples/plot_basic2dplot.html - :align: center - -.. seealso:: More in the: :ref:`matplotlib chapter ` - -.. topic:: **Exercise: Simple visualizations** - :class: green - - * Plot some simple arrays: a cosine as a function of time and a 2D - matrix. - * Try using the ``gray`` colormap on the 2D matrix. - - -Indexing and slicing --------------------- - -The items of an array can be accessed and assigned to the same way as -other Python sequences (e.g. lists): - -.. sourcecode:: pycon - - >>> a = np.arange(10) - >>> a - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - >>> a[0], a[2], a[-1] - (np.int64(0), np.int64(2), np.int64(9)) - -.. warning:: - - Indices begin at 0, like other Python sequences (and C/C++). - In contrast, in Fortran or Matlab, indices begin at 1. - -The usual python idiom for reversing a sequence is supported: - -.. sourcecode:: pycon - - >>> a[::-1] - array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) - -For multidimensional arrays, indices are tuples of integers: - -.. sourcecode:: pycon - - >>> a = np.diag(np.arange(3)) - >>> a - array([[0, 0, 0], - [0, 1, 0], - [0, 0, 2]]) - >>> a[1, 1] - np.int64(1) - >>> a[2, 1] = 10 # third line, second column - >>> a - array([[ 0, 0, 0], - [ 0, 1, 0], - [ 0, 10, 2]]) - >>> a[1] - array([0, 1, 0]) - - -.. note:: - - * In 2D, the first dimension corresponds to **rows**, the second - to **columns**. 
- * for multidimensional ``a``, ``a[0]`` is interpreted by - taking all elements in the unspecified dimensions. - -**Slicing**: Arrays, like other Python sequences can also be sliced: - -.. sourcecode:: pycon - - >>> a = np.arange(10) - >>> a - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - >>> a[2:9:3] # [start:end:step] - array([2, 5, 8]) - -Note that the last index is not included! : - -.. sourcecode:: pycon - - >>> a[:4] - array([0, 1, 2, 3]) - -All three slice components are not required: by default, `start` is 0, -`end` is the last and `step` is 1: - -.. sourcecode:: pycon - - >>> a[1:3] - array([1, 2]) - >>> a[::2] - array([0, 2, 4, 6, 8]) - >>> a[3:] - array([3, 4, 5, 6, 7, 8, 9]) - -A small illustrated summary of NumPy indexing and slicing... - -.. only:: latex - - .. image:: ../../pyximages/numpy_indexing.pdf - :align: center - -.. only:: html - - .. image:: ../../pyximages/numpy_indexing.png - :align: center - :width: 70% - -You can also combine assignment and slicing: - -.. sourcecode:: pycon - - >>> a = np.arange(10) - >>> a[5:] = 10 - >>> a - array([ 0, 1, 2, 3, 4, 10, 10, 10, 10, 10]) - >>> b = np.arange(5) - >>> a[5:] = b[::-1] - >>> a - array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0]) - -.. topic:: **Exercise: Indexing and slicing** - :class: green - - * Try the different flavours of slicing, using ``start``, ``end`` and - ``step``: starting from a linspace, try to obtain odd numbers - counting backwards, and even numbers counting forwards. - * Reproduce the slices in the diagram above. You may - use the following expression to create the array: - - .. sourcecode:: pycon - - >>> np.arange(6) + np.arange(0, 51, 10)[:, np.newaxis] - array([[ 0, 1, 2, 3, 4, 5], - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 25], - [30, 31, 32, 33, 34, 35], - [40, 41, 42, 43, 44, 45], - [50, 51, 52, 53, 54, 55]]) - -.. 
topic:: **Exercise: Array creation** - :class: green - - Create the following arrays (with correct data types):: - - [[1, 1, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 2], - [1, 6, 1, 1]] - - [[0., 0., 0., 0., 0.], - [2., 0., 0., 0., 0.], - [0., 3., 0., 0., 0.], - [0., 0., 4., 0., 0.], - [0., 0., 0., 5., 0.], - [0., 0., 0., 0., 6.]] - - Par on course: 3 statements for each - - *Hint*: Individual array elements can be accessed similarly to a list, - e.g. ``a[1]`` or ``a[1, 2]``. - - *Hint*: Examine the docstring for ``diag``. - -.. topic:: Exercise: Tiling for array creation - :class: green - - Skim through the documentation for ``np.tile``, and use this function - to construct the array:: - - [[4, 3, 4, 3, 4, 3], - [2, 1, 2, 1, 2, 1], - [4, 3, 4, 3, 4, 3], - [2, 1, 2, 1, 2, 1]] - -Copies and views ----------------- - -A slicing operation creates a **view** on the original array, which is -just a way of accessing array data. Thus the original array is not -copied in memory. You can use ``np.may_share_memory()`` to check if two arrays -share the same memory block. Note however, that this uses heuristics and may -give you false positives. - -**When modifying the view, the original array is modified as well**: - -.. sourcecode:: pycon - - >>> a = np.arange(10) - >>> a - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - >>> b = a[::2] - >>> b - array([0, 2, 4, 6, 8]) - >>> np.may_share_memory(a, b) - True - >>> b[0] = 12 - >>> b - array([12, 2, 4, 6, 8]) - >>> a # (!) - array([12, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - - >>> a = np.arange(10) - >>> c = a[::2].copy() # force a copy - >>> c[0] = 12 - >>> a - array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - - >>> np.may_share_memory(a, c) - False - - - -This behavior can be surprising at first sight... but it allows to save both -memory and time. - - -.. EXE: [1, 2, 3, 4, 5] -> [1, 2, 3] -.. EXE: [1, 2, 3, 4, 5] -> [4, 5] -.. EXE: [1, 2, 3, 4, 5] -> [1, 3, 5] -.. EXE: [1, 2, 3, 4, 5] -> [2, 4] -.. EXE: create an array [1, 1, 1, 1, 0, 0, 0] -.. 
EXE: create an array [0, 0, 0, 0, 1, 1, 1] -.. EXE: create an array [0, 1, 0, 1, 0, 1, 0] -.. EXE: create an array [1, 0, 1, 0, 1, 0, 1] -.. EXE: create an array [1, 0, 2, 0, 3, 0, 4] -.. CHA: archimedean sieve - -.. topic:: Worked example: Prime number sieve - :class: green - - .. image:: images/prime-sieve.png - - Compute prime numbers in 0--99, with a sieve - - * Construct a shape (100,) boolean array ``is_prime``, - filled with True in the beginning: - - .. sourcecode:: pycon - - >>> is_prime = np.ones((100,), dtype=bool) - - * Cross out 0 and 1 which are not primes: - - .. sourcecode:: pycon - - >>> is_prime[:2] = 0 - - * For each integer ``j`` starting from 2, cross out its higher multiples: - - .. sourcecode:: pycon - - >>> N_max = int(np.sqrt(len(is_prime) - 1)) - >>> for j in range(2, N_max + 1): - ... is_prime[2*j::j] = False - - * Skim through ``help(np.nonzero)``, and print the prime numbers - - * Follow-up: - - - Move the above code into a script file named ``prime_sieve.py`` - - - Run it to check it works - - - Use the optimization suggested in `the sieve of Eratosthenes - `_: - - 1. Skip ``j`` which are already known to not be primes - - 2. The first number to cross out is :math:`j^2` - -Fancy indexing --------------- - -.. tip:: - - NumPy arrays can be indexed with slices, but also with boolean or - integer arrays (**masks**). This method is called *fancy indexing*. - It creates **copies not views**. - -Using boolean masks -................... - -.. 
sourcecode:: pycon - - >>> rng = np.random.default_rng(27446968) - >>> a = rng.integers(0, 21, 15) - >>> a - array([ 3, 13, 12, 10, 10, 10, 18, 4, 8, 5, 6, 11, 12, 17, 3]) - >>> (a % 3 == 0) - array([ True, False, True, False, False, False, True, False, False, - False, True, False, True, False, True]) - >>> mask = (a % 3 == 0) - >>> extract_from_a = a[mask] # or, a[a%3==0] - >>> extract_from_a # extract a sub-array with the mask - array([ 3, 12, 18, 6, 12, 3]) - -Indexing with a mask can be very useful to assign a new value to a sub-array: - -.. sourcecode:: pycon - - >>> a[a % 3 == 0] = -1 - >>> a - array([-1, 13, -1, 10, 10, 10, -1, 4, 8, 5, -1, 11, -1, 17, -1]) - - -Indexing with an array of integers -.................................. - -.. sourcecode:: pycon - - >>> a = np.arange(0, 100, 10) - >>> a - array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) - -Indexing can be done with an array of integers, where the same index is repeated -several time: - -.. sourcecode:: pycon - - >>> a[[2, 3, 2, 4, 2]] # note: [2, 3, 2, 4, 2] is a Python list - array([20, 30, 20, 40, 20]) - -New values can be assigned with this kind of indexing: - -.. sourcecode:: pycon - - >>> a[[9, 7]] = -100 - >>> a - array([ 0, 10, 20, 30, 40, 50, 60, -100, 80, -100]) - -.. tip:: - - When a new array is created by indexing with an array of integers, the - new array has the same shape as the array of integers: - - .. sourcecode:: pycon - - >>> a = np.arange(10) - >>> idx = np.array([[3, 4], [9, 7]]) - >>> idx.shape - (2, 2) - >>> a[idx] - array([[3, 4], - [9, 7]]) - - -____ - -The image below illustrates various fancy indexing applications - -.. only:: latex - - .. image:: ../../pyximages/numpy_fancy_indexing.pdf - :align: center - -.. only:: html - - .. image:: ../../pyximages/numpy_fancy_indexing.png - :align: center - :width: 80% - -.. topic:: **Exercise: Fancy indexing** - :class: green - - * Again, reproduce the fancy indexing shown in the diagram above. 
- * Use fancy indexing on the left and array creation on the right to assign - values into an array, for instance by setting parts of the array in - the diagram above to zero. - -.. We can even use fancy indexing and :ref:`broadcasting ` at -.. the same time: -.. -.. .. sourcecode:: pycon -.. -.. >>> a = np.arange(12).reshape(3,4) -.. >>> a -.. array([[ 0, 1, 2, 3], -.. [ 4, 5, 6, 7], -.. [ 8, 9, 10, 11]]) -.. >>> i = np.array([[0, 1], [1, 2]]) -.. >>> a[i, 2] # same as a[i, 2*np.ones((2, 2), dtype=int)] -.. array([[ 2, 6], -.. [ 6, 10]]) diff --git a/intro/numpy/elaborate_arrays.md b/intro/numpy/elaborate_arrays.md new file mode 100644 index 000000000..ecb027777 --- /dev/null +++ b/intro/numpy/elaborate_arrays.md @@ -0,0 +1,266 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import matplotlib.pyplot as plt +``` + +# More elaborate arrays + +## More data types + +### Casting + +"Bigger" type wins in mixed-type operations: + +```{code-cell} +np.array([1, 2, 3]) + 1.5 +``` + +Assignment never changes the type! 
+ +```{code-cell} +a = np.array([1, 2, 3]) +a.dtype +``` + +```{code-cell} +a[0] = 1.9 # <-- float is truncated to integer +a +``` + +Forced casts: + +```{code-cell} +a = np.array([1.7, 1.2, 1.6]) +b = a.astype(int) # <-- truncates to integer +b +``` + +Rounding: + +```{code-cell} +a = np.array([1.2, 1.5, 1.6, 2.5, 3.5, 4.5]) +b = np.around(a) +b # still floating-point +``` + +```{code-cell} +c = np.around(a).astype(int) +c +``` + +### Different data type sizes + +Integers (signed): + +| Class | Bits | +| ------- | ------------------------------------------ | +| `int8` | 8 bits | +| `int16` | 16 bits | +| `int32` | 32 bits (same as `int` on 32-bit platform) | +| `int64` | 64 bits (same as `int` on 64-bit platform) | + +```{code-cell} +np.array([1], dtype=int).dtype +``` + +```{code-cell} +np.iinfo(np.int32).max, 2**31 - 1 +``` + +Unsigned integers: + +| Class | Bits | +| -------- | ------- | +| `uint8` | 8 bits | +| `uint16` | 16 bits | +| `uint32` | 32 bits | +| `uint64` | 64 bits | + +```{code-cell} +np.iinfo(np.uint32).max, 2**32 - 1 +``` + +Floating-point numbers: + +| Data Type | Size (bits) | +| ---------- | ------------------------------------------------------ | +| `float16` | 16 bits | +| `float32` | 32 bits | +| `float64` | 64 bits (same as `float`) | +| `float96` | 96 bits, platform-dependent (same as `np.longdouble`) | +| `float128` | 128 bits, platform-dependent (same as `np.longdouble`) | + +```{code-cell} +np.finfo(np.float32).eps +``` + +```{code-cell} +np.finfo(np.float64).eps +``` + +```{code-cell} +np.float32(1e-8) + np.float32(1) == 1 +``` + +```{code-cell} +np.float64(1e-8) + np.float64(1) == 1 +``` + +Complex floating-point numbers: + +| Data Type | Size (bits) | +| ------------ | -------------------------------------- | +| `complex64` | two 32-bit floats | +| `complex128` | two 64-bit floats | +| `complex192` | two 96-bit floats, platform-dependent | +| `complex256` | two 128-bit floats, platform-dependent | + +:::{admonition} Smaller data 
types +If you don't know you need special data types, then you probably don't. + +Comparison on using `float32` instead of `float64`: + +- Half the size in memory and on disk + +- Half the memory bandwidth required (may be a bit faster in some operations) + + ```ipython + In [1]: a = np.zeros((int(1e6),), dtype=np.float64) + + In [2]: b = np.zeros((int(1e6),), dtype=np.float32) + + In [3]: %timeit a*a + 1000 loops, best of 3: 1.78 ms per loop + + In [4]: %timeit b*b + 1000 loops, best of 3: 1.07 ms per loop + ``` + +- But: bigger rounding errors — sometimes in surprising places + (i.e., don't use them unless you really need them) + ::: + +## Structured data types + +| Data Type | Description | +| ------------- | ------------------ | +| `sensor_code` | 4-character string | +| `position` | float | +| `value` | float | + +```{code-cell} +samples = np.zeros((6,), dtype=[('sensor_code', 'S4'), + ('position', float), ('value', float)]) +samples.ndim +``` + +```{code-cell} +samples.shape +``` + +```{code-cell} +samples.dtype.names +``` + +```{code-cell} +samples[:] = [('ALFA', 1, 0.37), ('BETA', 1, 0.11), ('TAU', 1, 0.13), + ('ALFA', 1.5, 0.37), ('ALFA', 3, 0.11), ('TAU', 1.2, 0.13)] +samples +``` + +Field access works by indexing with field names: + +```{code-cell} +samples['sensor_code'] +``` + +```{code-cell} +samples['value'] +``` + +```{code-cell} +samples[0] +``` + +```{code-cell} +samples[0]['sensor_code'] = 'TAU' +samples[0] +``` + +Multiple fields at once: + +```{code-cell} +samples[['position', 'value']] +``` + +Fancy indexing works, as usual: + +```{code-cell} +samples[samples['sensor_code'] == b'ALFA'] +``` + +:::{note} +There are a bunch of other syntaxes for constructing structured +arrays, see [here](https://numpy.org/doc/stable/user/basics.rec.html) +and [here](https://numpy.org/doc/stable/reference/arrays.dtypes.html#specifying-and-constructing-data-types). 
+::: + +## {class}`maskedarray`: dealing with (propagation of) missing data + +- For floats one could use NaN's, but masks work for all types: + +```{code-cell} +x = np.ma.array([1, 2, 3, 4], mask=[0, 1, 0, 1]) +x +``` + +```{code-cell} +y = np.ma.array([1, 2, 3, 4], mask=[0, 1, 1, 1]) +x + y +``` + +- Masking versions of common functions: + +```{code-cell} +np.ma.sqrt([1, -1, 2, -2]) +``` + +:::{note} +There are other useful {ref}`array siblings ` +::: + +--- + +While it is off topic in a chapter on NumPy, let's take a moment to +recall good coding practices, which really do pay off in the long run: + +:::{admonition} Good practices + +- Explicit variable names (no need of a comment to explain what is in + the variable) + +- Style: spaces after commas, around `=`, etc. + + A certain number of rules for writing "beautiful" code (and, more + importantly, using the same conventions as everybody else!) are + given in the [Style Guide for Python Code](https://peps.python.org/pep-0008) and the [Docstring + Conventions](https://peps.python.org/pep-0257) page (to + manage help strings). + +- Except in some rare cases, variable names and comments in English. + ::: diff --git a/intro/numpy/elaborate_arrays.rst b/intro/numpy/elaborate_arrays.rst deleted file mode 100644 index d35230c13..000000000 --- a/intro/numpy/elaborate_arrays.rst +++ /dev/null @@ -1,252 +0,0 @@ -.. For doctests - - >>> import numpy as np - >>> import matplotlib.pyplot as plt - -.. currentmodule:: numpy - -More elaborate arrays -====================== - -.. contents:: Section contents - :local: - :depth: 1 - -More data types ---------------- - -Casting -........ - -"Bigger" type wins in mixed-type operations:: - - >>> np.array([1, 2, 3]) + 1.5 - array([2.5, 3.5, 4.5]) - -Assignment never changes the type! 
:: - - >>> a = np.array([1, 2, 3]) - >>> a.dtype - dtype('int64') - >>> a[0] = 1.9 # <-- float is truncated to integer - >>> a - array([1, 2, 3]) - -Forced casts:: - - >>> a = np.array([1.7, 1.2, 1.6]) - >>> b = a.astype(int) # <-- truncates to integer - >>> b - array([1, 1, 1]) - -Rounding:: - - >>> a = np.array([1.2, 1.5, 1.6, 2.5, 3.5, 4.5]) - >>> b = np.around(a) - >>> b # still floating-point - array([1., 2., 2., 2., 4., 4.]) - >>> c = np.around(a).astype(int) - >>> c - array([1, 2, 2, 2, 4, 4]) - -Different data type sizes -.......................... - -Integers (signed): - -=================== ============================================================== -:class:`int8` 8 bits -:class:`int16` 16 bits -:class:`int32` 32 bits (same as :class:`int` on 32-bit platform) -:class:`int64` 64 bits (same as :class:`int` on 64-bit platform) -=================== ============================================================== - -:: - - >>> np.array([1], dtype=int).dtype - dtype('int64') - >>> np.iinfo(np.int32).max, 2**31 - 1 - (2147483647, 2147483647) - - -Unsigned integers: - -=================== ============================================================== -:class:`uint8` 8 bits -:class:`uint16` 16 bits -:class:`uint32` 32 bits -:class:`uint64` 64 bits -=================== ============================================================== - -:: - - >>> np.iinfo(np.uint32).max, 2**32 - 1 - (4294967295, 4294967295) - - -Floating-point numbers: - -=================== ============================================================== -:class:`float16` 16 bits -:class:`float32` 32 bits -:class:`float64` 64 bits (same as :class:`float`) -:class:`float96` 96 bits, platform-dependent (same as :class:`np.longdouble`) -:class:`float128` 128 bits, platform-dependent (same as :class:`np.longdouble`) -=================== ============================================================== - -:: - - >>> np.finfo(np.float32).eps - np.float32(1.1920929e-07) - >>> np.finfo(np.float64).eps - 
np.float64(2.220446049250313e-16) - - >>> np.float32(1e-8) + np.float32(1) == 1 - np.True_ - >>> np.float64(1e-8) + np.float64(1) == 1 - np.False_ - -Complex floating-point numbers: - -=================== ============================================================== -:class:`complex64` two 32-bit floats -:class:`complex128` two 64-bit floats -:class:`complex192` two 96-bit floats, platform-dependent -:class:`complex256` two 128-bit floats, platform-dependent -=================== ============================================================== - -.. topic:: Smaller data types - - If you don't know you need special data types, then you probably don't. - - Comparison on using ``float32`` instead of ``float64``: - - - Half the size in memory and on disk - - Half the memory bandwidth required (may be a bit faster in some operations) - - .. ipython:: - - In [1]: a = np.zeros((int(1e6),), dtype=np.float64) - - In [2]: b = np.zeros((int(1e6),), dtype=np.float32) - - In [3]: %timeit a*a - 1000 loops, best of 3: 1.78 ms per loop - - In [4]: %timeit b*b - 1000 loops, best of 3: 1.07 ms per loop - - - But: bigger rounding errors --- sometimes in surprising places - (i.e., don't use them unless you really need them) - - -Structured data types ---------------------- - -=============== ==================== -``sensor_code`` (4-character string) -``position`` (float) -``value`` (float) -=============== ==================== - -:: - - >>> samples = np.zeros((6,), dtype=[('sensor_code', 'S4'), - ... ('position', float), ('value', float)]) - >>> samples.ndim - 1 - >>> samples.shape - (6,) - >>> samples.dtype.names - ('sensor_code', 'position', 'value') - >>> samples[:] = [('ALFA', 1, 0.37), ('BETA', 1, 0.11), ('TAU', 1, 0.13), - ... ('ALFA', 1.5, 0.37), ('ALFA', 3, 0.11), ('TAU', 1.2, 0.13)] - >>> samples - array([(b'ALFA', 1. , 0.37), (b'BETA', 1. , 0.11), (b'TAU', 1. , 0.13), - (b'ALFA', 1.5, 0.37), (b'ALFA', 3. 
, 0.11), (b'TAU', 1.2, 0.13)], - dtype=[('sensor_code', 'S4'), ('position', '>> samples['sensor_code'] - array([b'ALFA', b'BETA', b'TAU', b'ALFA', b'ALFA', b'TAU'], dtype='|S4') - >>> samples['value'] - array([0.37, 0.11, 0.13, 0.37, 0.11, 0.13]) - >>> samples[0] - np.void((b'ALFA', 1.0, 0.37), dtype=[('sensor_code', 'S4'), ('position', '>> samples[0]['sensor_code'] = 'TAU' - >>> samples[0] - np.void((b'TAU', 1.0, 0.37), dtype=[('sensor_code', 'S4'), ('position', '>> samples[['position', 'value']] - array([(1. , 0.37), (1. , 0.11), (1. , 0.13), (1.5, 0.37), - (3. , 0.11), (1.2, 0.13)], - dtype={'names': ['position', 'value'], 'formats': ['>> samples[samples['sensor_code'] == b'ALFA'] - array([(b'ALFA', 1.5, 0.37), (b'ALFA', 3. , 0.11)], - dtype=[('sensor_code', 'S4'), ('position', '`__ - and `here `__. - - -:class:`maskedarray`: dealing with (propagation of) missing data ------------------------------------------------------------------- - -* For floats one could use NaN's, but masks work for all types:: - - >>> x = np.ma.array([1, 2, 3, 4], mask=[0, 1, 0, 1]) - >>> x - masked_array(data=[1, --, 3, --], - mask=[False, True, False, True], - fill_value=999999) - - - >>> y = np.ma.array([1, 2, 3, 4], mask=[0, 1, 1, 1]) - >>> x + y - masked_array(data=[2, --, --, --], - mask=[False, True, True, True], - fill_value=999999) - - -* Masking versions of common functions:: - - >>> np.ma.sqrt([1, -1, 2, -2]) #doctest:+ELLIPSIS - masked_array(data=[1.0, --, 1.41421356237... --], - mask=[False, True, False, True], - fill_value=1e+20) - - - -.. note:: - - There are other useful :ref:`array siblings ` - - -_____ - -While it is off topic in a chapter on NumPy, let's take a moment to -recall good coding practice, which really do pay off in the long run: - -.. topic:: Good practices - - * Explicit variable names (no need of a comment to explain what is in - the variable) - - * Style: spaces after commas, around ``=``, etc. 
- - A certain number of rules for writing "beautiful" code (and, more - importantly, using the same conventions as everybody else!) are - given in the `Style Guide for Python Code - `_ and the `Docstring - Conventions `_ page (to - manage help strings). - - * Except some rare cases, variable names and comments in English. diff --git a/intro/numpy/examples/plot_chebyfit.py b/intro/numpy/examples/plot_chebyfit.py deleted file mode 100644 index aef27f6ce..000000000 --- a/intro/numpy/examples/plot_chebyfit.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Fitting in Chebyshev basis -========================== - -Plot noisy data and their polynomial fit in a Chebyshev basis - -""" - -import numpy as np -import matplotlib.pyplot as plt - -rng = np.random.default_rng(27446968) - -x = np.linspace(-1, 1, 2000) -y = np.cos(x) + 0.3 * rng.random(2000) -p = np.polynomial.Chebyshev.fit(x, y, 90) - -plt.plot(x, y, "r.") -plt.plot(x, p(x), "k-", lw=3) -plt.show() diff --git a/intro/numpy/examples/plot_elephant.py b/intro/numpy/examples/plot_elephant.py deleted file mode 100644 index ad3f7b827..000000000 --- a/intro/numpy/examples/plot_elephant.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Reading and writing an elephant -=============================== - -Read and write images - -""" - -import numpy as np -import matplotlib.pyplot as plt - -################################# -# original figure -################################# - -plt.figure() -img = plt.imread("../../../data/elephant.png") -plt.imshow(img) - -################################# -# red channel displayed in grey -################################# - -plt.figure() -img_red = img[:, :, 0] -plt.imshow(img_red, cmap="gray") - -################################# -# lower resolution -################################# - -plt.figure() -img_tiny = img[::6, ::6] -plt.imshow(img_tiny, interpolation="nearest") - -plt.show() diff --git a/intro/numpy/examples/plot_populations.py b/intro/numpy/examples/plot_populations.py index 22d25a0e7..c57a29778 
100644 --- a/intro/numpy/examples/plot_populations.py +++ b/intro/numpy/examples/plot_populations.py @@ -9,7 +9,7 @@ import numpy as np import matplotlib.pyplot as plt -data = np.loadtxt("../../../data/populations.txt") +data = np.loadtxt("../data/populations.txt") year, hares, lynxes, carrots = data.T plt.axes((0.2, 0.1, 0.5, 0.8)) diff --git a/intro/numpy/exercises.md b/intro/numpy/exercises.md new file mode 100644 index 000000000..b7c405b5d --- /dev/null +++ b/intro/numpy/exercises.md @@ -0,0 +1,488 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(numpy-exercises)= + +# Some exercises + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +## Array manipulations + +::: {exercise-start} +:label: array-manipulation +:class: dropdown +::: + +**Form the 2-D array (without typing it in explicitly)** + ++++ + +```python +[[1, 6, 11], + [2, 7, 12], + [3, 8, 13], + [4, 9, 14], + [5, 10, 15]] +``` + +and generate a new array containing its 2nd and 4th rows. + +**Divide each column of the array** + +```{code-cell} +import numpy as np +a = np.arange(25).reshape(5, 5) +``` + +elementwise with the array `b = np.array([1., 5, 10, 15, 20])`. +(Hint: `np.newaxis`). + +**Harder one, random numbers** + +Generate a 10 x 3 array of random numbers (in range \[0,1\]). For each row, pick the number closest to 0.5. + +- Use `abs` and `argmin` to find the column `j` closest for + each row. +- Use fancy indexing to extract the numbers. (Hint: `a[i,j]` -- the array `i` + must contain the row numbers corresponding to stuff in `j`.) + +::: {exercise-end} +::: + +::: {solution-start} array-manipulation +:class: dropdown +::: + +```{code-cell} +import numpy as np +from numpy import newaxis + +# Part 1. 
+ +a = np.arange(1, 16).reshape(3, -1).T +print(a) +``` + +::: {solution-end} +::: + ++++ + +## Picture manipulation: Framing a Face + +Let's do some manipulations on NumPy arrays by starting with an image +of a raccoon. `scipy` provides a 2D array of this image with the +`scipy.datasets.face` function: + +```{code-cell} +import scipy as sp +face = sp.datasets.face(gray=True) # 2D grayscale image +``` + +Here are a few images we will be able to obtain with our manipulations: +use different colormaps, crop the image, change some parts of the image. + +![](images/faces.png) + +Let's use the `imshow` function of matplotlib to display the image. + +```{code-cell} +import matplotlib.pyplot as plt +face = sp.datasets.face(gray=True) +plt.imshow(face) +``` + +The face is displayed in false colors. A colormap must be specified for it +to be displayed in grey. + +```{code-cell} +plt.imshow(face, cmap=plt.cm.gray) +``` + +### Narrow centering + +Create an array of the image with a narrower centering; remove 100 pixels from +all the borders of the image. To check the result, display this new array with +`imshow`. + +```{code-cell} +crop_face = face[100:-100, 100:-100] +``` + +### Frame face + +We will now frame the face with a black locket. For this, we need to create +a mask corresponding to the pixels we want to be black. The center of the face +is around (660, 330), so we defined the mask by this condition +`(y-300)**2 + (x-660)**2` + +```{code-cell} +sy, sx = face.shape +y, x = np.ogrid[0:sy, 0:sx] # x and y indices of pixels +y.shape, x.shape +``` + +```{code-cell} +centerx, centery = (660, 300) # center of the image +mask = ((y - centery)**2 + (x - centerx)**2) > 230**2 # circle +``` + +then we assign the value 0 to the pixels of the image corresponding to the +mask. 
The syntax is extremely simple and intuitive: + +```{code-cell} +face[mask] = 0 +plt.imshow(face) +``` + +Follow-up: + +- Copy all instructions of this exercise into a script called `face_locket.py`, + then execute this script in IPython with `%run face_locket.py`. +- Change the circle to an ellipsoid. + +## Data statistics + +The data in {download}`populations.txt <../../data/populations.txt>` +describes the populations of hares and lynxes (and carrots) in +northern Canada during 20 years: + +```{code-cell} +data = np.loadtxt('data/populations.txt') +year, hares, lynxes, carrots = data.T # trick: columns to variables +``` + +```{code-cell} +import matplotlib.pyplot as plt + +plt.axes([0.2, 0.1, 0.5, 0.8]) +plt.plot(year, hares, year, lynxes, year, carrots) +plt.legend(('Hare', 'Lynx', 'Carrot'), loc=(1.05, 0.5)) +``` + +::: {exercise-start} +:label: statistics-with-arrays +:class: dropdown +::: + +Compute and print, based on the data in `populations.txt`... + +1. The mean and std of the populations of each species for the years + in the period. +2. Which year each species had the largest population. +3. Which species has the largest population for each year. + (Hint: `argsort` & fancy indexing of + `np.array(['H', 'L', 'C'])`) +4. Which years any of the populations is above 50000. + (Hint: comparisons and `np.any`) +5. The top 2 years for each species when they had the lowest + populations. (Hint: `argsort`, fancy indexing) +6. Compare (plot) the change in hare population (see + `help(np.gradient)`) and the number of lynxes. Check correlation + (see `help(np.corrcoef)`). + +... all without for-loops. 
+ +::: {exercise-end} +::: + +::: {solution-start} statistics-with-arrays +:class: dropdown +::: + +```{code-cell} +import numpy as np + +data = np.loadtxt("data/populations.txt") +year, hares, lynxes, carrots = data.T +populations = data[:, 1:] + +print(" Hares, Lynxes, Carrots") +print("Mean:", populations.mean(axis=0)) +print("Std:", populations.std(axis=0)) + +j_max_years = np.argmax(populations, axis=0) +print("Max. year:", year[j_max_years]) + +max_species = np.argmax(populations, axis=1) +species = np.array(["Hare", "Lynx", "Carrot"]) +print("Max species:") +print(year) +print(species[max_species]) + +above_50000 = np.any(populations > 50000, axis=1) +print("Any above 50000:", year[above_50000]) + +j_top_2 = np.argsort(populations, axis=0)[:2] +print("Top 2 years with lowest populations for each:") +print(year[j_top_2]) + +hare_grad = np.gradient(hares, 1.0) +print("diff(Hares) vs. Lynxes correlation", np.corrcoef(hare_grad, lynxes)[0, 1]) + +import matplotlib.pyplot as plt + +plt.plot(year, hare_grad, year, -lynxes) +plt.savefig("plot.png") +``` + +::: {solution-end} +::: + +## Crude integral approximations + +::: {exercise-start} +:label: integral-approx +:class: dropdown +::: + +Write a function `f(a, b, c)` that returns $a^b - c$. Form +a 24x12x6 array containing its values in parameter ranges `[0,1] x +[0,1] x [0,1]`. + +Approximate the 3-d integral + +$$ +\int_0^1\int_0^1\int_0^1(a^b-c)da\,db\,dc +$$ + +over this volume with the mean. The exact result is: $\ln 2 - +\frac{1}{2}\approx0.1931\ldots$ --- what is your relative error? + +(Hints: use elementwise operations and broadcasting. +You can make `np.ogrid` give a number of points in given range +with `np.ogrid[0:1:20j]`.) 
+ +**Reminder** Python functions: + +```{code-cell} +def f(a, b, c): + return some_result +``` + +::: {exercise-end} +::: + +::: {solution-start} integral-approx +:class: dropdown +::: + +```{code-cell} +import numpy as np +from numpy import newaxis + + +def f(a, b, c): + return a**b - c + + +a = np.linspace(0, 1, 24) +b = np.linspace(0, 1, 12) +c = np.linspace(0, 1, 6) + +samples = f(a[:, newaxis, newaxis], b[newaxis, :, newaxis], c[newaxis, newaxis, :]) + +# or, +# +# a, b, c = np.ogrid[0:1:24j, 0:1:12j, 0:1:6j] +# samples = f(a, b, c) + +integral = samples.mean() + +print("Approximation:", integral) +print("Exact:", np.log(2) - 0.5) +``` + +::: {solution-end} +::: + ++++ + +## Mandelbrot set + +::: {exercise-start} +:label: mandelbrot-fractal +:class: dropdown +::: + +Write a script that computes the Mandelbrot fractal. The Mandelbrot +iteration: + +```{code-cell} +N_max = 50 +some_threshold = 50 + +c = x + 1j*y + +z = 0 +for j in range(N_max): + z = z**2 + c +``` + +Point (x, y) belongs to the Mandelbrot set if $|z|$ \< +`some_threshold`. + +Do this computation by: + +```{code-cell} +:tags: [hide-input] + +mask = np.ones((3, 3)) +``` + +1. Construct a grid of c = x + 1j\*y values in range [-2, 1] x [-1.5, 1.5] +2. Do the iteration +3. Form the 2-d boolean mask indicating which points are in the set +4. 
Save the result to an image with: + + ```python + import matplotlib.pyplot as plt + plt.imshow(mask.T, extent=[-2, 1, -1.5, 1.5]) + plt.gray() + plt.savefig('mandelbrot.png') + ``` + +::: {exercise-end} +::: + +::: {solution-start} mandelbrot-fractal +:class: dropdown +::: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +from numpy import newaxis + + +def compute_mandelbrot(N_max, some_threshold, nx, ny): + # A grid of c-values + x = np.linspace(-2, 1, nx) + y = np.linspace(-1.5, 1.5, ny) + + c = x[:, newaxis] + 1j * y[newaxis, :] + + # Mandelbrot iteration + + z = c + for j in range(N_max): + z = z**2 + c + + mandelbrot_set = abs(z) < some_threshold + + return mandelbrot_set + +# Save + +mandelbrot_set = compute_mandelbrot(50, 50.0, 601, 401) + +plt.imshow(mandelbrot_set.T, extent=[-2, 1, -1.5, 1.5]) # type: ignore[arg-type] +plt.gray() +plt.savefig("mandelbrot.png") +``` + +::: {solution-end} +::: + ++++ + +## Markov chain + +![](images/markov-chain.png) + +::: {exercise-start} +:label: markov-implementation +:class: dropdown +::: + +Markov chain transition matrix `P`, and probability distribution on +the states `p`: + +1. `0 <= P[i,j] <= 1`: probability to go from state `i` to state `j` +2. Transition rule: $p_{new} = P^T p_{old}$ +3. `all(sum(P, axis=1) == 1)`, `p.sum() == 1`: normalization + +Write a script that works with 5 states, and: + +- Constructs a random matrix, and normalizes each row so that it + is a transition matrix. +- Starts from a random (normalized) probability distribution + `p` and takes 50 steps => `p_50` +- Computes the stationary distribution: the eigenvector of `P.T` + with eigenvalue 1 (numerically: closest to 1) => `p_stationary` + + Remember to normalize the eigenvector — I didn't... + +- Checks if `p_50` and `p_stationary` are equal to tolerance 1e-5 + +Toolbox: `np.random`, `@`, `np.linalg.eig`, reductions, `abs()`, `argmin`, +comparisons, `all`, `np.linalg.norm`, etc. 
+ +::: {exercise-end} +::: + +::: {solution-start} markov-implementation +:class: dropdown +::: + +Solution to Markov chain exercise. + +```{code-cell} +import numpy as np + +rng = np.random.default_rng(27446968) + +n_states = 5 +n_steps = 50 +tolerance = 1e-5 + +# Random transition matrix and state vector +P = rng.random(size=(n_states, n_states)) +p = rng.random(n_states) + +# Normalize rows in P +P /= P.sum(axis=1)[:, np.newaxis] + +# Normalize p +p /= p.sum() + +# Take steps +for k in range(n_steps): + p = P.T @ p + +p_50 = p +print(p_50) + +# Compute stationary state +w, v = np.linalg.eig(P.T) + +j_stationary = np.argmin(abs(w - 1.0)) +p_stationary = v[:, j_stationary].real +p_stationary /= p_stationary.sum() +print(p_stationary) + +# Compare +if all(abs(p_50 - p_stationary) < tolerance): + print("Tolerance satisfied in infty-norm") + +if np.linalg.norm(p_50 - p_stationary) < tolerance: + print("Tolerance satisfied in 2-norm") +``` + +::: {solution-end} +::: diff --git a/intro/numpy/exercises.rst b/intro/numpy/exercises.rst deleted file mode 100644 index 548a28420..000000000 --- a/intro/numpy/exercises.rst +++ /dev/null @@ -1,268 +0,0 @@ -.. for doctests - >>> import matplotlib.pyplot as plt - -.. _numpy_exercises: - -Some exercises -============== - -Array manipulations --------------------- - -1. Form the 2-D array (without typing it in explicitly):: - - [[1, 6, 11], - [2, 7, 12], - [3, 8, 13], - [4, 9, 14], - [5, 10, 15]] - - and generate a new array containing its 2nd and 4th rows. - -2. Divide each column of the array: - - .. sourcecode:: pycon - - >>> import numpy as np - >>> a = np.arange(25).reshape(5, 5) - - elementwise with the array ``b = np.array([1., 5, 10, 15, 20])``. - (Hint: ``np.newaxis``). - -3. Harder one: Generate a 10 x 3 array of random numbers (in range [0,1]). - For each row, pick the number closest to 0.5. - - - Use ``abs`` and ``argmin`` to find the column ``j`` closest for - each row. - - - Use fancy indexing to extract the numbers. 
(Hint: ``a[i,j]`` -- - the array ``i`` must contain the row numbers corresponding to stuff in - ``j``.) - - -Picture manipulation: Framing a Face ------------------------------------- - -Let's do some manipulations on NumPy arrays by starting with an image -of a raccoon. ``scipy`` provides a 2D array of this image with the -``scipy.datasets.face`` function:: - - - >>> import scipy as sp - >>> face = sp.datasets.face(gray=True) # 2D grayscale image - -Here are a few images we will be able to obtain with our manipulations: -use different colormaps, crop the image, change some parts of the image. - -.. image:: images/faces.png - :align: center - -* Let's use the imshow function of matplotlib to display the image. - - .. sourcecode:: pycon - - >>> import matplotlib.pyplot as plt - >>> face = sp.datasets.face(gray=True) - >>> plt.imshow(face) - - -* The face is displayed in false colors. A colormap must be - specified for it to be displayed in grey. - - .. sourcecode:: pycon - - >>> plt.imshow(face, cmap=plt.cm.gray) - - -* Create an array of the image with a narrower centering : for example, - remove 100 pixels from all the borders of the image. To check the result, - display this new array with ``imshow``. - - .. sourcecode:: pycon - - >>> crop_face = face[100:-100, 100:-100] - -* We will now frame the face with a black locket. For this, we - need to create a mask corresponding to the pixels we want to be - black. The center of the face is around (660, 330), so we defined - the mask by this condition ``(y-300)**2 + (x-660)**2`` - - .. sourcecode:: pycon - - >>> sy, sx = face.shape - >>> y, x = np.ogrid[0:sy, 0:sx] # x and y indices of pixels - >>> y.shape, x.shape - ((768, 1), (1, 1024)) - >>> centerx, centery = (660, 300) # center of the image - >>> mask = ((y - centery)**2 + (x - centerx)**2) > 230**2 # circle - - then we assign the value 0 to the pixels of the image corresponding - to the mask. The syntax is extremely simple and intuitive: - - .. 
sourcecode:: pycon - - >>> face[mask] = 0 - >>> plt.imshow(face) - - -* Follow-up: copy all instructions of this exercise in a script called - ``face_locket.py`` then execute this script in IPython with ``%run - face_locket.py``. - - Change the circle to an ellipsoid. - -Data statistics ----------------- - -The data in :download:`populations.txt <../../data/populations.txt>` -describes the populations of hares and lynxes (and carrots) in -northern Canada during 20 years: - -.. sourcecode:: pycon - - >>> data = np.loadtxt('data/populations.txt') - >>> year, hares, lynxes, carrots = data.T # trick: columns to variables - - >>> import matplotlib.pyplot as plt - >>> plt.axes([0.2, 0.1, 0.5, 0.8]) - - >>> plt.plot(year, hares, year, lynxes, year, carrots) - [, ...] - >>> plt.legend(('Hare', 'Lynx', 'Carrot'), loc=(1.05, 0.5)) - - -.. image:: auto_examples/images/sphx_glr_plot_populations_001.png - :width: 50% - :target: auto_examples/plot_populations.html - :align: center - -Computes and print, based on the data in ``populations.txt``... - -1. The mean and std of the populations of each species for the years - in the period. - -2. Which year each species had the largest population. - -3. Which species has the largest population for each year. - (Hint: ``argsort`` & fancy indexing of - ``np.array(['H', 'L', 'C'])``) - -4. Which years any of the populations is above 50000. - (Hint: comparisons and ``np.any``) - -5. The top 2 years for each species when they had the lowest - populations. (Hint: ``argsort``, fancy indexing) - -6. Compare (plot) the change in hare population (see - ``help(np.gradient)``) and the number of lynxes. Check correlation - (see ``help(np.corrcoef)``). - -... all without for-loops. - -Solution: :download:`Python source file ` - -Crude integral approximations ------------------------------ - -Write a function ``f(a, b, c)`` that returns :math:`a^b - c`. Form -a 24x12x6 array containing its values in parameter ranges ``[0,1] x -[0,1] x [0,1]``. 
- -Approximate the 3-d integral - -.. math:: \int_0^1\int_0^1\int_0^1(a^b-c)da\,db\,dc - -over this volume with the mean. The exact result is: :math:`\ln 2 - -\frac{1}{2}\approx0.1931\ldots` --- what is your relative error? - -(Hints: use elementwise operations and broadcasting. -You can make ``np.ogrid`` give a number of points in given range -with ``np.ogrid[0:1:20j]``.) - -**Reminder** Python functions:: - - def f(a, b, c): - return some_result - -Solution: :download:`Python source file ` - -Mandelbrot set ---------------- - -.. image:: auto_examples/images/sphx_glr_plot_mandelbrot_001.png - :width: 50% - :target: auto_examples/plot_mandelbrot.html - :align: center - -Write a script that computes the Mandelbrot fractal. The Mandelbrot -iteration:: - - N_max = 50 - some_threshold = 50 - - c = x + 1j*y - - z = 0 - for j in range(N_max): - z = z**2 + c - -Point (x, y) belongs to the Mandelbrot set if :math:`|z|` < -``some_threshold``. - -Do this computation by: - -.. For doctests - >>> mask = np.ones((3, 3)) - -1. Construct a grid of c = x + 1j*y values in range [-2, 1] x [-1.5, 1.5] - -2. Do the iteration - -3. Form the 2-d boolean mask indicating which points are in the set - -4. Save the result to an image with: - - .. sourcecode:: pycon - - >>> import matplotlib.pyplot as plt - >>> plt.imshow(mask.T, extent=[-2, 1, -1.5, 1.5]) - - >>> plt.gray() - >>> plt.savefig('mandelbrot.png') - -Solution: :download:`Python source file ` - -Markov chain -------------- - -.. image:: images/markov-chain.png - -Markov chain transition matrix ``P``, and probability distribution on -the states ``p``: - -1. ``0 <= P[i,j] <= 1``: probability to go from state ``i`` to state ``j`` - -2. Transition rule: :math:`p_{new} = P^T p_{old}` - -3. ``all(sum(P, axis=1) == 1)``, ``p.sum() == 1``: normalization - -Write a script that works with 5 states, and: - -- Constructs a random matrix, and normalizes each row so that it - is a transition matrix. 
- -- Starts from a random (normalized) probability distribution - ``p`` and takes 50 steps => ``p_50`` - -- Computes the stationary distribution: the eigenvector of ``P.T`` - with eigenvalue 1 (numerically: closest to 1) => ``p_stationary`` - -Remember to normalize the eigenvector --- I didn't... - -- Checks if ``p_50`` and ``p_stationary`` are equal to tolerance 1e-5 - -Toolbox: ``np.random``, ``@``, ``np.linalg.eig``, -reductions, ``abs()``, ``argmin``, comparisons, ``all``, -``np.linalg.norm``, etc. - -Solution: :download:`Python source file ` diff --git a/intro/numpy/gallery.rst b/intro/numpy/gallery.rst deleted file mode 100644 index 939efe548..000000000 --- a/intro/numpy/gallery.rst +++ /dev/null @@ -1,8 +0,0 @@ -Full code examples -================== - -.. include the gallery. Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 diff --git a/intro/numpy/index.md b/intro/numpy/index.md new file mode 100644 index 000000000..2f90f9032 --- /dev/null +++ b/intro/numpy/index.md @@ -0,0 +1,9 @@ +(numpy)= + +# NumPy: creating and manipulating numerical data + +**Authors**: _Emmanuelle Gouillart, Didrik Pinte, Gaël Varoquaux, and +Pauli Virtanen_ + +This chapter gives an overview of NumPy, the core tool for performant +numerical computing with Python. diff --git a/intro/numpy/index.rst b/intro/numpy/index.rst deleted file mode 100644 index ee86ac103..000000000 --- a/intro/numpy/index.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _numpy: - -*********************************************** -NumPy: creating and manipulating numerical data -*********************************************** - -**Authors**: *Emmanuelle Gouillart, Didrik Pinte, Gaël Varoquaux, and -Pauli Virtanen* - -.. .. contents:: Chapters contents - :local: - :depth: 4 - -This chapter gives an overview of NumPy, the core tool for performant -numerical computing with Python. - -____ - -.. include:: ../../includes/big_toc_css.rst - :start-line: 1 - -.. 
toctree:: - array_object.rst - operations.rst - elaborate_arrays.rst - advanced_operations.rst - exercises.rst - gallery.rst diff --git a/intro/numpy/operations.md b/intro/numpy/operations.md new file mode 100644 index 000000000..ec8c8936c --- /dev/null +++ b/intro/numpy/operations.md @@ -0,0 +1,917 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +# Numerical operations on arrays + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import matplotlib.pyplot as plt +``` + +## Elementwise operations + +### Basic operations + +With scalars: + +```{code-cell} +a = np.array([1, 2, 3, 4]) +a + 1 +``` + +```{code-cell} +2 ** a +``` + +All arithmetic operates elementwise: + +```{code-cell} +b = np.ones(4) + 1 +a - b +``` + +```{code-cell} +a * b +``` + +```{code-cell} +j = np.arange(5) +2**(j + 1) - j +``` + +These operations are of course much faster than if you did them in pure python: + +```{code-cell} +a = np.arange(10000) +%timeit a + 1 +``` + +```{code-cell} +l = range(10000) +%timeit [i+1 for i in l] +``` + +**Warning: array multiplication is not matrix multiplication** + +Consider these examples: + +```{code-cell} +c = np.ones((3, 3)) +c * c # NOT matrix multiplication! +``` + +**Matrix multiplication:** + +```{code-cell} +c @ c +``` + +::: {exercise-start} +:label: elementwise-exercise +:class: dropdown +::: + +- Try simple arithmetic elementwise operations: add even elements + with odd elements +- Time them against their pure python counterparts using `%timeit`. 
+- Generate: + + - `[2**0, 2**1, 2**2, 2**3, 2**4]` + - `a_j = 2^(3*j) - j` + +::: {exercise-end} +::: + +### Other operations + +#### Comparisons + +```{code-cell} +a = np.array([1, 2, 3, 4]) +b = np.array([4, 2, 2, 4]) +a == b +``` + +```{code-cell} +a > b +``` + +Array-wise comparisons: + +```{code-cell} +a = np.array([1, 2, 3, 4]) +b = np.array([4, 2, 2, 4]) +c = np.array([1, 2, 3, 4]) +np.array_equal(a, b) +``` + +```{code-cell} +np.array_equal(a, c) +``` + +#### Logical operations + +```{code-cell} +a = np.array([1, 1, 0, 0], dtype=bool) +b = np.array([1, 0, 1, 0], dtype=bool) +np.logical_or(a, b) +``` + +```{code-cell} +np.logical_and(a, b) +``` + +#### Transcendental functions + +```{code-cell} +a = np.arange(5) +np.sin(a) +``` + +```{code-cell} +np.exp(a) +``` + +```{code-cell} +np.log(np.exp(a)) +``` + +#### Shape mismatches + +```{code-cell} +:tags: [raises-exception] + +a = np.arange(4) +a + np.array([1, 2]) +``` + +_Broadcasting?_ We'll return to that {ref}`later <broadcasting>`. + +#### Transposition + +```{code-cell} +a = np.triu(np.ones((3, 3)), 1) # see help(np.triu) +a +``` + +```{code-cell} +a.T +``` + +Remember, **the transposition is a view**. + +The transpose returns a _view_ of the original array: + +```{code-cell} +a = np.arange(9).reshape(3, 3) +a.T[0, 2] = 999 +a.T +``` + +```{code-cell} +a +``` + +#### Linear algebra + +The sub-module {mod}`numpy.linalg` implements basic linear algebra, such as +solving linear systems, singular value decomposition, etc. However, it is +not guaranteed to be compiled using efficient routines, and thus we +recommend the use of {mod}`scipy.linalg`, as detailed in section +{ref}`scipy-linalg`. + +::: {exercise-start} +:label: other-operations-exercise +:class: dropdown +::: + +- Look at the help for `np.allclose`. When might this be useful? +- Look at the help for `np.triu` and `np.tril`. 
+ +::: {exercise-end} +::: + +## Basic reductions + +### Computing sums + +```{code-cell} +x = np.array([1, 2, 3, 4]) +np.sum(x) +``` + +```{code-cell} +x.sum() +``` + +![](images/reductions.png) + +Sum by rows and by columns: + +```{code-cell} +x = np.array([[1, 1], [2, 2]]) +x +``` + +```{code-cell} +x.sum(axis=0) # columns (first dimension) +``` + +```{code-cell} +x[:, 0].sum(), x[:, 1].sum() +``` + +```{code-cell} +x.sum(axis=1) # rows (second dimension) +``` + +```{code-cell} +x[0, :].sum(), x[1, :].sum() +``` + +Here is the same idea in higher dimensions: + +```{code-cell} +rng = np.random.default_rng(27446968) +x = rng.random((2, 2, 2)) +x.sum(axis=2)[0, 1] +``` + +```{code-cell} +x[0, 1, :].sum() +``` + +### Other reductions + +These work the same way (and take `axis=`) + +#### Extrema + +```{code-cell} +x = np.array([1, 3, 2]) +x.min() +``` + +```{code-cell} +x.max() +``` + +```{code-cell} +x.argmin() # index of minimum +``` + +```{code-cell} +x.argmax() # index of maximum +``` + +#### Logical operations + +```{code-cell} +np.all([True, True, False]) +``` + +```{code-cell} +np.any([True, True, False]) +``` + +This can be used for array comparisons: + +```{code-cell} +a = np.zeros((100, 100)) +np.any(a != 0) +``` + +```{code-cell} +np.all(a == a) +``` + +```{code-cell} +a = np.array([1, 2, 3, 2]) +b = np.array([2, 2, 3, 2]) +c = np.array([6, 4, 4, 5]) +((a <= b) & (b <= c)).all() +``` + +**Statistics:** + +```{code-cell} +x = np.array([1, 2, 3, 1]) +y = np.array([[1, 2, 3], [5, 6, 1]]) +x.mean() +``` + +```{code-cell} +np.median(x) +``` + +```{code-cell} +np.median(y, axis=-1) # last axis +``` + +```{code-cell} +x.std() # full population standard dev. +``` + +... and many more (best to learn as you go). + +::: {exercise-start} +:label: reductions-exercise +:class: dropdown +::: + +Given there is a `sum`, what other function might you expect to see? +What is the difference between `sum` and `cumsum`? 
+ +::: {exercise-end} +::: + +#### Worked Example: diffusion using a random walk algorithm + +![](random_walk.png) + +Let us consider a simple 1D random walk process: at each time step a +walker jumps right or left with equal probability. + +We are interested in finding the typical distance from the origin of a +random walker after `t` left or right jumps. We are going to +simulate many "walkers" to find this law, and we are going to do so +using array computing tricks: we are going to create a 2D array with +the "stories" (each walker has a story) in one direction, and the +time in the other: + +![](random_walk_schema.png) + +```{code-cell} +n_stories = 1000 # number of walkers +t_max = 200 # time during which we follow the walker +``` + +We randomly choose all the steps 1 or -1 of the walk: + +```{code-cell} +t = np.arange(t_max) +rng = np.random.default_rng() +steps = 2 * rng.integers(0, 1 + 1, (n_stories, t_max)) - 1 # +1 because the high value is exclusive +np.unique(steps) # Verification: all steps are 1 or -1 +``` + +We build the walks by summing steps along the time: + +```{code-cell} +positions = np.cumsum(steps, axis=1) # axis = 1: dimension of time +sq_distance = positions**2 +``` + +We get the mean in the axis of the stories: + +```{code-cell} +mean_sq_distance = np.mean(sq_distance, axis=0) +``` + +Plot the results: + +```{code-cell} +plt.figure(figsize=(4, 3)) +plt.plot(t, np.sqrt(mean_sq_distance), 'g.', t, np.sqrt(t), 'y-') +plt.xlabel(r"$t$") +plt.ylabel(r"$\sqrt{\langle (\delta x)^2 \rangle}$") +plt.tight_layout() # provide sufficient space for labels +``` + +We find a well-known result in physics: the Root Mean Square (RMS) distance +grows as the square root of the time! 
+ +## Interim summary and exercises + +| Operation type | Numpy functions | +| -------------- | ---------------------------- | +| arithmetic | `sum`, `prod`, `mean`, `std` | +| Extrema | `min`, `max` | +| logical | `all`, `any` | + +Also, recall the `axis` argument to select the dimension over which an operation will be applied: + +```{code-cell} +arr = np.array([[99, 12], [11, 2]]) +arr +``` + +```{code-cell} +# Without axis=, operation applied over whole (flattened, 1D) array. +np.min(arr) +``` + +```{code-cell} +# Operate along first axis (rows). +np.min(arr, axis=0) +``` + +```{code-cell} +# Operate along second axis (columns). +np.min(arr, axis=1) +``` + +::: {exercise-start} +:label: any-all-ex +:class: dropdown +::: + +We load an array from a text file: + +```{code-cell} +an_array = np.loadtxt('data/an_array.txt') +``` + +1. Verify if all elements in `an_array` are equal to 1. +2. Verify if any elements in `an_array` are equal to 1. +3. Compute mean and standard deviation. +4. Challenge: write a function `my_std` that computes the standard deviation + of the elements in the array, where you are only allowed to use `np.sum` + from Numpy in your function. Check your function returns a value close to that from `np.std` (use `np.allclose` for that check). + +::: {exercise-end} +::: + +::: {solution-start} any-all-ex +:class: dropdown +::: + +```{code-cell} +# 1. Verify if all elements in `an_array` are equal to 1. +np.all(an_array == 1) + +# 2. Verify if any elements in `an_array` are equal to 1. +np.any(an_array == 1) + +# 3. Compute mean and standard deviation. +print('Mean', np.mean(an_array)) +print('STD', np.std(an_array)) +``` + +```{code-cell} +# 4. Challenge: write a function `my_std` that computes the standard deviation +# of the elements in the array, where you are only allowed to use `np.sum` from +# Numpy in your function. 
+ +def my_std(a): + n = a.size + m = np.sum(a) / n + return np.sqrt(np.sum((a - m) ** 2) / n) + +# Check we get the same answers from our function as for Numpy. +assert np.allclose(my_std(an_array), np.std(an_array)) +assert np.allclose(my_std(an_array.ravel()), np.std(an_array)) + +rng = np.random.default_rng() +for i in range(10): + another_array = rng.uniform(size=(10, 4)) + assert np.allclose(my_std(another_array), np.std(another_array)) +``` + +::: {solution-end} +::: + +(broadcasting)= + +## Broadcasting + +- Basic operations on `numpy` arrays (addition, etc.) are elementwise + +- This works on arrays of the same size. + +- **Nevertheless** , it's also possible to do + operations on arrays of different sizes if + _NumPy_ can transform these arrays so that + they all have the same size: this conversion + is called **broadcasting**. + +The image below gives an example of broadcasting: + +![](images/numpy_broadcasting.png) + +Let's verify: + +```{code-cell} +a = np.tile(np.arange(0, 40, 10), (3, 1)).T +a +``` + +```{code-cell} +b = np.array([0, 1, 2]) +a + b +``` + +We have already used broadcasting without knowing it!: + +```{code-cell} +a = np.ones((4, 5)) +a[0] = 2 # we assign an array of dimension 0 to an array of dimension 1 +a +``` + +A useful trick: + +```{code-cell} +a = np.arange(0, 40, 10) +a.shape +``` + +```{code-cell} +a = a[:, np.newaxis] # adds a new axis -> 2D array +a.shape +``` + +```{code-cell} +a +``` + +```{code-cell} +a + b +``` + +::: {note} +:class: dropdown + +Broadcasting seems a bit magical, but it is actually quite natural to +use it when we want to solve a problem whose output data is an array +with more dimensions than input data. +::: + +### Worked Example: Broadcasting + +Let's construct an array of distances (in miles) between cities of +Route 66: Chicago, Springfield, Saint-Louis, Tulsa, Oklahoma City, +Amarillo, Santa Fe, Albuquerque, Flagstaff and Los Angeles. 
+ +```{code-cell} +mileposts = np.array([0, 198, 303, 736, 871, 1175, 1475, 1544, + 1913, 2448]) +distance_array = np.abs(mileposts - mileposts[:, np.newaxis]) +distance_array +``` + +![](images/route66.png) + +A lot of grid-based or network-based problems can also use +broadcasting. For instance, if we want to compute the distance from +the origin of points on a 5x5 grid, we can do + +```{code-cell} +x, y = np.arange(5), np.arange(5)[:, np.newaxis] +distance = np.sqrt(x ** 2 + y ** 2) +distance +``` + +Or in color: + +```{code-cell} +plt.pcolor(distance) +plt.colorbar() +``` + +**Remark** : the {func}`numpy.ogrid` function allows to directly create +vectors x and y of the previous example, with two "significant dimensions": + +```{code-cell} +x, y = np.ogrid[0:5, 0:5] +x, y +``` + +```{code-cell} +x.shape, y.shape +distance = np.sqrt(x ** 2 + y ** 2) +``` + +So, `np.ogrid` is very useful as soon as we have to handle +computations on a grid. On the other hand, `np.mgrid` directly +provides matrices full of indices for cases where we can't (or don't +want to) benefit from broadcasting: + +```{code-cell} +x, y = np.mgrid[0:4, 0:4] +x +``` + +```{code-cell} +y +``` + + + + + + + + + +:::{admonition} See also + +{ref}`broadcasting-advanced`: discussion of broadcasting in +the {ref}`advanced-numpy` chapter. +::: + +## Array shape manipulation + +### Flattening + +```{code-cell} +a = np.array([[1, 2, 3], [4, 5, 6]]) +a.ravel() +``` + +```{code-cell} +a.T +``` + +```{code-cell} +a.T.ravel() +``` + +Higher dimensions: last dimensions ravel out "first". 
+ +### Reshaping + +The inverse operation to flattening: + +```{code-cell} +a.shape +``` + +```{code-cell} +b = a.ravel() +b = b.reshape((2, 3)) +b +``` + +Or, + +```{code-cell} +a.reshape((2, -1)) # unspecified (-1) value is inferred +``` + +:::{warning} +`ndarray.reshape` **may** return a view (cf `help(np.reshape)`)), +or copy +::: + +For example, consider: + +```{code-cell} +b[0, 0] = 99 +a +``` + +Beware: reshape may also return a copy!: + +```{code-cell} +a = np.zeros((3, 2)) +b = a.T.reshape(3*2) +b[0] = 9 +a +``` + +To understand this you need to learn more about the memory layout of a NumPy array. + +### Adding a dimension + +Indexing with the `np.newaxis` object allows us to add an axis to an array +(you have seen this already above in the broadcasting section): + +```{code-cell} +z = np.array([1, 2, 3]) +z +``` + +```{code-cell} +z[:, np.newaxis] +``` + +```{code-cell} +z[np.newaxis, :] +``` + +### Dimension shuffling + +```{code-cell} +a = np.arange(4*3*2).reshape(4, 3, 2) +a.shape +``` + +```{code-cell} +a[0, 2, 1] +``` + +```{code-cell} +b = a.transpose(1, 2, 0) +b.shape +``` + +```{code-cell} +b[2, 1, 0] +``` + +Also creates a view: + +```{code-cell} +b[2, 1, 0] = -1 +a[0, 2, 1] +``` + +### Resizing + +Size of an array can be changed with `ndarray.resize`: + +```{code-cell} +a = np.arange(4) +a.resize((8,)) +a +``` + +However, it must not be referred to somewhere else: + +```{code-cell} +:tags: [raises-exception] + +b = a +a.resize((4,)) +``` + + + + + + + + + + + + +::: {exercise-start} +:label: shape-manipulation-exercise +:class: dropdown +::: + +- Look at the docstring for `reshape`, especially the notes section which + has some more information about copies and views. +- Use `flatten` as an alternative to `ravel`. What is the difference? + (Hint: check which one returns a view and which a copy) +- Experiment with `transpose` for dimension shuffling. 
+ +::: {exercise-end} +::: + +## Sorting data + +Sorting along an axis: + +```{code-cell} +a = np.array([[4, 3, 5], [1, 2, 1]]) +b = np.sort(a, axis=1) +b +``` + +:::{note} +Sorts each row separately! +::: + +In-place sort: + +```{code-cell} +a.sort(axis=1) +a +``` + +Sorting with fancy indexing: + +```{code-cell} +a = np.array([4, 3, 1, 2]) +j = np.argsort(a) +j +``` + +```{code-cell} +a[j] +``` + +Finding minima and maxima: + +```{code-cell} +a = np.array([4, 3, 1, 2]) +j_max = np.argmax(a) +j_min = np.argmin(a) +j_max, j_min +``` + + + +::: {exercise-start} +:label: sorting-exercise +:class: dropdown +::: + +- Try both in-place and out-of-place sorting. +- Try creating arrays with different dtypes and sorting them. +- Use `all` or `array_equal` to check the results. +- Look at `np.random.shuffle` for a way to create sortable input quicker. +- Combine `ravel`, `sort` and `reshape`. +- Look at the `axis` keyword for `sort` and rewrite the previous + exercise. + +::: {exercise-end} +::: + +## Summary + +**What do you need to know to get started?** + +- Know how to create arrays : `array`, `arange`, `ones`, + `zeros`. + +- Know the shape of the array with `array.shape`, then use slicing + to obtain different views of the array: `array[::2]`, + etc. Adjust the shape of the array using `reshape` or flatten it + with `ravel`. + +- Obtain a subset of the elements of an array and/or modify their values + with masks, with e.g.: + + ```python + a[a < 0] = 0 + ``` + +- Know miscellaneous operations on arrays, such as finding the mean or max + (`array.max()`, `array.mean()`). No need to retain everything, but + have the reflex to search in the documentation (online docs, + `help()`)!! + +- For advanced use: master the indexing with arrays of integers, as well as + broadcasting. Know more NumPy functions to handle various array + operations. 
+ +:::{admonition} Quick read +If you want to do a first quick pass through the Scientific Python Lectures +to learn the ecosystem, you can directly skip to the next chapter: +{ref}`matplotlib`. + +The remainder of this chapter is not necessary to follow the rest of +the intro part. But be sure to come back and finish this chapter, as +well as to do some more {ref}`exercises `. +::: diff --git a/intro/numpy/operations.rst b/intro/numpy/operations.rst deleted file mode 100644 index 4e1853692..000000000 --- a/intro/numpy/operations.rst +++ /dev/null @@ -1,881 +0,0 @@ - -.. For doctests - - >>> import numpy as np - >>> # For doctest on headless environments - >>> import matplotlib.pyplot as plt - -.. currentmodule:: numpy - -Numerical operations on arrays -============================== - -.. contents:: Section contents - :local: - :depth: 1 - - -Elementwise operations ----------------------- - -Basic operations -................ - -With scalars: - -.. sourcecode:: pycon - - >>> a = np.array([1, 2, 3, 4]) - >>> a + 1 - array([2, 3, 4, 5]) - >>> 2**a - array([ 2, 4, 8, 16]) - -All arithmetic operates elementwise: - -.. sourcecode:: pycon - - >>> b = np.ones(4) + 1 - >>> a - b - array([-1., 0., 1., 2.]) - >>> a * b - array([2., 4., 6., 8.]) - - >>> j = np.arange(5) - >>> 2**(j + 1) - j - array([ 2, 3, 6, 13, 28]) - -These operations are of course much faster than if you did them in pure python: - -.. sourcecode:: pycon - - >>> a = np.arange(10000) - >>> %timeit a + 1 # doctest: +SKIP - 10000 loops, best of 3: 24.3 us per loop - >>> l = range(10000) - >>> %timeit [i+1 for i in l] # doctest: +SKIP - 1000 loops, best of 3: 861 us per loop - - -.. warning:: **Array multiplication is not matrix multiplication:** - - .. sourcecode:: pycon - - >>> c = np.ones((3, 3)) - >>> c * c # NOT matrix multiplication! - array([[1., 1., 1.], - [1., 1., 1.], - [1., 1., 1.]]) - -.. note:: **Matrix multiplication:** - - .. 
sourcecode:: pycon - - >>> c @ c - array([[3., 3., 3.], - [3., 3., 3.], - [3., 3., 3.]]) - -.. topic:: **Exercise: Elementwise operations** - :class: green - - * Try simple arithmetic elementwise operations: add even elements - with odd elements - * Time them against their pure python counterparts using ``%timeit``. - * Generate: - - * ``[2**0, 2**1, 2**2, 2**3, 2**4]`` - * ``a_j = 2^(3*j) - j`` - - -Other operations -................ - -**Comparisons:** - -.. sourcecode:: pycon - - >>> a = np.array([1, 2, 3, 4]) - >>> b = np.array([4, 2, 2, 4]) - >>> a == b - array([False, True, False, True]) - >>> a > b - array([False, False, True, False]) - -.. tip:: - - Array-wise comparisons: - - .. sourcecode:: pycon - - >>> a = np.array([1, 2, 3, 4]) - >>> b = np.array([4, 2, 2, 4]) - >>> c = np.array([1, 2, 3, 4]) - >>> np.array_equal(a, b) - False - >>> np.array_equal(a, c) - True - - -**Logical operations:** - -.. sourcecode:: pycon - - >>> a = np.array([1, 1, 0, 0], dtype=bool) - >>> b = np.array([1, 0, 1, 0], dtype=bool) - >>> np.logical_or(a, b) - array([ True, True, True, False]) - >>> np.logical_and(a, b) - array([ True, False, False, False]) - -**Transcendental functions:** - -.. sourcecode:: pycon - - >>> a = np.arange(5) - >>> np.sin(a) - array([ 0. , 0.84147098, 0.90929743, 0.14112001, -0.7568025 ]) - >>> np.exp(a) - array([ 1. , 2.71828183, 7.3890561 , 20.08553692, 54.59815003]) - >>> np.log(np.exp(a)) - array([0., 1., 2., 3., 4.]) - - -**Shape mismatches** - -.. sourcecode:: pycon - - >>> a = np.arange(4) - >>> a + np.array([1, 2]) - Traceback (most recent call last): - File "", line 1, in - ValueError: operands could not be broadcast together with shapes (4,) (2,) - -*Broadcasting?* We'll return to that :ref:`later `. - -**Transposition:** - -.. sourcecode:: pycon - - >>> a = np.triu(np.ones((3, 3)), 1) # see help(np.triu) - >>> a - array([[0., 1., 1.], - [0., 0., 1.], - [0., 0., 0.]]) - >>> a.T - array([[0., 0., 0.], - [1., 0., 0.], - [1., 1., 0.]]) - - -.. 
note:: **The transposition is a view** - - The transpose returns a *view* of the original array:: - - >>> a = np.arange(9).reshape(3, 3) - >>> a.T[0, 2] = 999 - >>> a.T - array([[ 0, 3, 999], - [ 1, 4, 7], - [ 2, 5, 8]]) - >>> a - array([[ 0, 1, 2], - [ 3, 4, 5], - [999, 7, 8]]) - -.. note:: **Linear algebra** - - The sub-module :mod:`numpy.linalg` implements basic linear algebra, such as - solving linear systems, singular value decomposition, etc. However, it is - not guaranteed to be compiled using efficient routines, and thus we - recommend the use of :mod:`scipy.linalg`, as detailed in section - :ref:`scipy_linalg` - -.. topic:: Exercise other operations - :class: green - - * Look at the help for ``np.allclose``. When might this be useful? - * Look at the help for ``np.triu`` and ``np.tril``. - - -Basic reductions ----------------- - -Computing sums -.............. - -.. sourcecode:: pycon - - >>> x = np.array([1, 2, 3, 4]) - >>> np.sum(x) - np.int64(10) - >>> x.sum() - np.int64(10) - -.. image:: images/reductions.png - :align: right - -Sum by rows and by columns: - -.. sourcecode:: pycon - - >>> x = np.array([[1, 1], [2, 2]]) - >>> x - array([[1, 1], - [2, 2]]) - >>> x.sum(axis=0) # columns (first dimension) - array([3, 3]) - >>> x[:, 0].sum(), x[:, 1].sum() - (np.int64(3), np.int64(3)) - >>> x.sum(axis=1) # rows (second dimension) - array([2, 4]) - >>> x[0, :].sum(), x[1, :].sum() - (np.int64(2), np.int64(4)) - -.. tip:: - - Same idea in higher dimensions: - - .. sourcecode:: pycon - - >>> rng = np.random.default_rng(27446968) - >>> x = rng.random((2, 2, 2)) - >>> x.sum(axis=2)[0, 1] - np.float64(0.73415...) - >>> x[0, 1, :].sum() - np.float64(0.73415...) - -Other reductions -................ - ---- works the same way (and take ``axis=``) - -**Extrema:** - -.. 
sourcecode:: pycon - - >>> x = np.array([1, 3, 2]) - >>> x.min() - np.int64(1) - >>> x.max() - np.int64(3) - - >>> x.argmin() # index of minimum - np.int64(0) - >>> x.argmax() # index of maximum - np.int64(1) - -**Logical operations:** - -.. sourcecode:: pycon - - >>> np.all([True, True, False]) - np.False_ - >>> np.any([True, True, False]) - np.True_ - -.. note:: - - Can be used for array comparisons: - - .. sourcecode:: pycon - - >>> a = np.zeros((100, 100)) - >>> np.any(a != 0) - np.False_ - >>> np.all(a == a) - np.True_ - - >>> a = np.array([1, 2, 3, 2]) - >>> b = np.array([2, 2, 3, 2]) - >>> c = np.array([6, 4, 4, 5]) - >>> ((a <= b) & (b <= c)).all() - np.True_ - -**Statistics:** - -.. sourcecode:: pycon - - >>> x = np.array([1, 2, 3, 1]) - >>> y = np.array([[1, 2, 3], [5, 6, 1]]) - >>> x.mean() - np.float64(1.75) - >>> np.median(x) - np.float64(1.5) - >>> np.median(y, axis=-1) # last axis - array([2., 5.]) - - >>> x.std() # full population standard dev. - np.float64(0.82915619758884995) - - -... and many more (best to learn as you go). - -.. topic:: **Exercise: Reductions** - :class: green - - * Given there is a ``sum``, what other function might you expect to see? - * What is the difference between ``sum`` and ``cumsum``? - - -.. topic:: Worked Example: diffusion using a random walk algorithm - - .. image:: random_walk.png - :align: center - - .. tip:: - - Let us consider a simple 1D random walk process: at each time step a - walker jumps right or left with equal probability. - - We are interested in finding the typical distance from the origin of a - random walker after ``t`` left or right jumps? We are going to - simulate many "walkers" to find this law, and we are going to do so - using array computing tricks: we are going to create a 2D array with - the "stories" (each walker has a story) in one direction, and the - time in the other: - - .. only:: latex - - .. image:: random_walk_schema.png - :align: center - - .. only:: html - - .. 
image:: random_walk_schema.png - :align: center - :width: 100% - - .. sourcecode:: pycon - - >>> n_stories = 1000 # number of walkers - >>> t_max = 200 # time during which we follow the walker - - We randomly choose all the steps 1 or -1 of the walk: - - .. sourcecode:: pycon - - >>> t = np.arange(t_max) - >>> rng = np.random.default_rng() - >>> steps = 2 * rng.integers(0, 1 + 1, (n_stories, t_max)) - 1 # +1 because the high value is exclusive - >>> np.unique(steps) # Verification: all steps are 1 or -1 - array([-1, 1]) - - We build the walks by summing steps along the time: - - .. sourcecode:: pycon - - >>> positions = np.cumsum(steps, axis=1) # axis = 1: dimension of time - >>> sq_distance = positions**2 - - We get the mean in the axis of the stories: - - .. sourcecode:: pycon - - >>> mean_sq_distance = np.mean(sq_distance, axis=0) - - Plot the results: - - .. sourcecode:: pycon - - >>> plt.figure(figsize=(4, 3)) -
- >>> plt.plot(t, np.sqrt(mean_sq_distance), 'g.', t, np.sqrt(t), 'y-') - [, ] - >>> plt.xlabel(r"$t$") - Text(...'$t$') - >>> plt.ylabel(r"$\sqrt{\langle (\delta x)^2 \rangle}$") - Text(...'$\\sqrt{\\langle (\\delta x)^2 \\rangle}$') - >>> plt.tight_layout() # provide sufficient space for labels - - .. image:: auto_examples/images/sphx_glr_plot_randomwalk_001.png - :width: 50% - :target: auto_examples/plot_randomwalk.html - :align: center - - We find a well-known result in physics: the RMS distance grows as the - square root of the time! - - -.. arithmetic: sum/prod/mean/std - -.. extrema: min/max - -.. logical: all/any - -.. the axis argument - -.. EXE: verify if all elements in an array are equal to 1 -.. EXE: verify if any elements in an array are equal to 1 -.. EXE: load data with loadtxt from a file, and compute its basic statistics - -.. CHA: implement mean and std using only sum() - -.. _broadcasting: - -Broadcasting ------------- - -* Basic operations on ``numpy`` arrays (addition, etc.) are elementwise - -* This works on arrays of the same size. - - | **Nevertheless**, It's also possible to do operations on arrays of different - | sizes if *NumPy* can transform these arrays so that they all have - | the same size: this conversion is called **broadcasting**. - -The image below gives an example of broadcasting: - -.. only:: latex - - .. image:: images/numpy_broadcasting.png - :align: center - -.. only:: html - - .. image:: images/numpy_broadcasting.png - :align: center - :width: 100% - -Let's verify: - -.. sourcecode:: pycon - - >>> a = np.tile(np.arange(0, 40, 10), (3, 1)).T - >>> a - array([[ 0, 0, 0], - [10, 10, 10], - [20, 20, 20], - [30, 30, 30]]) - >>> b = np.array([0, 1, 2]) - >>> a + b - array([[ 0, 1, 2], - [10, 11, 12], - [20, 21, 22], - [30, 31, 32]]) - -We have already used broadcasting without knowing it!: - -.. 
sourcecode:: pycon - - >>> a = np.ones((4, 5)) - >>> a[0] = 2 # we assign an array of dimension 0 to an array of dimension 1 - >>> a - array([[2., 2., 2., 2., 2.], - [1., 1., 1., 1., 1.], - [1., 1., 1., 1., 1.], - [1., 1., 1., 1., 1.]]) - -A useful trick: - -.. sourcecode:: pycon - - >>> a = np.arange(0, 40, 10) - >>> a.shape - (4,) - >>> a = a[:, np.newaxis] # adds a new axis -> 2D array - >>> a.shape - (4, 1) - >>> a - array([[ 0], - [10], - [20], - [30]]) - >>> a + b - array([[ 0, 1, 2], - [10, 11, 12], - [20, 21, 22], - [30, 31, 32]]) - - -.. tip:: - - Broadcasting seems a bit magical, but it is actually quite natural to - use it when we want to solve a problem whose output data is an array - with more dimensions than input data. - -.. topic:: Worked Example: Broadcasting - :class: green - - Let's construct an array of distances (in miles) between cities of - Route 66: Chicago, Springfield, Saint-Louis, Tulsa, Oklahoma City, - Amarillo, Santa Fe, Albuquerque, Flagstaff and Los Angeles. - - .. sourcecode:: pycon - - >>> mileposts = np.array([0, 198, 303, 736, 871, 1175, 1475, 1544, - ... 1913, 2448]) - >>> distance_array = np.abs(mileposts - mileposts[:, np.newaxis]) - >>> distance_array - array([[ 0, 198, 303, 736, 871, 1175, 1475, 1544, 1913, 2448], - [ 198, 0, 105, 538, 673, 977, 1277, 1346, 1715, 2250], - [ 303, 105, 0, 433, 568, 872, 1172, 1241, 1610, 2145], - [ 736, 538, 433, 0, 135, 439, 739, 808, 1177, 1712], - [ 871, 673, 568, 135, 0, 304, 604, 673, 1042, 1577], - [1175, 977, 872, 439, 304, 0, 300, 369, 738, 1273], - [1475, 1277, 1172, 739, 604, 300, 0, 69, 438, 973], - [1544, 1346, 1241, 808, 673, 369, 69, 0, 369, 904], - [1913, 1715, 1610, 1177, 1042, 738, 438, 369, 0, 535], - [2448, 2250, 2145, 1712, 1577, 1273, 973, 904, 535, 0]]) - - - .. image:: images/route66.png - :align: center - :scale: 60 - -A lot of grid-based or network-based problems can also use -broadcasting. 
For instance, if we want to compute the distance from -the origin of points on a 5x5 grid, we can do - -.. sourcecode:: pycon - - >>> x, y = np.arange(5), np.arange(5)[:, np.newaxis] - >>> distance = np.sqrt(x ** 2 + y ** 2) - >>> distance - array([[0. , 1. , 2. , 3. , 4. ], - [1. , 1.41421356, 2.23606798, 3.16227766, 4.12310563], - [2. , 2.23606798, 2.82842712, 3.60555128, 4.47213595], - [3. , 3.16227766, 3.60555128, 4.24264069, 5. ], - [4. , 4.12310563, 4.47213595, 5. , 5.65685425]]) - -Or in color: - -.. sourcecode:: pycon - - >>> plt.pcolor(distance) - - >>> plt.colorbar() - - -.. image:: auto_examples/images/sphx_glr_plot_distances_001.png - :width: 50% - :target: auto_examples/plot_distances.html - :align: center - - -**Remark** : the :func:`numpy.ogrid` function allows to directly create vectors x -and y of the previous example, with two "significant dimensions": - -.. sourcecode:: pycon - - >>> x, y = np.ogrid[0:5, 0:5] - >>> x, y - (array([[0], - [1], - [2], - [3], - [4]]), array([[0, 1, 2, 3, 4]])) - >>> x.shape, y.shape - ((5, 1), (1, 5)) - >>> distance = np.sqrt(x ** 2 + y ** 2) - -.. tip:: - - So, ``np.ogrid`` is very useful as soon as we have to handle - computations on a grid. On the other hand, ``np.mgrid`` directly - provides matrices full of indices for cases where we can't (or don't - want to) benefit from broadcasting: - - .. sourcecode:: pycon - - >>> x, y = np.mgrid[0:4, 0:4] - >>> x - array([[0, 0, 0, 0], - [1, 1, 1, 1], - [2, 2, 2, 2], - [3, 3, 3, 3]]) - >>> y - array([[0, 1, 2, 3], - [0, 1, 2, 3], - [0, 1, 2, 3], - [0, 1, 2, 3]]) - -.. rules - -.. some usage examples: scalars, 1-d matrix products - -.. newaxis - -.. EXE: add 1-d array to a scalar -.. EXE: add 1-d array to a 2-d array -.. EXE: multiply matrix from the right with a diagonal array -.. CHA: constructing grids -- meshgrid using only newaxis - -.. seealso:: - - :ref:`broadcasting_advanced`: discussion of broadcasting in - the :ref:`advanced_numpy` chapter. 
- - -Array shape manipulation ------------------------- - -Flattening -.......... - -.. sourcecode:: pycon - - >>> a = np.array([[1, 2, 3], [4, 5, 6]]) - >>> a.ravel() - array([1, 2, 3, 4, 5, 6]) - >>> a.T - array([[1, 4], - [2, 5], - [3, 6]]) - >>> a.T.ravel() - array([1, 4, 2, 5, 3, 6]) - -Higher dimensions: last dimensions ravel out "first". - -Reshaping -......... - -The inverse operation to flattening: - -.. sourcecode:: pycon - - >>> a.shape - (2, 3) - >>> b = a.ravel() - >>> b = b.reshape((2, 3)) - >>> b - array([[1, 2, 3], - [4, 5, 6]]) - -Or, - -.. sourcecode:: pycon - - >>> a.reshape((2, -1)) # unspecified (-1) value is inferred - array([[1, 2, 3], - [4, 5, 6]]) - -.. warning:: - - ``ndarray.reshape`` **may** return a view (cf ``help(np.reshape)``)), - or copy - -.. tip:: - - .. sourcecode:: pycon - - >>> b[0, 0] = 99 - >>> a - array([[99, 2, 3], - [ 4, 5, 6]]) - - Beware: reshape may also return a copy!: - - .. sourcecode:: pycon - - >>> a = np.zeros((3, 2)) - >>> b = a.T.reshape(3*2) - >>> b[0] = 9 - >>> a - array([[0., 0.], - [0., 0.], - [0., 0.]]) - - To understand this you need to learn more about the memory layout of a NumPy array. - -Adding a dimension -.................. - -Indexing with the ``np.newaxis`` object allows us to add an axis to an array -(you have seen this already above in the broadcasting section): - -.. sourcecode:: pycon - - >>> z = np.array([1, 2, 3]) - >>> z - array([1, 2, 3]) - - >>> z[:, np.newaxis] - array([[1], - [2], - [3]]) - - >>> z[np.newaxis, :] - array([[1, 2, 3]]) - - - -Dimension shuffling -................... - -.. sourcecode:: pycon - - >>> a = np.arange(4*3*2).reshape(4, 3, 2) - >>> a.shape - (4, 3, 2) - >>> a[0, 2, 1] - np.int64(5) - >>> b = a.transpose(1, 2, 0) - >>> b.shape - (3, 2, 4) - >>> b[2, 1, 0] - np.int64(5) - -Also creates a view: - -.. sourcecode:: pycon - - >>> b[2, 1, 0] = -1 - >>> a[0, 2, 1] - np.int64(-1) - -Resizing -........ - -Size of an array can be changed with ``ndarray.resize``: - -.. 
sourcecode:: pycon - - >>> a = np.arange(4) - >>> a.resize((8,)) - >>> a - array([0, 1, 2, 3, 0, 0, 0, 0]) - -However, it must not be referred to somewhere else: - -.. sourcecode:: pycon - - >>> b = a - >>> a.resize((4,)) - Traceback (most recent call last): - File "", line 1, in - ValueError: cannot resize an array that references or is referenced - by another array in this way. - Use the np.resize function or refcheck=False - -.. seealso: ``help(np.tensordot)`` - -.. resizing: how to do it, and *when* is it possible (not always!) - -.. reshaping (demo using an image?) - -.. dimension shuffling - -.. when to use: some pre-made algorithm (e.g. in Fortran) accepts only - 1-D data, but you'd like to vectorize it - -.. EXE: load data incrementally from a file, by appending to a resizing array -.. EXE: vectorize a pre-made routine that only accepts 1-D data -.. EXE: manipulating matrix direct product spaces back and forth (give an example from physics -- spin index and orbital indices) -.. EXE: shuffling dimensions when writing a general vectorized function -.. CHA: the mathematical 'vec' operation - -.. topic:: **Exercise: Shape manipulations** - :class: green - - * Look at the docstring for ``reshape``, especially the notes section which - has some more information about copies and views. - * Use ``flatten`` as an alternative to ``ravel``. What is the difference? - (Hint: check which one returns a view and which a copy) - * Experiment with ``transpose`` for dimension shuffling. - -Sorting data ------------- - -Sorting along an axis: - -.. sourcecode:: pycon - - >>> a = np.array([[4, 3, 5], [1, 2, 1]]) - >>> b = np.sort(a, axis=1) - >>> b - array([[3, 4, 5], - [1, 1, 2]]) - -.. note:: Sorts each row separately! - -In-place sort: - -.. sourcecode:: pycon - - >>> a.sort(axis=1) - >>> a - array([[3, 4, 5], - [1, 1, 2]]) - -Sorting with fancy indexing: - -.. 
sourcecode:: pycon - - >>> a = np.array([4, 3, 1, 2]) - >>> j = np.argsort(a) - >>> j - array([2, 3, 1, 0]) - >>> a[j] - array([1, 2, 3, 4]) - -Finding minima and maxima: - -.. sourcecode:: pycon - - >>> a = np.array([4, 3, 1, 2]) - >>> j_max = np.argmax(a) - >>> j_min = np.argmin(a) - >>> j_max, j_min - (np.int64(0), np.int64(2)) - - -.. XXX: need a frame for summaries - - * Arithmetic etc. are elementwise operations - * Basic linear algebra, ``@`` - * Reductions: ``sum(axis=1)``, ``std()``, ``all()``, ``any()`` - * Broadcasting: ``a = np.arange(4); a[:,np.newaxis] + a[np.newaxis,:]`` - * Shape manipulation: ``a.ravel()``, ``a.reshape(2, 2)`` - * Fancy indexing: ``a[a > 3]``, ``a[[2, 3]]`` - * Sorting data: ``.sort()``, ``np.sort``, ``np.argsort``, ``np.argmax`` - -.. topic:: **Exercise: Sorting** - :class: green - - * Try both in-place and out-of-place sorting. - * Try creating arrays with different dtypes and sorting them. - * Use ``all`` or ``array_equal`` to check the results. - * Look at ``np.random.shuffle`` for a way to create sortable input quicker. - * Combine ``ravel``, ``sort`` and ``reshape``. - * Look at the ``axis`` keyword for ``sort`` and rewrite the previous - exercise. - -Summary -------- - -**What do you need to know to get started?** - -* Know how to create arrays : ``array``, ``arange``, ``ones``, - ``zeros``. - -* Know the shape of the array with ``array.shape``, then use slicing - to obtain different views of the array: ``array[::2]``, - etc. Adjust the shape of the array using ``reshape`` or flatten it - with ``ravel``. - -* Obtain a subset of the elements of an array and/or modify their values - with masks - - .. sourcecode:: pycon - - >>> a[a < 0] = 0 - -* Know miscellaneous operations on arrays, such as finding the mean or max - (``array.max()``, ``array.mean()``). No need to retain everything, but - have the reflex to search in the documentation (online docs, - ``help()``)!! 
- -* For advanced use: master the indexing with arrays of integers, as well as - broadcasting. Know more NumPy functions to handle various array - operations. - -.. topic:: **Quick read** - - If you want to do a first quick pass through the Scientific Python Lectures - to learn the ecosystem, you can directly skip to the next chapter: - :ref:`matplotlib`. - - The remainder of this chapter is not necessary to follow the rest of - the intro part. But be sure to come back and finish this chapter, as - well as to do some more :ref:`exercises `. diff --git a/intro/numpy/solutions/1_1_array_creation.py b/intro/numpy/solutions/1_1_array_creation.py deleted file mode 100644 index 23a26543b..000000000 --- a/intro/numpy/solutions/1_1_array_creation.py +++ /dev/null @@ -1,11 +0,0 @@ -import numpy as np - -a = np.ones((4, 4), dtype=int) -a[3, 1] = 6 -a[2, 3] = 2 - -b = np.zeros((6, 5)) -b[1:] = np.diag(np.arange(2, 7)) - -print(a) -print(b) diff --git a/intro/numpy/solutions/1_2_text_data.py b/intro/numpy/solutions/1_2_text_data.py deleted file mode 100644 index 4b5a90f8a..000000000 --- a/intro/numpy/solutions/1_2_text_data.py +++ /dev/null @@ -1,5 +0,0 @@ -import numpy as np - -data = np.loadtxt("../../../data/populations.txt") -reduced_data = data[5:, :-1] -np.savetxt("pop2.txt", reduced_data) diff --git a/intro/numpy/solutions/1_3_tiling.py b/intro/numpy/solutions/1_3_tiling.py deleted file mode 100644 index 87af57ccf..000000000 --- a/intro/numpy/solutions/1_3_tiling.py +++ /dev/null @@ -1,6 +0,0 @@ -import numpy as np - -block = np.array([[4, 3], [2, 1]]) -a = np.tile(block, (2, 3)) - -print(a) diff --git a/intro/numpy/solutions/2_2_data_statistics.py b/intro/numpy/solutions/2_2_data_statistics.py index 7c26ad387..9854ba9c0 100644 --- a/intro/numpy/solutions/2_2_data_statistics.py +++ b/intro/numpy/solutions/2_2_data_statistics.py @@ -1,6 +1,6 @@ import numpy as np -data = np.loadtxt("../../../data/populations.txt") +data = np.loadtxt("../data/populations.txt") year, 
hares, lynxes, carrots = data.T populations = data[:, 1:] diff --git a/intro/scipy/data b/intro/scipy/data new file mode 120000 index 000000000..e67b45590 --- /dev/null +++ b/intro/scipy/data @@ -0,0 +1 @@ +../../data \ No newline at end of file diff --git a/intro/scipy/examples/solutions/plot_fft_image_denoise.py b/intro/scipy/examples/solutions/plot_fft_image_denoise.py deleted file mode 100644 index a0c4890a2..000000000 --- a/intro/scipy/examples/solutions/plot_fft_image_denoise.py +++ /dev/null @@ -1,111 +0,0 @@ -r""" -====================== -Image denoising by FFT -====================== - -Denoise an image (:download:`../../../../data/moonlanding.png`) by -implementing a blur with an FFT. - -Implements, via FFT, the following convolution: - -.. math:: - - f_1(t) = \int dt'\, K(t-t') f_0(t') - -.. math:: - - \tilde{f}_1(\omega) = \tilde{K}(\omega) \tilde{f}_0(\omega) - -""" - -############################################################ -# Read and plot the image -############################################################ -import numpy as np -import matplotlib.pyplot as plt - -im = plt.imread("../../../../data/moonlanding.png").astype(float) - -plt.figure() -plt.imshow(im, "gray") -plt.title("Original image") - - -############################################################ -# Compute the 2d FFT of the input image -############################################################ -import scipy as sp - -im_fft = sp.fft.fft2(im) - -# Show the results - - -def plot_spectrum(im_fft): - from matplotlib.colors import LogNorm - - # A logarithmic colormap - plt.imshow(np.abs(im_fft), norm=LogNorm(vmin=5)) - plt.colorbar() - - -plt.figure() -plot_spectrum(im_fft) -plt.title("Fourier transform") - -############################################################ -# Filter in FFT -############################################################ - -# In the lines following, we'll make a copy of the original spectrum and -# truncate coefficients. 
- -# Define the fraction of coefficients (in each direction) we keep -keep_fraction = 0.1 - -# Call ff a copy of the original transform. NumPy arrays have a copy -# method for this purpose. -im_fft2 = im_fft.copy() - -# Set r and c to be the number of rows and columns of the array. -r, c = im_fft2.shape - -# Set to zero all rows with indices between r*keep_fraction and -# r*(1-keep_fraction): -im_fft2[int(r * keep_fraction) : int(r * (1 - keep_fraction))] = 0 - -# Similarly with the columns: -im_fft2[:, int(c * keep_fraction) : int(c * (1 - keep_fraction))] = 0 - -plt.figure() -plot_spectrum(im_fft2) -plt.title("Filtered Spectrum") - - -############################################################ -# Reconstruct the final image -############################################################ - -# Reconstruct the denoised image from the filtered spectrum, keep only the -# real part for display. -im_new = sp.fft.ifft2(im_fft2).real - -plt.figure() -plt.imshow(im_new, "gray") -plt.title("Reconstructed Image") - - -############################################################ -# Easier and better: :func:`scipy.ndimage.gaussian_filter` -############################################################ -# -# Implementing filtering directly with FFTs is tricky and time consuming. -# We can use the Gaussian filter from :mod:`scipy.ndimage` - -im_blur = sp.ndimage.gaussian_filter(im, 4) - -plt.figure() -plt.imshow(im_blur, "gray") -plt.title("Blurred image") - -plt.show() diff --git a/intro/scipy/examples/solutions/plot_image_blur.py b/intro/scipy/examples/solutions/plot_image_blur.py deleted file mode 100644 index 19b1d594a..000000000 --- a/intro/scipy/examples/solutions/plot_image_blur.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -======================================================= -Simple image blur by convolution with a Gaussian kernel -======================================================= - -Blur an an image (:download:`../../../../data/elephant.png`) using a -Gaussian kernel. 
- -Convolution is easy to perform with FFT: convolving two signals boils -down to multiplying their FFTs (and performing an inverse FFT). - -""" - -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - -##################################################################### -# The original image -##################################################################### - -# read image -img = plt.imread("../../../../data/elephant.png") -plt.figure() -plt.imshow(img) - -##################################################################### -# Prepare an Gaussian convolution kernel -##################################################################### - -# First a 1-D Gaussian -t = np.linspace(-10, 10, 30) -bump = np.exp(-0.1 * t**2) -bump /= np.trapezoid(bump) # normalize the integral to 1 - -# make a 2-D kernel out of it -kernel = bump[:, np.newaxis] * bump[np.newaxis, :] - -##################################################################### -# Implement convolution via FFT -##################################################################### - -# Padded fourier transform, with the same shape as the image -# We use :func:`scipy.fft.fft2` to have a 2D FFT -kernel_ft = sp.fft.fft2(kernel, s=img.shape[:2], axes=(0, 1)) - -# convolve -img_ft = sp.fft.fft2(img, axes=(0, 1)) -# the 'newaxis' is to match to color direction -img2_ft = kernel_ft[:, :, np.newaxis] * img_ft -img2 = sp.fft.ifft2(img2_ft, axes=(0, 1)).real - -# clip values to range -img2 = np.clip(img2, 0, 1) - -# plot output -plt.figure() -plt.imshow(img2) - -##################################################################### -# Further exercise (only if you are familiar with this stuff): -# -# A "wrapped border" appears in the upper left and top edges of the -# image. This is because the padding is not done correctly, and does -# not take the kernel size into account (so the convolution "flows out -# of bounds of the image"). Try to remove this artifact. 
- - -##################################################################### -# A function to do it: :func:`scipy.signal.fftconvolve` -##################################################################### -# -# The above exercise was only for didactic reasons: there exists a -# function in scipy that will do this for us, and probably do a better -# job: :func:`scipy.signal.fftconvolve` - -# mode='same' is there to enforce the same output shape as input arrays -# (ie avoid border effects) -img3 = sp.signal.fftconvolve(img, kernel[:, :, np.newaxis], mode="same") -plt.figure() -plt.imshow(img3) - -##################################################################### -# Note that we still have a decay to zero at the border of the image. -# Using :func:`scipy.ndimage.gaussian_filter` would get rid of this -# artifact - - -plt.show() diff --git a/intro/scipy/examples/solutions/plot_periodicity_finder.py b/intro/scipy/examples/solutions/plot_periodicity_finder.py deleted file mode 100644 index f2b13c890..000000000 --- a/intro/scipy/examples/solutions/plot_periodicity_finder.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -========================== -Crude periodicity finding -========================== - -Discover the periods in evolution of animal populations -(:download:`../../../../data/populations.txt`) -""" - -############################################################ -# Load the data -############################################################ - -import numpy as np - -data = np.loadtxt("../../../../data/populations.txt") -years = data[:, 0] -populations = data[:, 1:] - -############################################################ -# Plot the data -############################################################ - -import matplotlib.pyplot as plt - -plt.figure() -plt.plot(years, populations * 1e-3) -plt.xlabel("Year") -plt.ylabel(r"Population number ($\cdot10^3$)") -plt.legend(["hare", "lynx", "carrot"], loc=1) - -############################################################ -# 
Plot its periods -############################################################ -import scipy as sp - -ft_populations = sp.fft.fft(populations, axis=0) -frequencies = sp.fft.fftfreq(populations.shape[0], years[1] - years[0]) -periods = 1 / frequencies - -plt.figure() -plt.plot(periods, abs(ft_populations) * 1e-3, "o") -plt.xlim(0, 22) -plt.xlabel("Period") -plt.ylabel(r"Power ($\cdot10^3$)") - -plt.show() - -############################################################ -# There's probably a period of around 10 years (obvious from the -# plot), but for this crude a method, there's not enough data to say -# much more. diff --git a/intro/scipy/image_processing/image_processing.md b/intro/scipy/image_processing/image_processing.md new file mode 100644 index 000000000..4fa283ff9 --- /dev/null +++ b/intro/scipy/image_processing/image_processing.md @@ -0,0 +1,334 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(scipy-image-processing)= + +# Geometrical transformations on images + +{mod}`scipy.ndimage` provides manipulation of n-dimensional arrays as +images. + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +## Changing orientation, resolution, .. 
+ +```{code-cell} +import scipy as sp +``` + +```{code-cell} +# Load an image +face = sp.datasets.face(gray=True) +``` + +```{code-cell} +# Shift, rotate and zoom it +shifted_face = sp.ndimage.shift(face, (50, 50)) +shifted_face2 = sp.ndimage.shift(face, (50, 50), mode='nearest') +rotated_face = sp.ndimage.rotate(face, 30) +cropped_face = face[50:-50, 50:-50] +zoomed_face = sp.ndimage.zoom(face, 2) +zoomed_face.shape +``` + +```{code-cell} +:tags: [hide-input] + +# Pass figsize to subplots(); a separate plt.figure() would add an empty figure +fig, axes = plt.subplots(1, 5, figsize=(15, 3)) +for i, arr in enumerate([shifted_face, + shifted_face2, + rotated_face, + cropped_face, + zoomed_face]): + axes[i].imshow(arr, cmap="gray") + axes[i].axis("off") + +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99); +``` + +## Image filtering + +Generate a noisy face: + +```{code-cell} +face = sp.datasets.face(gray=True) +face = face[:512, -512:] # crop out square on right +noisy_face = np.copy(face).astype(float) +rng = np.random.default_rng() +noisy_face += face.std() * 0.5 * rng.standard_normal(face.shape) +``` + +Apply a variety of filters on it: + +```{code-cell} +blurred_face = sp.ndimage.gaussian_filter(noisy_face, sigma=3) +median_face = sp.ndimage.median_filter(noisy_face, size=5) +wiener_face = sp.signal.wiener(noisy_face, (5, 5)) +``` + +```{code-cell} +:tags: [hide-input] + +# Pass figsize to subplots(); a separate plt.figure() would add an empty figure +fig, axes = plt.subplots(1, 4, figsize=(12, 3.5)) +for i, (arr, name) in enumerate([[noisy_face, 'noisy'], + [blurred_face, 'Gaussian filter'], + [median_face, 'median filter'], + [wiener_face, 'Wiener filter']]): + axes[i].imshow(arr, cmap="gray") + axes[i].set_title(name) + axes[i].axis("off") + +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99) +``` + +Other filters in {mod}`scipy.ndimage` and {mod}`scipy.signal` +can be applied to images. + +::: {exercise-start} +:label: compare-histograms +:class: dropdown +::: + +Compare histograms for the different filtered images.
+ +::: {exercise-end} +::: + ++++ + +## Mathematical morphology + +::: {note} +:class: dropdown + +[Mathematical morphology](https://en.wikipedia.org/wiki/Mathematical_morphology) stems from set +theory. It characterizes and transforms geometrical structures. Binary +(black and white) images, in particular, can be transformed using this +theory: the sets to be transformed are the sets of neighboring +non-zero-valued pixels. The theory was also extended to gray-valued +images. +::: + +![](morpho_mat.png) + +Mathematical-morphology operations use a _structuring element_ +in order to modify geometrical structures. + +Let us first generate a structuring element: + +```{code-cell} +el = sp.ndimage.generate_binary_structure(2, 1) +el +``` + +```{code-cell} +el.astype(int) +``` + +- **Erosion** {func}`scipy.ndimage.binary_erosion` + +```{code-cell} +a = np.zeros((7, 7), dtype=int) +a[1:6, 2:5] = 1 +a +``` + +```{code-cell} +sp.ndimage.binary_erosion(a).astype(a.dtype) +``` + +```{code-cell} +# Erosion removes objects smaller than the structure +sp.ndimage.binary_erosion(a, structure=np.ones((5,5))).astype(a.dtype) +``` + +- **Dilation** {func}`scipy.ndimage.binary_dilation` + +```{code-cell} +a = np.zeros((5, 5)) +a[2, 2] = 1 +a +``` + +```{code-cell} +sp.ndimage.binary_dilation(a).astype(a.dtype) +``` + +- **Opening** {func}`scipy.ndimage.binary_opening` + +```{code-cell} +a = np.zeros((5, 5), dtype=int) +a[1:4, 1:4] = 1 +a[4, 4] = 1 +a +``` + +```{code-cell} +# Opening removes small objects +sp.ndimage.binary_opening(a, structure=np.ones((3, 3))).astype(int) +``` + +```{code-cell} +# Opening can also smooth corners +sp.ndimage.binary_opening(a).astype(int) +``` + +- **Closing:** {func}`scipy.ndimage.binary_closing` + +::: {exercise-start} +:label: closing-exercise +:class: dropdown +::: + +Check that opening amounts to eroding, then dilating. + +::: {exercise-end} +::: + +An opening operation removes small structures, while a closing operation +fills small holes. 
Such operations can therefore be used to "clean" an +image. + +```{code-cell} +a = np.zeros((50, 50)) +a[10:-10, 10:-10] = 1 +rng = np.random.default_rng() +a += 0.25 * rng.standard_normal(a.shape) +mask = a>=0.5 +opened_mask = sp.ndimage.binary_opening(mask) +closed_mask = sp.ndimage.binary_closing(opened_mask) +``` + +```{code-cell} +:tags: [hide-input] + +fig, axes = plt.subplots(1, 4, figsize=(12, 3.5)) +for i, (arr, name) in enumerate([[a, 'a'], + [mask, 'mask'], + [opened_mask, 'opened_mask'], + [closed_mask, 'closed_mask']]): + axes[i].imshow(arr, cmap="gray") + axes[i].set_title(name) + axes[i].axis("off") + +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99) +``` + +::: {exercise-start} +:label: reconstructed-square +:class: dropdown +::: + +Check that the area of the reconstructed square is smaller +than the area of the initial square. (The opposite would occur if the +closing step was performed _before_ the opening). + +::: {exercise-end} +::: + +For _gray-valued_ images, eroding (resp. dilating) amounts to replacing +a pixel by the minimal (resp. maximal) value among pixels covered by the +structuring element centered on the pixel of interest. + +```{code-cell} +a = np.zeros((7, 7), dtype=int) +a[1:6, 1:6] = 3 +a[4, 4] = 2; a[2, 3] = 1 +a +``` + +```{code-cell} +sp.ndimage.grey_erosion(a, size=(3, 3)) +``` + +## Connected components and measurements on images + +Let us first generate a nice synthetic binary image. + +```{code-cell} +x, y = np.indices((100, 100)) +sig = np.sin(2*np.pi*x/50.) * np.sin(2*np.pi*y/50.) 
* (1+x*y/50.**2)**2 +mask = sig > 1 +``` + +```{code-cell} +:tags: [hide-input] + +# Pass figsize to subplots(); a separate plt.figure() would add an empty figure +fig, axes = plt.subplots(1, 2, figsize=(7, 3.5)) +axes[0].imshow(sig) +axes[0].axis("off") +axes[0].set_title("sig") +axes[1].imshow(mask, cmap="gray") +axes[1].axis("off") +axes[1].set_title("mask") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9); +``` + +{func}`scipy.ndimage.label` assigns a different label to each connected +component: + +```{code-cell} +labels, nb = sp.ndimage.label(mask) +nb +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(3.5, 3.5)) +plt.imshow(labels) +plt.title("label") +plt.axis("off") + +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9) +``` + +Now compute measurements on each connected component: + +```{code-cell} +areas = sp.ndimage.sum(mask, labels, range(1, labels.max()+1)) +areas # The number of pixels in each connected component +``` + +```{code-cell} +maxima = sp.ndimage.maximum(sig, labels, range(1, labels.max()+1)) +maxima # The maximum signal in each connected component +``` + +Extract the 4th connected component, and crop the array around it: + +```{code-cell} +sl_3 = sp.ndimage.find_objects(labels)[3] +sl_3 +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(3.5, 3.5)) +plt.imshow(sig[sl_3]) +plt.title("Cropped connected component") +plt.axis("off") + +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9) +``` + +See the summary exercise on {ref}`summary-exercise-image-processing` for a more +advanced example. diff --git a/intro/scipy/image_processing/image_processing.rst b/intro/scipy/image_processing/image_processing.rst deleted file mode 100644 index a8af7ffbd..000000000 --- a/intro/scipy/image_processing/image_processing.rst +++ /dev/null @@ -1,301 +0,0 @@ -:orphan: - -.. for doctests - >>> import matplotlib.pyplot as plt - -:mod:`scipy.ndimage` provides manipulation of n-dimensional arrays as -images.
- -Geometrical transformations on images -....................................... - -Changing orientation, resolution, .. :: - - >>> import scipy as sp - - >>> # Load an image - >>> face = sp.datasets.face(gray=True) - - >>> # Shift, rotate and zoom it - >>> shifted_face = sp.ndimage.shift(face, (50, 50)) - >>> shifted_face2 = sp.ndimage.shift(face, (50, 50), mode='nearest') - >>> rotated_face = sp.ndimage.rotate(face, 30) - >>> cropped_face = face[50:-50, 50:-50] - >>> zoomed_face = sp.ndimage.zoom(face, 2) - >>> zoomed_face.shape - (1536, 2048) - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_image_transform_001.png - :target: auto_examples/plot_image_transform.html - :scale: 70 - :align: center - - -:: - - >>> plt.subplot(151) - - - >>> plt.imshow(shifted_face, cmap=plt.cm.gray) - - - >>> plt.axis('off') - (np.float64(-0.5), np.float64(1023.5), np.float64(767.5), np.float64(-0.5)) - - >>> # etc. - - -Image filtering -................... - -Generate a noisy face:: - - >>> import scipy as sp - >>> face = sp.datasets.face(gray=True) - >>> face = face[:512, -512:] # crop out square on right - >>> import numpy as np - >>> noisy_face = np.copy(face).astype(float) - >>> rng = np.random.default_rng() - >>> noisy_face += face.std() * 0.5 * rng.standard_normal(face.shape) - -Apply a variety of filters on it:: - - >>> blurred_face = sp.ndimage.gaussian_filter(noisy_face, sigma=3) - >>> median_face = sp.ndimage.median_filter(noisy_face, size=5) - >>> wiener_face = sp.signal.wiener(noisy_face, (5, 5)) - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_image_filters_001.png - :target: auto_examples/plot_image_filters.html - :scale: 70 - :align: center - - -Other filters in :mod:`scipy.ndimage.filters` and :mod:`scipy.signal` -can be applied to images. - -.. topic:: Exercise - :class: green - - Compare histograms for the different filtered images. - -Mathematical morphology -........................ - -.. 
tip:: - - `Mathematical morphology - `_ stems from set - theory. It characterizes and transforms geometrical structures. Binary - (black and white) images, in particular, can be transformed using this - theory: the sets to be transformed are the sets of neighboring - non-zero-valued pixels. The theory was also extended to gray-valued - images. - -.. image:: /intro/scipy/image_processing/morpho_mat.png - :align: center - -Mathematical-morphology operations use a *structuring element* -in order to modify geometrical structures. - -Let us first generate a structuring element:: - - >>> el = sp.ndimage.generate_binary_structure(2, 1) - >>> el - array([[False, True, False], - [...True, True, True], - [False, True, False]]) - >>> el.astype(int) - array([[0, 1, 0], - [1, 1, 1], - [0, 1, 0]]) - -* **Erosion** :func:`scipy.ndimage.binary_erosion` :: - - >>> a = np.zeros((7, 7), dtype=int) - >>> a[1:6, 2:5] = 1 - >>> a - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - >>> sp.ndimage.binary_erosion(a).astype(a.dtype) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - >>> # Erosion removes objects smaller than the structure - >>> sp.ndimage.binary_erosion(a, structure=np.ones((5,5))).astype(a.dtype) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - -* **Dilation** :func:`scipy.ndimage.binary_dilation` :: - - >>> a = np.zeros((5, 5)) - >>> a[2, 2] = 1 - >>> a - array([[0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 1., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.]]) - >>> sp.ndimage.binary_dilation(a).astype(a.dtype) - array([[0., 0., 0., 0., 0.], - [0., 0., 1., 
0., 0.], - [0., 1., 1., 1., 0.], - [0., 0., 1., 0., 0.], - [0., 0., 0., 0., 0.]]) - -* **Opening** :func:`scipy.ndimage.binary_opening` :: - - >>> a = np.zeros((5, 5), dtype=int) - >>> a[1:4, 1:4] = 1 - >>> a[4, 4] = 1 - >>> a - array([[0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 1]]) - >>> # Opening removes small objects - >>> sp.ndimage.binary_opening(a, structure=np.ones((3, 3))).astype(int) - array([[0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 0]]) - >>> # Opening can also smooth corners - >>> sp.ndimage.binary_opening(a).astype(int) - array([[0, 0, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 1, 1, 1, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 0]]) - -* **Closing:** :func:`scipy.ndimage.binary_closing` - -.. topic:: Exercise - :class: green - - Check that opening amounts to eroding, then dilating. - -An opening operation removes small structures, while a closing operation -fills small holes. Such operations can therefore be used to "clean" an -image. :: - - >>> a = np.zeros((50, 50)) - >>> a[10:-10, 10:-10] = 1 - >>> rng = np.random.default_rng() - >>> a += 0.25 * rng.standard_normal(a.shape) - >>> mask = a>=0.5 - >>> opened_mask = sp.ndimage.binary_opening(mask) - >>> closed_mask = sp.ndimage.binary_closing(opened_mask) - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_mathematical_morpho_001.png - :target: auto_examples/plot_mathematical_morpho.html - :scale: 70 - :align: center - - -.. topic:: Exercise - :class: green - - Check that the area of the reconstructed square is smaller - than the area of the initial square. (The opposite would occur if the - closing step was performed *before* the opening). - -For *gray-valued* images, eroding (resp. dilating) amounts to replacing -a pixel by the minimal (resp. maximal) value among pixels covered by the -structuring element centered on the pixel of interest. 
:: - - >>> a = np.zeros((7, 7), dtype=int) - >>> a[1:6, 1:6] = 3 - >>> a[4, 4] = 2; a[2, 3] = 1 - >>> a - array([[0, 0, 0, 0, 0, 0, 0], - [0, 3, 3, 3, 3, 3, 0], - [0, 3, 3, 1, 3, 3, 0], - [0, 3, 3, 3, 3, 3, 0], - [0, 3, 3, 3, 2, 3, 0], - [0, 3, 3, 3, 3, 3, 0], - [0, 0, 0, 0, 0, 0, 0]]) - >>> sp.ndimage.grey_erosion(a, size=(3, 3)) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 3, 2, 2, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]]) - - -Connected components and measurements on images -................................................ - -Let us first generate a nice synthetic binary image. :: - - >>> x, y = np.indices((100, 100)) - >>> sig = np.sin(2*np.pi*x/50.) * np.sin(2*np.pi*y/50.) * (1+x*y/50.**2)**2 - >>> mask = sig > 1 - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_connect_measurements_001.png - :target: auto_examples/plot_connect_measurements.html - :scale: 60 - :align: center - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_connect_measurements_002.png - :target: auto_examples/plot_connect_measurements.html - :scale: 60 - :align: right - -:func:`scipy.ndimage.label` assigns a different label to each connected -component:: - - >>> labels, nb = sp.ndimage.label(mask) - >>> nb - 8 - -.. raw:: html - -
- - -Now compute measurements on each connected component:: - - >>> areas = sp.ndimage.sum(mask, labels, range(1, labels.max()+1)) - >>> areas # The number of pixels in each connected component - array([190., 45., 424., 278., 459., 190., 549., 424.]) - >>> maxima = sp.ndimage.maximum(sig, labels, range(1, labels.max()+1)) - >>> maxima # The maximum signal in each connected component - array([ 1.80238238, 1.13527605, 5.51954079, 2.49611818, 6.71673619, - 1.80238238, 16.76547217, 5.51954079]) - -.. image:: /intro/scipy/auto_examples/images/sphx_glr_plot_connect_measurements_003.png - :target: auto_examples/plot_connect_measurements.html - :scale: 60 - :align: right - - -Extract the 4th connected component, and crop the array around it:: - - >>> sp.ndimage.find_objects(labels)[3] - (slice(30, 48, None), slice(30, 48, None)) - >>> sl = sp.ndimage.find_objects(labels)[3] - >>> import matplotlib.pyplot as plt - >>> plt.imshow(sig[sl]) - - - - -See the summary exercise on :ref:`summary_exercise_image_processing` for a more -advanced example. diff --git a/intro/scipy/index.md b/intro/scipy/index.md new file mode 100644 index 000000000..8609654ac --- /dev/null +++ b/intro/scipy/index.md @@ -0,0 +1,1474 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(scipy)= + +# SciPy: high-level scientific computing + +**Authors**: _Gaël Varoquaux, Adrien Chauve, Andre Espaze, Emmanuelle Gouillart, Ralf Gommers_ + +:::{admonition} Scipy +The {mod}`scipy` package contains various toolboxes dedicated to common +issues in scientific computing. Its different submodules correspond +to different applications, such as interpolation, integration, +optimization, image processing, statistics, special functions, etc. 
+::: + +::: {note} +:class: dropdown + +{mod}`scipy` can be compared to other standard scientific-computing +libraries, such as the GSL (GNU Scientific Library for C and C++), +or Matlab's toolboxes. `scipy` is the core package for scientific +routines in Python; it is meant to operate efficiently on `numpy` +arrays, so that NumPy and SciPy work hand in hand. + +Before implementing a routine, it is worth checking if the desired +data processing is not already implemented in SciPy. As +non-professional programmers, scientists often tend to **re-invent the +wheel**, which leads to buggy, non-optimal, difficult-to-share and +unmaintainable code. By contrast, `SciPy`'s routines are optimized +and tested, and should therefore be used when possible. +::: + +:::{warning} +This tutorial is far from an introduction to numerical computing. +As enumerating the different submodules and functions in SciPy would +be very boring, we concentrate instead on a few examples to give a +general idea of how to use `scipy` for scientific computing. 
+::: + +{mod}`scipy` is composed of task-specific sub-modules: + +| | | +| ------------------------ | -------------------------------------- | +| {mod}`scipy.cluster` | Vector quantization / Kmeans | +| {mod}`scipy.constants` | Physical and mathematical constants | +| {mod}`scipy.fft` | Fourier transform | +| {mod}`scipy.integrate` | Integration routines | +| {mod}`scipy.interpolate` | Interpolation | +| {mod}`scipy.io` | Data input and output | +| {mod}`scipy.linalg` | Linear algebra routines | +| {mod}`scipy.ndimage` | n-dimensional image package | +| {mod}`scipy.odr` | Orthogonal distance regression | +| {mod}`scipy.optimize` | Optimization | +| {mod}`scipy.signal` | Signal processing | +| {mod}`scipy.sparse` | Sparse matrices | +| {mod}`scipy.spatial` | Spatial data structures and algorithms | +| {mod}`scipy.special` | Any special mathematical functions | +| {mod}`scipy.stats` | Statistics | + +Scipy modules all depend on {mod}`numpy`, but are mostly independent of each +other. The standard way of importing NumPy and these SciPy modules is: + +```{code-cell} +import numpy as np +import scipy as sp +``` + +We will also be using plotting for this tutorial. + +```{code-cell} +import matplotlib.pyplot as plt +``` + +## File input/output: {mod}`scipy.io` + +{mod}`scipy.io` contains functions for loading and saving data in +several common formats including Matlab, IDL, Matrix Market, and +Harwell-Boeing. + +**Matlab files**: Loading and saving: + +```{code-cell} +a = np.ones((3, 3)) +sp.io.savemat('file.mat', {'a': a}) # savemat expects a dictionary +data = sp.io.loadmat('file.mat') +data['a'] +``` + +**Warning — Python / Matlab mismatch** + +The Matlab file format does not support 1D arrays. 
+ ```{code-cell} +a = np.ones(3) +a +``` + +```{code-cell} +a.shape +``` + +```{code-cell} +sp.io.savemat('file.mat', {'a': a}) +a2 = sp.io.loadmat('file.mat')['a'] +a2 +``` + +```{code-cell} +a2.shape +``` + +Notice that the original array was a one-dimensional array, whereas the +saved and reloaded array is a two-dimensional array with a single row. + +For other formats, see the {mod}`scipy.io` documentation. + +**End of warning** + +:::{admonition} See also + +- Load text files: {func}`numpy.loadtxt`/{func}`numpy.savetxt` +- Clever loading of text/csv files: + {func}`numpy.genfromtxt` +- Fast and efficient, but NumPy-specific, binary format: + {func}`numpy.save`/{func}`numpy.load` +- Basic input/output of images in Matplotlib: + {func}`matplotlib.pyplot.imread`/{func}`matplotlib.pyplot.imsave` +- More advanced input/output of images: {mod}`imageio` + ::: + +## Special functions: {mod}`scipy.special` + +"Special" functions are functions commonly used in science and mathematics that +are not considered to be "elementary" functions. Examples include + +- the gamma function, {func}`scipy.special.gamma`, +- the error function, {func}`scipy.special.erf`, +- Bessel functions, such as {func}`scipy.special.jv` + (Bessel function of the first kind), and +- elliptic functions, such as {func}`scipy.special.ellipj` + (Jacobi elliptic functions). + +Other special functions are combinations of familiar elementary functions, +but they offer better accuracy or robustness than their naive implementations +would. + +Most of these functions are computed elementwise and follow standard +NumPy broadcasting rules when the input arrays have different shapes. +For example, {func}`scipy.special.xlog1py` is mathematically equivalent +to $x\log(1 + y)$.
+ +```{code-cell} +x = np.asarray([1, 2]) +y = np.asarray([[3], [4], [5]]) +res = sp.special.xlog1py(x, y) +res.shape +``` + +```{code-cell} +ref = x * np.log(1 + y) +np.allclose(res, ref) +``` + +However, {func}`scipy.special.xlog1py` is numerically favorable for small $y$, +when explicit addition of `1` would lead to loss of precision due to floating +point truncation error. + +```{code-cell} +x = 2.5 +y = 1e-18 +x * np.log(1 + y) +``` + +```{code-cell} +sp.special.xlog1py(x, y) +``` + +Many special functions also have "logarithmized" variants. For instance, +the gamma function $\Gamma(\cdot)$ is related to the factorial +function by $n! = \Gamma(n + 1)$, but it extends the domain from the +positive integers to the complex plane. + +```{code-cell} +x = np.arange(10) +np.allclose(sp.special.gamma(x + 1), sp.special.factorial(x)) +``` + +```{code-cell} +sp.special.gamma(5) < sp.special.gamma(5.5) < sp.special.gamma(6) +``` + +The factorial function grows quickly, and so the gamma function overflows +for moderate values of the argument. However, sometimes only the logarithm +of the gamma function is needed. In such cases, we can compute the logarithm +of the gamma function directly using {func}`scipy.special.gammaln`. + +```{code-cell} +x = [5, 50, 500] +np.log(sp.special.gamma(x)) +``` + +```{code-cell} +sp.special.gammaln(x) +``` + +Such functions can often be used when the intermediate components of a +calculation would overflow or underflow, but the final result would not. +For example, suppose we wish to compute the ratio +$\Gamma(500)/\Gamma(499)$. + +```{code-cell} +a = sp.special.gamma(500) +b = sp.special.gamma(499) +a, b +``` + +```{code-cell} +:tags: [remove-cell, test] +assert a == np.inf +assert b == np.inf +``` + +Both the numerator and denominator overflow, so performing $a / b$ will +not return the result we seek. However, the magnitude of the result should +be moderate, so the use of logarithms comes to mind. 
Combining the identities +$\log(a/b) = \log(a) - \log(b)$ and $\exp(\log(x)) = x$, +we get: + +```{code-cell} +log_a = sp.special.gammaln(500) +log_b = sp.special.gammaln(499) +log_res = log_a - log_b +res = np.exp(log_res) +res +``` + +```{code-cell} +:tags: [remove-cell, test] +assert np.allclose(res, 499) +``` + +Similarly, suppose we wish to compute the difference +$\log(\Gamma(500) - \Gamma(499))$. For this, we use +{func}`scipy.special.logsumexp`, which computes +$\log(\exp(x) + \exp(y))$ using a numerical trick that avoids overflow. + +```{code-cell} +res = sp.special.logsumexp([log_a, log_b], + b=[1, -1]) # weights the terms of the sum +res +``` + +For more information about these and many other special functions, see +the documentation of {mod}`scipy.special`. + ++++ + +(scipy-linalg)= + +## Linear algebra operations: {mod}`scipy.linalg` + +{mod}`scipy.linalg` provides a Python interface to efficient, compiled +implementations of standard linear algebra operations: the BLAS (Basic +Linear Algebra Subroutines) and LAPACK (Linear Algebra PACKage) libraries. + +For example, the {func}`scipy.linalg.det` function computes the determinant +of a square matrix: + +```{code-cell} +arr = np.array([[1, 2], + [3, 4]]) +sp.linalg.det(arr) +``` + +Mathematically, the solution of a linear system $Ax = b$ is $x = A^{-1}b$, +but explicit inversion of a matrix is numerically unstable and should be avoided. +Instead, use {func}`scipy.linalg.solve`: + +```{code-cell} +A = np.array([[1, 2], + [2, 3]]) +b = np.array([14, 23]) +x = sp.linalg.solve(A, b) +x +``` + +```{code-cell} +np.allclose(A @ x, b) +``` + +Linear systems with special structure can often be solved more efficiently +than more general systems. 
For example, systems with triangular matrices +can be solved using {func}`scipy.linalg.solve_triangular`: + +```{code-cell} +A_upper = np.triu(A) +A_upper +``` + +```{code-cell} +np.allclose(sp.linalg.solve_triangular(A_upper, b, lower=False), + sp.linalg.solve(A_upper, b)) +``` + +{mod}`scipy.linalg` also features matrix factorizations/decompositions +such as the singular value decomposition. + +```{code-cell} +A = np.array([[1, 2], + [2, 3]]) +U, s, Vh = sp.linalg.svd(A) +s # singular values +``` + +The original matrix can be recovered by matrix multiplication of the +factors: + +```{code-cell} +S = np.diag(s) # convert to diagonal matrix before matrix multiplication +A2 = U @ S @ Vh +np.allclose(A2, A) +``` + +```{code-cell} +A3 = (U * s) @ Vh # more efficient: use array math broadcasting rules! +np.allclose(A3, A) +``` + +Many other decompositions (e.g. LU, Cholesky, QR), solvers for structured +linear systems (e.g. triangular, circulant), eigenvalue problem algorithms, +matrix functions (e.g. matrix exponential), and routines for special matrix +creation (e.g. block diagonal, toeplitz) are available in {mod}`scipy.linalg`. + ++++ + +(intro-scipy-interpolate)= + +## Interpolation: {mod}`scipy.interpolate` + +{mod}`scipy.interpolate` is used for fitting a function -- an "interpolant" -- +to experimental or computed data. Once fit, the interpolant can be used to +approximate the underlying function at intermediate points; it can also be used +to compute the integral, derivative, or inverse of the function. + +Some kinds of interpolants, known as "smoothing splines", are designed to +generate smooth curves from noisy data. 
For example, suppose we have +the following data: + +```{code-cell} +rng = np.random.default_rng(27446968) + +measured_time = np.linspace(0, 2 * np.pi, 20) +function = np.sin(measured_time) +noise = rng.normal(loc=0, scale=0.1, size=20) +measurements = function + noise +``` + +{func}`scipy.interpolate.make_smoothing_spline` can be used to form a curve +similar to the underlying sine function. + +```{code-cell} +smoothing_spline = sp.interpolate.make_smoothing_spline(measured_time, measurements) +interpolation_time = np.linspace(0, 2 * np.pi, 200) +smooth_results = smoothing_spline(interpolation_time) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.plot(measured_time, measurements, ".", ms=6, label="measurements") +plt.plot(interpolation_time, smooth_results, label="smoothing spline") +plt.plot(interpolation_time, np.sin(interpolation_time), "--", label="underlying curve") +plt.legend(); +``` + +On the other hand, if the data are not noisy, it may be desirable to pass +exactly through each point. + +```{code-cell} +interp_spline = sp.interpolate.make_interp_spline(measured_time, function) +interp_results = interp_spline(interpolation_time) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot the data, the interpolant, and the original function +plt.figure(figsize=(6, 4)) +plt.plot(measured_time, function, ".", ms=6, label="measurements") +plt.plot(interpolation_time, interp_results, label="interpolating spline") +plt.plot(interpolation_time, np.sin(interpolation_time), "--", label="underlying curve") +plt.legend(); +``` + +The `derivative` and `antiderivative` methods of the result object can be used +for differentiation and integration. For the latter, the constant of integration is +assumed to be zero, but we can "wrap" the antiderivative to include a nonzero +constant of integration. 
+ +```{code-cell} +d_interp_spline = interp_spline.derivative() +d_interp_results = d_interp_spline(interpolation_time) +i_interp_spline = lambda t: interp_spline.antiderivative()(t) - 1 +i_interp_results = i_interp_spline(interpolation_time) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot interpolant, its derivative, and its antiderivative +plt.figure(figsize=(6, 4)) +t = interpolation_time +plt.plot(t, interp_results, label="spline") +plt.plot(t, d_interp_results, label="derivative") +plt.plot(t, i_interp_results, label="antiderivative") +plt.legend(); +``` + +For functions that are monotonic on an interval (e.g. $\sin$ from $\pi/2$ +to $3\pi/2$), we can reverse the arguments of `make_interp_spline` to +interpolate the inverse function. Because the first argument is expected to be +monotonically _increasing_, we also reverse the order of elements in the arrays +with {func}`numpy.flip`. + +```{code-cell} +i = (measured_time > np.pi/2) & (measured_time < 3*np.pi/2) +inverse_spline = sp.interpolate.make_interp_spline(np.flip(function[i]), + np.flip(measured_time[i])) +inverse_spline(0) +``` + +See the summary exercise on {ref}`summary-exercise-stat-interp` for a more +advanced spline interpolation example, and read the [SciPy interpolation +tutorial](https://scipy.github.io/devdocs/tutorial/interpolate.html) and the +{mod}`scipy.interpolate` documentation for much more information. + ++++ + +## Optimization and fit: {mod}`scipy.optimize` + +{mod}`scipy.optimize` provides algorithms for root finding, curve fitting, +and more general optimization. + +### Root Finding + +{func}`scipy.optimize.root_scalar` attempts to find a root of a specified +scalar-valued function (i.e., an argument at which the function value is zero). +Like many {mod}`scipy.optimize` functions, the function needs an initial +guess of the solution, which the algorithm will refine until it converges or +recognizes failure. We also provide the derivative to improve the rate of +convergence. 
+ ```{code-cell} +def f(x): + return (x-1)*(x-2) + +def df(x): + return 2*x - 3 + +x0 = 0 # guess +res = sp.optimize.root_scalar(f, x0=x0, fprime=df) +res +``` + +:::{warning} + +None of the functions in {mod}`scipy.optimize` that accept a guess are +guaranteed to converge for all possible guesses! (For example, try +`x0=1.5` in the example above, where the derivative of the function is +exactly zero.) If this occurs, try a different guess, adjust the options +(like providing a `bracket` as shown below), or consider whether SciPy +offers a more appropriate method for the problem. + +::: + +Note that only one root, at `1.0`, is found. By inspection, we can tell +that there is a second root at `2.0`. We can direct the function toward a +particular root by changing the guess or by passing a bracket that contains +only the root we seek. + +```{code-cell} +res = sp.optimize.root_scalar(f, bracket=(1.5, 10)) +res.root +``` + +For multivariate problems, use {func}`scipy.optimize.root`. + +```{code-cell} +def f(x): + # intersection of unit circle and line from origin + return [x[0]**2 + x[1]**2 - 1, + x[1] - x[0]] + +res = sp.optimize.root(f, x0=[0, 0]) +np.allclose(f(res.x), 0, atol=1e-10) +``` + +```{code-cell} +np.allclose(res.x, np.sqrt(2)/2) +``` + +Over-constrained problems can be solved in the least-squares sense using +{func}`scipy.optimize.root` with `method='lm'` (Levenberg-Marquardt). + +```{code-cell} +def f(x): + # intersection of unit circle, line from origin, and parabola + return [x[0]**2 + x[1]**2 - 1, + x[1] - x[0], + x[1] - x[0]**2] + +res = sp.optimize.root(f, x0=[1, 1], method='lm') +res.success +``` + +```{code-cell} +res.x +``` + +See the documentation of {func}`scipy.optimize.root_scalar` and +{func}`scipy.optimize.root` for a variety of other solution algorithms and +options.
+ ++++ + +### Curve fitting + ++++ + +Suppose we have data that is sinusoidal but noisy: + +```{code-cell} +x_data = np.linspace(-5, 5, num=50) # 50 values between -5 and 5 +noise = 0.01 * np.cos(100 * x_data) +a, b = 2.9, 1.5 +y_data = a * np.cos(b * x_data) + noise +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.scatter(x_data, y_data); +``` + +We can approximate the underlying amplitude, frequency, and phase +from the data by least squares curve fitting. To begin, we write +a function that accepts the independent variable as the first +argument and all parameters to fit as separate arguments: + +```{code-cell} +def f(x, a, b, c): + return a * np.sin(b * x + c) +``` + +We then use {func}`scipy.optimize.curve_fit` to find $a$, $b$, and $c$: + +```{code-cell} +params, _ = sp.optimize.curve_fit(f, x_data, y_data, p0=[2, 1, 3]) +params +``` + +```{code-cell} +ref = [a, b, np.pi/2] # what we'd expect +np.allclose(params, ref, rtol=1e-3) +``` + +We plot the resulting curve on the data: + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.scatter(x_data, y_data, label="Data") +plt.plot(x_data, f(x_data, *params), label="Fitted function") +plt.legend(loc="best"); +``` + +::: {exercise-start} +:label: scipy-curve-fit-ex +:class: dropdown +::: + +The temperature extremes in Alaska for each month, starting in January, are +given by (in degrees Celsius): + +```text +max: 17, 19, 21, 28, 33, 38, 37, 37, 31, 23, 19, 18 +min: -62, -59, -56, -46, -32, -18, -9, -13, -25, -46, -52, -58 +``` + +1. Plot these temperature extremes. +2. Define a function that can describe min and max temperatures. + Hint: this function has to have a period of 1 year. + Hint: include a time offset. +3. Fit this function to the data with {func}`scipy.optimize.curve_fit`. +4. Plot the result. Is the fit reasonable? If not, why? +5. Is the time offset for min and max temperatures the same within the fit + accuracy?
+ +::: {exercise-end} +::: + +::: {solution-start} scipy-curve-fit-ex +:class: dropdown +::: + +Curve fitting: temperature as a function of month of the year + +We have the min and max temperatures in Alaska for each month of the +year. We would like to find a function to describe this yearly evolution. + +For this, we will fit a periodic function. + +```{code-cell} +# The data +temp_max = np.array([17, 19, 21, 28, 33, 38, 37, 37, 31, 23, 19, 18]) +temp_min = np.array([-62, -59, -56, -46, -32, -18, -9, -13, -25, -46, -52, -58]) + +months = np.arange(12) +plt.plot(months, temp_max, "ro") +plt.plot(months, temp_min, "bo") +plt.xlabel("Month") +plt.ylabel("Min and max temperature"); +``` + +Fitting it to a periodic function: + +```{code-cell} +def yearly_temps(times, avg, ampl, time_offset): + return avg + ampl * np.cos((times + time_offset) * 2 * np.pi / times.max()) + +res_max, cov_max = sp.optimize.curve_fit(yearly_temps, months, temp_max, [20, 10, 0]) +res_min, cov_min = sp.optimize.curve_fit(yearly_temps, months, temp_min, [-40, 20, 0]) +``` + +Plotting the fit + +```{code-cell} +days = np.linspace(0, 12, num=365) +plt.figure() +plt.plot(months, temp_max, "ro") +plt.plot(days, yearly_temps(days, *res_max), "r-") +plt.plot(months, temp_min, "bo") +plt.plot(days, yearly_temps(days, *res_min), "b-") +plt.xlabel("Month") +plt.ylabel(r"Temperature ($^\circ$C)"); +``` + +::: {solution-end} +::: + ++++ + +### Optimization + ++++ + +Suppose we wish to minimize the scalar-valued function of a single +variable $f(x) = x^2 + 10 \sin(x)$: + +```{code-cell} +def f(x): + return x**2 + 10 * np.sin(x) + +x = np.arange(-5, 5, 0.1) +plt.plot(x, f(x)) +``` + +We can see that the function has a local minimizer near $x = 3.8$ and a global +minimizer near $x = -1.3$, but the precise values cannot be determined from the +plot. + +The most appropriate function for this purpose is +{func}`scipy.optimize.minimize_scalar`.
+Since we know the approximate locations of the minima, we will provide +bounds that restrict the search to the vicinity of the global minimum. + +```{code-cell} +res = sp.optimize.minimize_scalar(f, bounds=(-2, -1)) +res +``` + +```{code-cell} +res.fun == f(res.x) +``` + +If we did not already know the approximate location of the global minimum, +we could use one of SciPy's global minimizers, such as +{func}`scipy.optimize.differential_evolution`. We are required to pass +`bounds`, but they do not need to be tight. + +```{code-cell} +bounds=[(-5, 5)] # list of lower, upper bound for each variable +res = sp.optimize.differential_evolution(f, bounds=bounds) +res +``` + +For multivariate optimization, a good choice for many problems is +{func}`scipy.optimize.minimize`. +Suppose we wish to find the minimum of a quadratic function of two +variables, $f(x_0, x_1) = (x_0-1)^2 + (x_1-2)^2$. + +```{code-cell} +def f(x): + return (x[0] - 1)**2 + (x[1] - 2)**2 +``` + +Like {func}`scipy.optimize.root`, {func}`scipy.optimize.minimize` +requires a guess `x0`. (Note that this is the initial value of +_both_ variables rather than the value of the variable we happened to +label $x_0$.) + +```{code-cell} +res = sp.optimize.minimize(f, x0=[0, 0]) +res +``` + +:::{sidebar} Maximization? +Is {func}`scipy.optimize.minimize` restricted to the solution of +minimization problems? Nope! To solve a maximization problem, +simply minimize the _negative_ of the original objective function. +::: + +This barely scratches the surface of SciPy's optimization features, which +include mixed integer linear programming, constrained nonlinear programming, +and the solution of assignment problems. For much more information, see the +documentation of {mod}`scipy.optimize` and the advanced chapter +{ref}`mathematical-optimization`. + +::: {exercise-start} +:label: scipy-2d-minimization-ex +:class: dropdown +::: + +This is an exercise on 2-D minimization. 
+ +The six-hump camelback function + +$f(x, y) = (4 - 2.1x^2 + \frac{x^4}{3})x^2 + xy + (4y^2 - 4)y^2$ + +has multiple local minima. Find a global minimum (there is more than one, +each with the same value of the objective function) and at least one other +local minimum. + +Here's a plot of the function (taken from the exercise solution): + +::: {glue} plot_camel +::: + +Hints: + +- Variables can be restricted to $-2 < x < 2$ and $-1 < y < 1$. +- {func}`numpy.meshgrid` and {func}`matplotlib.pyplot.imshow` can help + with visualization. +- Try minimizing with {func}`scipy.optimize.minimize` with an initial + guess of $(x, y) = (0, 0)$. Does it find the global minimum, or + converge to a local minimum? What about other initial guesses? +- Try minimizing with {func}`scipy.optimize.differential_evolution`. + +::: {exercise-end} +::: + +::: {solution-start} scipy-2d-minimization-ex +:class: dropdown +::: + +Optimization of a two-parameter function: + +```{code-cell} +# Define the function that we are interested in +def sixhump(x): + return ( + (4 - 2.1 * x[0] ** 2 + x[0] ** 4 / 3) * x[0] ** 2 + + x[0] * x[1] + + (-4 + 4 * x[1] ** 2) * x[1] ** 2 + ) + +# Make a grid to evaluate the function (for plotting) +xlim = [-2, 2] +ylim = [-1, 1] +x = np.linspace(*xlim) # type: ignore[call-overload] +y = np.linspace(*ylim) # type: ignore[call-overload] +xg, yg = np.meshgrid(x, y) +``` + +A 2D image plot of the function: + +```{code-cell} +# Simple visualization in 2D +plt.figure() +plt.imshow(sixhump([xg, yg]), extent=xlim + ylim, origin="lower") # type: ignore[arg-type] +plt.colorbar(); +``` + +A 3D surface plot of the function: + +```{code-cell} +from mpl_toolkits.mplot3d import Axes3D + +fig = plt.figure() +ax: Axes3D = fig.add_subplot(111, projection="3d") +surf = ax.plot_surface( + xg, + yg, + sixhump([xg, yg]), + rstride=1, + cstride=1, + cmap="viridis", + linewidth=0, + antialiased=False, +) + +ax.set_xlabel("x") +ax.set_ylabel("y") +ax.set_zlabel("f(x, y)") 
+ax.set_title("Six-hump Camelback function"); + +# You can ignore the code below - it's not part of the solution. It is only to +# allow us to use the plot from the solution as a graphic in the web page. +from myst_nb import glue +glue("plot_camel", fig, display=False) +``` + +Find minima: + +```{code-cell} +# local minimization +res_local = sp.optimize.minimize(sixhump, x0=[0, 0]) +# global minimization +res_global = sp.optimize.differential_evolution(sixhump, bounds=[xlim, ylim]) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure() +# Show the function in 2D +plt.imshow(sixhump([xg, yg]), extent=xlim + ylim, origin="lower") # type: ignore[arg-type] +plt.colorbar() +# Mark the minima +plt.scatter(res_local.x[0], res_local.x[1], label="local minimizer") +plt.scatter(res_global.x[0], res_global.x[1], label="global minimizer") +plt.legend(); +``` + +::: {solution-end} +::: + +See the summary exercise on {ref}`summary-exercise-optimize` for another, more +advanced example. + ++++ + +## Statistics and random numbers: {mod}`scipy.stats` + +{mod}`scipy.stats` contains fundamental tools for statistics in Python. + +### Statistical Distributions + +Consider a random variable distributed according to the standard normal. +We draw a sample consisting of 100000 observations from the random variable. +The normalized histogram of the sample is an estimator of the random +variable's probability density function (PDF): + +```{code-cell} +dist = sp.stats.norm(loc=0, scale=1) # standard normal distribution +sample = dist.rvs(size=100000) # "random variate sample" +plt.hist(sample, bins=50, density=True, label='normalized histogram') +x = np.linspace(-5, 5) +plt.plot(x, dist.pdf(x), label='PDF') +plt.legend() +``` + +:::{sidebar} Distribution objects and frozen distributions + +Each of the 100+ {mod}`scipy.stats` distribution families is represented by an +_object_ with a `__call__` method. 
Here, we call the {class}`scipy.stats.norm` +object to specify its location and scale, and it returns a _frozen_ +distribution: a particular element of a distribution family with all +parameters fixed. The frozen distribution object has methods to compute +essential functions of the particular distribution. + +::: + +Suppose we knew that the sample had been drawn from a distribution belonging to +the family of normal distributions, but we did not know the particular +distribution's location (mean) and scale (standard deviation). We perform +maximum likelihood estimation of the unknown parameters using the distribution +family's `fit` method: + +```{code-cell} +loc, scale = sp.stats.norm.fit(sample) +loc +``` + +```{code-cell} +scale +``` + +Since we know the true parameters of the distribution from which the +sample was drawn, we are not surprised that these estimates are similar. + +::: {exercise-start} +:label: scipy-prob-dist-ex +:class: dropdown +::: + +Generate 1000 random variates from a gamma distribution with a shape +parameter of 1. _Hint: the shape parameter is passed as the first +argument when freezing the distribution_. Plot the histogram of the +sample, and overlay the distribution's PDF. Estimate the shape parameter +from the sample using the `fit` method. + +Extra: the distributions have many useful methods. Explore them +using tab completion. Plot the cumulative distribution function of the +distribution, and compute the variance. + +::: {exercise-end} +::: + ++++ + +### Sample Statistics and Hypothesis Tests + +The sample mean is an estimator of the mean of the distribution from which +the sample was drawn: + +```{code-cell} +np.mean(sample) +``` + +NumPy includes some of the most fundamental sample statistics (e.g. +{func}`numpy.mean`, {func}`numpy.var`, {func}`numpy.percentile`); +{mod}`scipy.stats` includes many more.
For instance, the geometric mean +is a common measure of central tendency for data that tends to be +distributed over many orders of magnitude. + +```{code-cell} +sp.stats.gmean(2**sample) +``` + +SciPy also includes a variety of hypothesis tests that produce a +sample statistic and a p-value. For instance, suppose we wish to +test the null hypothesis that `sample` was drawn from a normal +distribution: + +```{code-cell} +res = sp.stats.normaltest(sample) +res.statistic +``` + +```{code-cell} +res.pvalue +``` + +Here, `statistic` is a sample statistic that tends to be high for +samples that are drawn from non-normal distributions. `pvalue` is +the probability of observing such a high value of the statistic for +a sample that _has_ been drawn from a normal distribution. If the +p-value is unusually small, this may be taken as evidence that +`sample` was _not_ drawn from the normal distribution. Our statistic +and p-value are moderate, so the test is inconclusive. + +There are many other features of {mod}`scipy.stats`, including circular +statistics, quasi-Monte Carlo methods, and resampling methods. +For much more information, see the documentation of {mod}`scipy.stats` +and the advanced chapter {ref}`statistics `. + ++++ + +## Numerical integration: {mod}`scipy.integrate` + +### Quadrature + +Suppose we wish to compute the definite integral +$\int_0^{\pi / 2} \sin(t) dt$ numerically. 
{func}`scipy.integrate.quad` +chooses one of several adaptive techniques depending on the parameters, and +is therefore the recommended first choice for integration of functions of a single variable: + +```{code-cell} +integral, error_estimate = sp.integrate.quad(np.sin, 0, np.pi / 2) +np.allclose(integral, 1) # numerical result ~ analytical result +``` + +```{code-cell} +abs(integral - 1) < error_estimate # actual error < estimated error +``` + +Other functions for _numerical quadrature_, including integration of +multivariate functions and approximating integrals from samples, are available +in {mod}`scipy.integrate`. + ++++ + +### Initial Value Problems + +{mod}`scipy.integrate` also features routines for integrating [Ordinary +Differential Equations +(ODE)](https://en.wikipedia.org/wiki/Ordinary_differential_equation). For +example, {func}`scipy.integrate.solve_ivp` integrates ODEs of the form: + +$$ +\frac{dy}{dt} = f(t, y(t)) +$$ + +from an initial time $t_0$ and initial state $y(t=t_0)=y_0$ to a final +time $t_f$ or until an event occurs (e.g. a specified state is reached). + +As an introduction, consider the initial value problem given by +$\frac{dy}{dt} = -2 y$ and the initial condition $y(t=0) = 1$ on +the interval $t = 0 \dots 4$. We begin by defining a callable that +computes $f(t, y(t))$ given the current time and state.
+ +```{code-cell} +def f(t, y): + return -2 * y +``` + +Then, to compute `y` as a function of time: + +```{code-cell} +t_span = (0, 4) # time interval +t_eval = np.linspace(*t_span) # times at which to evaluate `y` +y0 = [1,] # initial state +res = sp.integrate.solve_ivp(f, t_span=t_span, y0=y0, t_eval=t_eval) +``` + +and plot the result: + +```{code-cell} +plt.figure(figsize=(4, 3)) +plt.plot(res.t, res.y[0]) +plt.xlabel('t') +plt.ylabel('y') +plt.title('Solution of Initial Value Problem') +plt.tight_layout(); +``` + +Let us integrate a more complex ODE: a [damped +spring-mass oscillator](https://en.wikipedia.org/wiki/Harmonic_oscillator#Damped_harmonic_oscillator). +The position of a mass attached to a spring obeys the 2nd order ODE +$\ddot{y} + 2 \zeta \omega_0 \dot{y} + \omega_0^2 y = 0$ with natural frequency +$\omega_0 = \sqrt{k/m}$, damping ratio $\zeta = c/(2 m \omega_0)$, +spring constant $k$, mass $m$, and damping coefficient $c$. + +Before using {func}`scipy.integrate.solve_ivp`, the 2nd order ODE needs to be +transformed into a system of first-order ODEs. Note that + +$$ +\begin{aligned} \frac{dy}{dt} &= \dot{y} \\ +\frac{d\dot{y}}{dt} &= \ddot{y} = -(2 \zeta \omega_0 \dot{y} + \omega_0^2 y) \end{aligned} +$$ + +If we define $z = [z_0, z_1]$ where $z_0 = y$ and $z_1 = \dot{y}$, then the +first order equation: + +$$ +\frac{dz}{dt} = +\begin{bmatrix} + \frac{dz_0}{dt} \\ + \frac{dz_1}{dt} +\end{bmatrix} = +\begin{bmatrix} + z_1 \\ + -(2 \zeta \omega_0 z_1 + \omega_0^2 z_0) +\end{bmatrix} +$$ + +is equivalent to the original second order equation. 
+ +We set: + +```{code-cell} +m = 0.5 # kg +k = 4 # N/m +c = 0.4 # N s/m +zeta = c / (2 * m * np.sqrt(k/m)) +omega = np.sqrt(k / m) +``` + +and define the function that computes $\dot{z} = f(t, z(t))$: + +```{code-cell} +def f(t, z, zeta, omega): + return (z[1], -2.0 * zeta * omega * z[1] - omega**2 * z[0]) +``` + +Integration of the system follows: + +```{code-cell} +t_span = (0, 10) +t_eval = np.linspace(*t_span, 100) +z0 = [1, 0] +res = sp.integrate.solve_ivp(f, t_span, z0, t_eval=t_eval, + args=(zeta, omega), method='LSODA') +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(4, 3)) +plt.plot(res.t, res.y[0], label="y") +plt.plot(res.t, res.y[1], label="dy/dt") +plt.legend(loc="best"); +``` + +::: {note} +:class: dropdown + +With the option `method='LSODA'`, {func}`scipy.integrate.solve_ivp` uses LSODA +(Livermore Solver for Ordinary Differential equations with Automatic method switching +for stiff and non-stiff problems). See the [ODEPACK Fortran library] for more details. +::: + +:::{admonition} See also + +**Partial Differential Equations** + +There is no Partial Differential Equations (PDE) solver in SciPy. +Some Python packages for solving PDEs are available, such as [fipy] +or [SfePy]. +::: + ++++ + +## Fast Fourier transforms: {mod}`scipy.fft` + +The {mod}`scipy.fft` module computes fast Fourier transforms (FFTs) +and offers utilities to handle them. Some important functions are: + +- {func}`scipy.fft.fft` to compute the FFT +- {func}`scipy.fft.fftfreq` to generate the sampling frequencies +- {func}`scipy.fft.ifft` to compute the inverse FFT, from frequency + space to signal space + +As an illustration, an example (noisy) input signal (`sig`), and its FFT: + +```{code-cell} +# Time. +dt = 0.02 # Time step. +t = np.arange(0, 20, dt) # Time vector. +# An example noisy signal over time. +sig = np.sin(2 * np.pi / 5.0 * t) + 0.5 * rng.normal(size=t.size) +# FFT of signal. +sig_fft = sp.fft.fft(sig) +# Corresponding frequencies. 
+freqs = sp.fft.fftfreq(sig.size, d=dt) +``` + +::: {list-table} + +- - Signal + - FFT +- - ::: {glue} original_signal_fig + :doc: scipy_examples.md + ::: + - ::: {glue} fft_of_signal_fig + :doc: scipy_examples.md + ::: + +::: + +The peak signal frequency can be found with `freqs[power.argmax()]`. + +The code of this example and the figures above can be found in the [Scipy FFT +example](scipy-fft-example). + +Setting the Fourier components above this frequency to zero and inverting the +FFT with {func}`scipy.fft.ifft` gives a filtered signal (see the +[example](scipy-fft-example) for details). + +::: {glue} fft_filter_fig +:doc: scipy_examples.md +::: + +:::{admonition} `numpy.fft` + +NumPy also has an implementation of FFT ({mod}`numpy.fft`). However, the SciPy +one should be preferred, as it uses more efficient underlying implementations. + +::: + +**Fully worked examples:** + +::: {list-table} + +- - [Crude periodicity finding](eg-periodicity-finder) + - [Image blur with FFT](eg-image-blur) +- - ::: {glue} periodicity_fig + :doc: scipy_examples.md + ::: + - ::: {glue} blur_fig + :doc: scipy_examples.md + ::: + +::: + +::: {exercise-start} +:label: scipy-image-denoise-ex +:class: dropdown +::: + +![](data/moonlanding.png) + +1. Examine the provided image {download}`moonlanding.png +<data/moonlanding.png>`, which is heavily contaminated with periodic + noise. In this exercise, we aim to clean up the noise using the + Fast Fourier Transform. +2. Load the image using {func}`matplotlib.pyplot.imread`. +3. Find and use the 2-D FFT function in {mod}`scipy.fft`, and plot the + spectrum (Fourier transform) of the image. Do you have any trouble + visualising the spectrum? If so, why? +4. The spectrum consists of high and low frequency components. The noise is + contained in the high-frequency part of the spectrum, so set some of + those components to zero (use array slicing). +5. Apply the inverse Fourier transform to see the resulting image. 
+ +::: {exercise-end} +::: + +::: {solution-start} scipy-image-denoise-ex +:class: dropdown +::: + +Implementing image denoising with FFT. + +Denoise an image ({download}`data/moonlanding.png`) by implementing a blur +with an FFT. + +Implements, via FFT, the following convolution: + +$$ +\begin{align} +f_1(t) = \int dt'\, K(t-t') f_0(t') \\ +\tilde{f}_1(\omega) = \tilde{K}(\omega) \tilde{f}_0(\omega) +\end{align} +$$ + +```{code-cell} +# Read and plot the image +im = plt.imread("data/moonlanding.png").astype(float) + +plt.figure() +plt.imshow(im, "gray") +plt.title("Original image"); +``` + +```{code-cell} +# Compute the 2d FFT of the input image +im_fft = sp.fft.fft2(im) + +# Show the results +from matplotlib.colors import LogNorm + +def plot_spectrum(im_fft): + # A logarithmic colormap + plt.imshow(np.abs(im_fft), norm=LogNorm(vmin=5)) + plt.colorbar() + +plt.figure() +plot_spectrum(im_fft) +plt.title("Fourier transform"); +``` + +Filter in FFT: + +In the lines following, we'll make a copy of the original spectrum and +truncate coefficients. + +```{code-cell} +# Define the fraction of coefficients (in each direction) we keep +keep_fraction = 0.1 + +# Call im_fft2 a copy of the original transform. NumPy arrays have a copy +# method for this purpose. +im_fft2 = im_fft.copy() + +# Set r and c to be the number of rows and columns of the array. +r, c = im_fft2.shape + +# Set to zero all rows with indices between r*keep_fraction and +# r*(1-keep_fraction): +im_fft2[int(r * keep_fraction) : int(r * (1 - keep_fraction))] = 0 + +# Similarly with the columns: +im_fft2[:, int(c * keep_fraction) : int(c * (1 - keep_fraction))] = 0 +``` + +```{code-cell} +plt.figure() +plot_spectrum(im_fft2) +plt.title("Filtered Spectrum"); +``` + +Reconstruct the final image + +```{code-cell} +# Reconstruct the denoised image from the filtered spectrum, keep only the +# real part for display. 
+im_new = sp.fft.ifft2(im_fft2).real + +plt.figure() +plt.imshow(im_new, "gray") +plt.title("Reconstructed Image"); +``` + +Easier and better: {func}`scipy.ndimage.gaussian_filter` + +Implementing filtering directly with FFTs is tricky and time consuming. +We can use the Gaussian filter from {mod}`scipy.ndimage` + +```{code-cell} +im_blur = sp.ndimage.gaussian_filter(im, 4) + +plt.figure() +plt.imshow(im_blur, "gray") +plt.title("Blurred image"); +``` + +::: {solution-end} +::: + ++++ + +## Signal processing: {mod}`scipy.signal` + +::: {note} +:class: dropdown + +{mod}`scipy.signal` is for typical signal processing: 1D, +regularly-sampled signals. +::: + +**Resampling** {func}`scipy.signal.resample`: resample a signal to `n` +points using FFT. + +```{code-cell} +t = np.linspace(0, 5, 100) +x = np.sin(t) + +x_resampled = sp.signal.resample(x, 25) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot +plt.figure(figsize=(5, 4)) +plt.plot(t, x, label="Original signal") +plt.plot(t[::4], x_resampled, "ko", label="Resampled signal") +plt.legend(loc="best"); +``` + +::: {note} +:class: dropdown + +Notice how on the side of the window the resampling is less accurate +and has a rippling effect. + +This resampling is different from the {ref}`interpolation +` provided by {mod}`scipy.interpolate` as it +only applies to regularly sampled data. +::: + +**Detrending** {func}`scipy.signal.detrend`: remove linear trend from signal: + +```{code-cell} +t = np.linspace(0, 5, 100) +rng = np.random.default_rng() +x = t + rng.normal(size=100) + +x_detrended = sp.signal.detrend(x) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot +plt.figure(figsize=(5, 4)) +plt.plot(t, x, label="x") +plt.plot(t, x_detrended, label="x_detrended") +plt.legend(loc="best"); +``` + +**Filtering**: + +For non-linear filtering, {mod}`scipy.signal` has filtering (median +filter {func}`scipy.signal.medfilt`, Wiener {func}`scipy.signal.wiener`), +but we will discuss this in the image section. 
+ +::: {note} +:class: dropdown + +{mod}`scipy.signal` also has a full-blown set of tools for the design +of linear filters (finite and infinite impulse response filters), but this is +out of the scope of this tutorial. +::: + +**Spectral analysis**: + +{func}`scipy.signal.spectrogram` computes a spectrogram — frequency spectra +over consecutive time windows — while {func}`scipy.signal.welch` computes +a power spectral density (PSD). + +::: {list-table} +:header-rows: 1 + +- - Signal + - Spectrogram + - Power Spectral Density +- - ::: {glue} chirp_fig + :doc: scipy_examples.md + ::: + - ::: {glue} spectrogram_fig + :doc: scipy_examples.md + ::: + - ::: {glue} psd_fig + :doc: scipy_examples.md + ::: + +::: + +See the [Spectrogram example](scipy-spectrogram-example). + ++++ + +## Image manipulation: {mod}`scipy.ndimage` + +See [Scipy image processing](scipy-image-processing). + ++++ + +## Summary exercises on scientific computing + +The summary exercises use mainly NumPy, SciPy and Matplotlib. They provide some +real-life examples of scientific computing with Python. Now that the basics of +working with NumPy and SciPy have been introduced, the interested user is +invited to try these exercises. + +- [Statistical interpolation](summary-exercise-stat-interp) +- [Non-linear fitting](summary-exercise-optimize) +- [Image processing](summary-exercise-image-processing) + +:::{admonition} See also + +**References to go further** + +- Some chapters of the [advanced](advanced-topics-part) and the + [packages and applications](applications-part) parts of the SciPy + lectures. 
+- The [SciPy cookbook](https://scipy-cookbook.readthedocs.io) + +::: + ++++ + +## Other useful links + +- [fipy](https://www.ctcms.nist.gov/fipy) +- [odepack fortran library](https://people.sc.fsu.edu/~jburkardt/f77_src/odepack/odepack.html) +- [sfepy](https://sfepy.org/doc) diff --git a/intro/scipy/index.rst b/intro/scipy/index.rst deleted file mode 100644 index 4b802437c..000000000 --- a/intro/scipy/index.rst +++ /dev/null @@ -1,1147 +0,0 @@ -.. for doctests - >>> import matplotlib.pyplot as plt - >>> import numpy as np - -.. _scipy: - -SciPy : high-level scientific computing -======================================= - -**Authors**: *Gaël Varoquaux, Adrien Chauve, Andre Espaze, Emmanuelle Gouillart, Ralf Gommers* - - -.. topic:: Scipy - - The :mod:`scipy` package contains various toolboxes dedicated to common - issues in scientific computing. Its different submodules correspond - to different applications, such as interpolation, integration, - optimization, image processing, statistics, special functions, etc. - -.. tip:: - - :mod:`scipy` can be compared to other standard scientific-computing - libraries, such as the GSL (GNU Scientific Library for C and C++), - or Matlab's toolboxes. ``scipy`` is the core package for scientific - routines in Python; it is meant to operate efficiently on ``numpy`` - arrays, so that NumPy and SciPy work hand in hand. - - Before implementing a routine, it is worth checking if the desired - data processing is not already implemented in SciPy. As - non-professional programmers, scientists often tend to **re-invent the - wheel**, which leads to buggy, non-optimal, difficult-to-share and - unmaintainable code. By contrast, ``SciPy``'s routines are optimized - and tested, and should therefore be used when possible. - - -.. contents:: Chapters contents - :local: - :depth: 1 - - -.. warning:: - - This tutorial is far from an introduction to numerical computing. 
- As enumerating the different submodules and functions in SciPy would - be very boring, we concentrate instead on a few examples to give a - general idea of how to use ``scipy`` for scientific computing. - -:mod:`scipy` is composed of task-specific sub-modules: - -=========================== ========================================== -:mod:`scipy.cluster` Vector quantization / Kmeans -:mod:`scipy.constants` Physical and mathematical constants -:mod:`scipy.fft` Fourier transform -:mod:`scipy.integrate` Integration routines -:mod:`scipy.interpolate` Interpolation -:mod:`scipy.io` Data input and output -:mod:`scipy.linalg` Linear algebra routines -:mod:`scipy.ndimage` n-dimensional image package -:mod:`scipy.odr` Orthogonal distance regression -:mod:`scipy.optimize` Optimization -:mod:`scipy.signal` Signal processing -:mod:`scipy.sparse` Sparse matrices -:mod:`scipy.spatial` Spatial data structures and algorithms -:mod:`scipy.special` Any special mathematical functions -:mod:`scipy.stats` Statistics -=========================== ========================================== - -.. tip:: - - They all depend on :mod:`numpy`, but are mostly independent of each - other. The standard way of importing NumPy and these SciPy modules - is:: - - >>> import numpy as np - >>> import scipy as sp - - -File input/output: :mod:`scipy.io` ----------------------------------- -:mod:`scipy.io` contains functions for loading and saving data in -several common formats including Matlab, IDL, Matrix Market, and -Harwell-Boeing. - -**Matlab files**: Loading and saving:: - - >>> import scipy as sp - >>> a = np.ones((3, 3)) - >>> sp.io.savemat('file.mat', {'a': a}) # savemat expects a dictionary - >>> data = sp.io.loadmat('file.mat') - >>> data['a'] - array([[1., 1., 1.], - [1., 1., 1.], - [1., 1., 1.]]) - -.. warning:: **Python / Matlab mismatch**: The Matlab file format does not support 1D arrays. 
- - :: - - >>> a = np.ones(3) - >>> a - array([1., 1., 1.]) - >>> a.shape - (3,) - >>> sp.io.savemat('file.mat', {'a': a}) - >>> a2 = sp.io.loadmat('file.mat')['a'] - >>> a2 - array([[1., 1., 1.]]) - >>> a2.shape - (1, 3) - - Notice that the original array was a one-dimensional array, whereas the - saved and reloaded array is a two-dimensional array with a single row. - - For other formats, see the :mod:`scipy.io` documentation. - -.. seealso:: - - * Load text files: :func:`numpy.loadtxt`/:func:`numpy.savetxt` - - * Clever loading of text/csv files: - :func:`numpy.genfromtxt` - - * Fast and efficient, but NumPy-specific, binary format: - :func:`numpy.save`/:func:`numpy.load` - - * Basic input/output of images in Matplotlib: - :func:`matplotlib.pyplot.imread`/:func:`matplotlib.pyplot.imsave` - - * More advanced input/output of images: :mod:`imageio` - -Special functions: :mod:`scipy.special` ---------------------------------------- - -"Special" functions are functions commonly used in science and mathematics that -are not considered to be "elementary" functions. Examples include - - * the gamma function, :func:`scipy.special.gamma`, - * the error function, :func:`scipy.special.erf`, - * Bessel functions, such as :func:`scipy.special.jv` - (Bessel function of the first kind), and - * elliptic functions, such as :func:`scipy.special.ellipj` - (Jacobi elliptic functions). - -Other special functions are combinations of familiar elementary functions, -but they offer better accuracy or robustness than their naive implementations -would. - -Most of these function are computed elementwise and follow standard -NumPy broadcasting rules when the input arrays have different shapes. -For example, :func:`scipy.special.xlog1py` is mathematically equivalent -to :math:`x\log(1 + y)`. 
- - >>> import scipy as sp - >>> x = np.asarray([1, 2]) - >>> y = np.asarray([[3], [4], [5]]) - >>> res = sp.special.xlog1py(x, y) - >>> res.shape - (3, 2) - >>> ref = x * np.log(1 + y) - >>> np.allclose(res, ref) - True - -However, :func:`scipy.special.xlog1py` is numerically favorable for small :math:`y`, -when explicit addition of ``1`` would lead to loss of precision due to floating -point truncation error. - - >>> x = 2.5 - >>> y = 1e-18 - >>> x * np.log(1 + y) - np.float64(0.0) - >>> sp.special.xlog1py(x, y) - np.float64(2.5e-18) - -Many special functions also have "logarithmized" variants. For instance, -the gamma function :math:`\Gamma(\cdot)` is related to the factorial -function by :math:`n! = \Gamma(n + 1)`, but it extends the domain from the -positive integers to the complex plane. - - >>> x = np.arange(10) - >>> np.allclose(sp.special.gamma(x + 1), sp.special.factorial(x)) - True - >>> sp.special.gamma(5) < sp.special.gamma(5.5) < sp.special.gamma(6) - np.True_ - -The factorial function grows quickly, and so the gamma function overflows -for moderate values of the argument. However, sometimes only the logarithm -of the gamma function is needed. In such cases, we can compute the logarithm -of the gamma function directly using :func:`scipy.special.gammaln`. - - >>> x = [5, 50, 500] - >>> np.log(sp.special.gamma(x)) - array([ 3.17805383, 144.56574395, inf]) - >>> sp.special.gammaln(x) - array([ 3.17805383, 144.56574395, 2605.11585036]) - -Such functions can often be used when the intermediate components of a -calculation would overflow or underflow, but the final result would not. -For example, suppose we wish to compute the ratio -:math:`\Gamma(500)/\Gamma(499)`. - - >>> a = sp.special.gamma(500) - >>> b = sp.special.gamma(499) - >>> a, b - (np.float64(inf), np.float64(inf)) - -Both the numerator and denominator overflow, so performing :math:`a / b` will -not return the result we seek. 
However, the magnitude of the result should -be moderate, so the use of logarithms comes to mind. Combining the identities -:math:`\log(a/b) = \log(a) - \log(b)` and :math:`\exp(\log(x)) = x`, -we get: - - >>> log_a = sp.special.gammaln(500) - >>> log_b = sp.special.gammaln(499) - >>> log_res = log_a - log_b - >>> res = np.exp(log_res) - >>> res - np.float64(499.0000000...) - -Similarly, suppose we wish to compute the difference -:math:`\log(\Gamma(500) - \Gamma(499))`. For this, we use -:func:`scipy.special.logsumexp`, which computes -:math:`\log(\exp(x) + \exp(y))` using a numerical trick that avoids overflow. - - >>> res = sp.special.logsumexp([log_a, log_b], - ... b=[1, -1]) # weights the terms of the sum - >>> res - np.float64(2605.113844343...) - -For more information about these and many other special functions, see -the documentation of :mod:`scipy.special`. - -.. _scipy_linalg: - -Linear algebra operations: :mod:`scipy.linalg` ----------------------------------------------- - -:mod:`scipy.linalg` provides a Python interface to efficient, compiled -implementations of standard linear algebra operations: the BLAS (Basic -Linear Algebra Subroutines) and LAPACK (Linear Algebra PACKage) libraries. - -For example, the :func:`scipy.linalg.det` function computes the determinant -of a square matrix:: - - >>> import scipy as sp - >>> arr = np.array([[1, 2], - ... [3, 4]]) - >>> sp.linalg.det(arr) - np.float64(-2.0) - -Mathematically, the solution of a linear system :math:`Ax = b` is :math:`x = A^{-1}b`, -but explicit inversion of a matrix is numerically unstable and should be avoided. -Instead, use :func:`scipy.linalg.solve`:: - - >>> A = np.array([[1, 2], - ... [2, 3]]) - >>> b = np.array([14, 23]) - >>> x = sp.linalg.solve(A, b) - >>> x - array([4., 5.]) - >>> np.allclose(A @ x, b) - True - -Linear systems with special structure can often be solved more efficiently -than more general systems. 
For example, systems with triangular matrices -can be solved using :func:`scipy.linalg.solve_triangular`:: - - >>> A_upper = np.triu(A) - >>> A_upper - array([[1, 2], - [0, 3]]) - >>> np.allclose(sp.linalg.solve_triangular(A_upper, b, lower=False), - ... sp.linalg.solve(A_upper, b)) - True - -:mod:`scipy.linalg` also features matrix factorizations/decompositions -such as the singular value decomposition. - - >>> A = np.array([[1, 2], - ... [2, 3]]) - >>> U, s, Vh = sp.linalg.svd(A) - >>> s # singular values - array([4.23606798, 0.23606798]) - -The original matrix can be recovered by matrix multiplication of the -factors:: - - >>> S = np.diag(s) # convert to diagonal matrix before matrix multiplication - >>> A2 = U @ S @ Vh - >>> np.allclose(A2, A) - True - >>> A3 = (U * s) @ Vh # more efficient: use array math broadcasting rules! - >>> np.allclose(A3, A) - True - -Many other decompositions (e.g. LU, Cholesky, QR), solvers for structured -linear systems (e.g. triangular, circulant), eigenvalue problem algorithms, -matrix functions (e.g. matrix exponential), and routines for special matrix -creation (e.g. block diagonal, toeplitz) are available in :mod:`scipy.linalg`. - - -.. _intro_scipy_interpolate: - -Interpolation: :mod:`scipy.interpolate` ---------------------------------------- - -:mod:`scipy.interpolate` is used for fitting a function -- an "interpolant" -- -to experimental or computed data. Once fit, the interpolant can be used to -approximate the underlying function at intermediate points; it can also be used -to compute the integral, derivative, or inverse of the function. - -Some kinds of interpolants, known as "smoothing splines", are designed to -generate smooth curves from noisy data. 
For example, suppose we have -the following data:: - - >>> rng = np.random.default_rng(27446968) - >>> measured_time = np.linspace(0, 2*np.pi, 20) - >>> function = np.sin(measured_time) - >>> noise = rng.normal(loc=0, scale=0.1, size=20) - >>> measurements = function + noise - - -:func:`scipy.interpolate.make_smoothing_spline` can be used to form a curve -similar to the underlying sine function. - - >>> smoothing_spline = sp.interpolate.make_smoothing_spline(measured_time, measurements) - >>> interpolation_time = np.linspace(0, 2*np.pi, 200) - >>> smooth_results = smoothing_spline(interpolation_time) - -.. image:: auto_examples/images/sphx_glr_plot_interpolation_001.png - :target: auto_examples/plot_interpolation.html - :scale: 60 - :align: right - -On the other hand, if the data are not noisy, it may be desirable to pass -exactly through each point. - - >>> interp_spline = sp.interpolate.make_interp_spline(measured_time, function) - >>> interp_results = interp_spline(interpolation_time) - -.. image:: auto_examples/images/sphx_glr_plot_interpolation_002.png - :target: auto_examples/plot_interpolation.html - :scale: 60 - :align: right - -The ``derivative`` and ``antiderivative`` methods of the result object can be used -for differentiation and integration. For the latter, the constant of integration is -assumed to be zero, but we can "wrap" the antiderivative to include a nonzero -constant of integration. - - >>> d_interp_spline = interp_spline.derivative() - >>> d_interp_results = d_interp_spline(interpolation_time) - >>> i_interp_spline = lambda t: interp_spline.antiderivative()(t) - 1 - >>> i_interp_results = i_interp_spline(interpolation_time) - -.. image:: auto_examples/images/sphx_glr_plot_interpolation_003.png - :target: auto_examples/plot_interpolation.html - :scale: 60 - :align: right - -For functions that are monotonic on an interval (e.g. 
:math:`\sin` from :math:`\pi/2` -to :math:`3\pi/2`), we can reverse the arguments of ``make_interp_spline`` to -interpolate the inverse function. Because the first argument is expected to be -monotonically *increasing*, we also reverse the order of elements in the arrays -with :func:`numpy.flip`. - - >>> i = (measured_time > np.pi/2) & (measured_time < 3*np.pi/2) - >>> inverse_spline = sp.interpolate.make_interp_spline(np.flip(function[i]), - ... np.flip(measured_time[i])) - >>> inverse_spline(0) - array(3.14159265) - -See the summary exercise on :ref:`summary_exercise_stat_interp` for a more -advanced spline interpolation example, and read the -`SciPy interpolation tutorial `__ -and the :mod:`scipy.interpolate` documentation for much more information. - -Optimization and fit: :mod:`scipy.optimize` -------------------------------------------- - -:mod:`scipy.optimize` provides algorithms for root finding, curve fitting, -and more general optimization. - -Root Finding -............ - -:func:`scipy.optimize.root_scalar` attempts to find a root of a specified -scalar-valued function (i.e., an argument at which the function value is zero). -Like many :mod:`scipy.optimize` functions, the function needs an initial -guess of the solution, which the algorithm will refine until it converges or -recognizes failure. We also provide the derivative to improve the rate of -convergence. - - >>> def f(x): - ... return (x-1)*(x-2) - >>> def df(x): - ... return 2*x - 3 - >>> x0 = 0 # guess - >>> res = sp.optimize.root_scalar(f, x0=x0, fprime=df) - >>> res - converged: True - flag: converged - function_calls: 12 - iterations: 6 - root: 1.0 - method: newton - -.. warning:: - - None of the functions in :mod:`scipy.optimize` that accept a guess are - guaranteed to converge for all possible guesses! (For example, try - ``x0=1.5`` in the example above, where the derivative of the function is - exactly zero.) 
If this occurs, try a different guess, adjust the options - (like providing a ``bracket`` as shown below), or consider whether SciPy - offers a more appropriate method for the problem. - -Note that only one the root at ``1.0`` is found. By inspection, we can tell -that there is a second root at ``2.0``. We can direct the function toward a -particular root by changing the guess or by passing a bracket that contains -only the root we seek. - - >>> res = sp.optimize.root_scalar(f, bracket=(1.5, 10)) - >>> res.root - 2.0 - -For multivariate problems, use :func:`scipy.optimize.root`. - - >>> def f(x): - ... # intersection of unit circle and line from origin - ... return [x[0]**2 + x[1]**2 - 1, - ... x[1] - x[0]] - >>> res = sp.optimize.root(f, x0=[0, 0]) - >>> np.allclose(f(res.x), 0, atol=1e-10) - True - >>> np.allclose(res.x, np.sqrt(2)/2) - True - -Over-constrained problems can be solved in the least-squares -sense using :func:`scipy.optimize.root` with ``method='lm'`` -(Levenberg-Marquardt). - - >>> def f(x): - ... # intersection of unit circle, line from origin, and parabola - ... return [x[0]**2 + x[1]**2 - 1, - ... x[1] - x[0], - ... x[1] - x[0]**2] - >>> res = sp.optimize.root(f, x0=[1, 1], method='lm') - >>> res.success - True - >>> res.x - array([0.76096066, 0.66017736]) - -See the documentation of :func:`scipy.optimize.root_scalar` -and :func:`scipy.optimize.root` for a variety of other solution -algorithms and options. - -Curve fitting -............. - -.. image:: auto_examples/images/sphx_glr_plot_curve_fit_001.png - :target: auto_examples/plot_curve_fit.html - :align: right - :scale: 50 - -Suppose we have data that is sinusoidal but noisy:: - - >>> x = np.linspace(-5, 5, num=50) # 50 values between -5 and 5 - >>> noise = 0.01 * np.cos(100 * x) - >>> a, b = 2.9, 1.5 - >>> y = a * np.cos(b * x) + noise - -We can approximate the underlying amplitude, frequency, and phase -from the data by least squares curve fitting. 
To begin, we write -a function that accepts the independent variable as the first -argument and all parameters to fit as separate arguments:: - - >>> def f(x, a, b, c): - ... return a * np.sin(b * x + c) - -.. image:: auto_examples/images/sphx_glr_plot_curve_fit_002.png - :target: auto_examples/plot_curve_fit.html - :align: right - :scale: 50 - -We then use :func:`scipy.optimize.curve_fit` to find :math:`a` and :math:`b`:: - - >>> params, _ = sp.optimize.curve_fit(f, x, y, p0=[2, 1, 3]) - >>> params - array([2.900026 , 1.50012043, 1.57079633]) - >>> ref = [a, b, np.pi/2] # what we'd expect - >>> np.allclose(params, ref, rtol=1e-3) - True - -.. raw:: html - -
- -.. topic:: Exercise: Curve fitting of temperature data - :class: green - - The temperature extremes in Alaska for each month, starting in January, are - given by (in degrees Celsius):: - - max: 17, 19, 21, 28, 33, 38, 37, 37, 31, 23, 19, 18 - min: -62, -59, -56, -46, -32, -18, -9, -13, -25, -46, -52, -58 - - 1. Plot these temperature extremes. - 2. Define a function that can describe min and max temperatures. - Hint: this function has to have a period of 1 year. - Hint: include a time offset. - 3. Fit this function to the data with :func:`scipy.optimize.curve_fit`. - 4. Plot the result. Is the fit reasonable? If not, why? - 5. Is the time offset for min and max temperatures the same within the fit - accuracy? - - :ref:`solution ` - - -Optimization -............ - -.. image:: auto_examples/images/sphx_glr_plot_optimize_example1_001.png - :target: auto_examples/plot_optimize_example1.html - :align: right - :scale: 50 - -Suppose we wish to minimize the scalar-valued function of a single -variable :math:`f(x) = x^2 + 10 \sin(x)`:: - - >>> def f(x): - ... return x**2 + 10*np.sin(x) - >>> x = np.arange(-5, 5, 0.1) - >>> plt.plot(x, f(x)) - [] - >>> plt.show() - -We can see that the function has a local minimizer near :math:`x = 3.8` -and a global minimizer near :math:`x = -1.3`, but -the precise values cannot be determined from the plot. - -The most appropriate function for this purpose is -:func:`scipy.optimize.minimize_scalar`. -Since we know the approximate locations of the minima, we will provide -bounds that restrict the search to the vicinity of the global minimum. - - >>> res = sp.optimize.minimize_scalar(f, bounds=(-2, -1)) - >>> res - message: Solution found. - success: True - status: 0 - fun: -7.9458233756... - x: -1.306440997... 
- nit: 8 - nfev: 8 - >>> res.fun == f(res.x) - np.True_ - -If we did not already know the approximate location of the global minimum, -we could use one of SciPy's global minimizers, such as -:func:`scipy.optimize.differential_evolution`. We are required to pass -``bounds``, but they do not need to be tight. - - >>> bounds=[(-5, 5)] # list of lower, upper bound for each variable - >>> res = sp.optimize.differential_evolution(f, bounds=bounds) - >>> res # doctest:+SKIP - message: Optimization terminated successfully. - success: True - fun: -7.9458233756... - x: [-1.306e+00] - nit: 6 - nfev: 111 - jac: [ 9.948e-06] - -For multivariate optimization, a good choice for many problems is -:func:`scipy.optimize.minimize`. -Suppose we wish to find the minimum of a quadratic function of two -variables, :math:`f(x_0, x_1) = (x_0-1)^2 + (x_1-2)^2`. - - >>> def f(x): - ... return (x[0] - 1)**2 + (x[1] - 2)**2 - -Like :func:`scipy.optimize.root`, :func:`scipy.optimize.minimize` -requires a guess ``x0``. (Note that this is the initial value of -*both* variables rather than the value of the variable we happened to -label :math:`x_0`.) - - >>> res = sp.optimize.minimize(f, x0=[0, 0]) - >>> res - message: Optimization terminated successfully. - success: True - status: 0 - fun: 1.70578...e-16 - x: [ 1.000e+00 2.000e+00] - nit: 2 - jac: [ 3.219e-09 -8.462e-09] - hess_inv: [[ 9.000e-01 -2.000e-01] - [-2.000e-01 6.000e-01]] - nfev: 9 - njev: 3 - -.. sidebar:: **Maximization?** - - Is :func:`scipy.optimize.minimize` restricted to the solution of - minimization problems? Nope! To solve a maximization problem, - simply minimize the *negative* of the original objective function. - -This barely scratches the surface of SciPy's optimization features, which -include mixed integer linear programming, constrained nonlinear programming, -and the solution of assignment problems. 
For much more information, see the -documentation of :mod:`scipy.optimize` and the advanced chapter -:ref:`mathematical_optimization`. - -.. topic:: Exercise: 2-D minimization - :class: green - - .. image:: auto_examples/images/sphx_glr_plot_2d_minimization_002.png - :target: auto_examples/plot_2d_minimization.html - :align: right - :scale: 50 - - The six-hump camelback function - - .. math:: f(x, y) = (4 - 2.1x^2 + \frac{x^4}{3})x^2 + xy + (4y^2 - 4)y^2 - - has multiple local minima. Find a global minimum (there is more than one, - each with the same value of the objective function) and at least one other - local minimum. - - Hints: - - - Variables can be restricted to :math:`-2 < x < 2` and :math:`-1 < y < 1`. - - :func:`numpy.meshgrid` and :func:`matplotlib.pyplot.imshow` can help - with visualization. - - Try minimizing with :func:`scipy.optimize.minimize` with an initial - guess of :math:`(x, y) = (0, 0)`. Does it find the global minimum, or - converge to a local minimum? What about other initial guesses? - - Try minimizing with :func:`scipy.optimize.differential_evolution`. - - :ref:`solution ` - -See the summary exercise on :ref:`summary_exercise_optimize` for another, more -advanced example. - - -Statistics and random numbers: :mod:`scipy.stats` -------------------------------------------------- - -.. Comment to make doctest pass - >>> np.random.seed(0) - - -:mod:`scipy.stats` contains fundamental tools for statistics in Python. - -Statistical Distributions -......................... - -Consider a random variable distributed according to the standard normal. -We draw a sample consisting of 100000 observations from the random variable. 
-The normalized histogram of the sample is an estimator of the random -variable's probability density function (PDF):: - - >>> dist = sp.stats.norm(loc=0, scale=1) # standard normal distribution - >>> sample = dist.rvs(size=100000) # "random variate sample" - >>> plt.hist(sample, bins=50, density=True, label='normalized histogram') # doctest: +SKIP - >>> x = np.linspace(-5, 5) - >>> plt.plot(x, dist.pdf(x), label='PDF') - [] - >>> plt.legend() - - -.. image:: auto_examples/images/sphx_glr_plot_normal_distribution_001.png - :target: auto_examples/plot_normal_distribution.html - :scale: 70 - -.. sidebar:: **Distribution objects and frozen distributions** - - Each of the 100+ :mod:`scipy.stats` distribution families is represented by an - *object* with a `__call__` method. Here, we call the :class:`scipy.stats.norm` - object to specify its location and scale, and it returns a *frozen* - distribution: a particular element of a distribution family with all - parameters fixed. The frozen distribution object has methods to compute - essential functions of the particular distribution. - -Suppose we knew that the sample had been drawn from a distribution belonging -to the family of normal distributions, but we did not know the particular -distribution's location (mean) and scale (standard deviation). We perform -maximum likelihood estimation of the unknown parameters using the -distribution family's ``fit`` method:: - - >>> loc, scale = sp.stats.norm.fit(sample) - >>> loc - np.float64(0.0015767005...) - >>> scale - np.float64(0.9973396878...) - -Since we know the true parameters of the distribution from which the -sample was drawn, we are not surprised that these estimates are similar. - -.. topic:: Exercise: Probability distributions - :class: green - - Generate 1000 random variates from a gamma distribution with a shape - parameter of 1. *Hint: the shape parameter is passed as the first - argument when freezing the distribution*. 
Plot the histogram of the - sample, and overlay the distribution's PDF. Estimate the shape parameter - from the sample using the ``fit`` method. - - Extra: the distributions have many useful methods. Explore them - using tab completion. Plot the cumulative density function of the - distribution, and compute the variance. - -Sample Statistics and Hypothesis Tests -...................................... - -The sample mean is an estimator of the mean of the distribution from which -the sample was drawn:: - - >>> np.mean(sample) - np.float64(0.001576700508...) - -NumPy includes some of the most fundamental sample statistics (e.g. -:func:`numpy.mean`, :func:`numpy.var`, :func:`numpy.percentile`); -:mod:`scipy.stats` includes many more. For instance, the geometric mean -is a common measure of central tendency for data that tends to be -distributed over many orders of magnitude. - - >>> sp.stats.gmean(2**sample) - np.float64(1.0010934829...) - -SciPy also includes a variety of hypothesis tests that produce a -sample statistic and a p-value. For instance, suppose we wish to -test the null hypothesis that ``sample`` was drawn from a normal -distribution:: - - >>> res = sp.stats.normaltest(sample) - >>> res.statistic - np.float64(5.20841759...) - >>> res.pvalue - np.float64(0.07396163283...) - -Here, ``statistic`` is a sample statistic that tends to be high for -samples that are drawn from non-normal distributions. ``pvalue`` is -the probability of observing such a high value of the statistic for -a sample that *has* been drawn from a normal distribution. If the -p-value is unusually small, this may be taken as evidence that -``sample`` was *not* drawn from the normal distribution. Our statistic -and p-value are moderate, so the test is inconclusive. - -There are many other features of :mod:`scipy.stats`, including circular -statistics, quasi-Monte Carlo methods, and resampling methods. 
-For much more information, see the documentation of :mod:`scipy.stats` -and the advanced chapter :ref:`statistics `. - -Numerical integration: :mod:`scipy.integrate` ---------------------------------------------- - -Quadrature -.......... - -Suppose we wish to compute the definite integral -:math:`\int_0^{\pi / 2} \sin(t) dt` numerically. :func:`scipy.integrate.quad` -chooses one of several adaptive techniques depending on the parameters, and -is therefore the recommended first choice for integration of function of a single variable:: - - >>> integral, error_estimate = sp.integrate.quad(np.sin, 0, np.pi/2) - >>> np.allclose(integral, 1) # numerical result ~ analytical result - True - >>> abs(integral - 1) < error_estimate # actual error < estimated error - True - -Other functions for *numerical quadrature*, including integration of -multivariate functions and approximating integrals from samples, are available -in :mod:`scipy.integrate`. - -Initial Value Problems -...................... - -:mod:`scipy.integrate` also features routines for integrating `Ordinary -Differential Equations (ODE) -`__. -For example, :func:`scipy.integrate.solve_ivp` integrates ODEs of the form: - -.. math:: - - \frac{dy}{dt} = f(t, y(t)) - -from an initial time :math:`t_0` and initial state :math:`y(t=t_0)=t_0` to a final -time :math:`t_f` or until an event occurs (e.g. a specified state is reached). - -As an introduction, consider the initial value problem given by -:math:`\frac{dy}{dt} = -2 y` and the initial condition :math:`y(t=0) = 1` on -the interval :math:`t = 0 \dots 4`. We begin by defining a callable that -computes :math:`f(t, y(t))` given the current time and state. - - >>> def f(t, y): - ... 
return -2 * y - -Then, to compute ``y`` as a function of time:: - - >>> t_span = (0, 4) # time interval - >>> t_eval = np.linspace(*t_span) # times at which to evaluate `y` - >>> y0 = [1,] # initial state - >>> res = sp.integrate.solve_ivp(f, t_span=t_span, y0=y0, t_eval=t_eval) - -and plot the result:: - - >>> plt.plot(res.t, res.y[0]) - [] - >>> plt.xlabel('t') - Text(0.5, ..., 't') - >>> plt.ylabel('y') - Text(..., 0.5, 'y') - >>> plt.title('Solution of Initial Value Problem') - Text(0.5, 1.0, 'Solution of Initial Value Problem') - -.. image:: auto_examples/images/sphx_glr_plot_solve_ivp_simple_001.png - :target: auto_examples/plot_solve_ivp_simple.html - :scale: 70 - :align: right - -Let us integrate a more complex ODE: a `damped -spring-mass oscillator -`__. -The position of a mass attached to a spring obeys the 2nd order ODE -:math:`\ddot{y} + 2 \zeta \omega_0 \dot{y} + \omega_0^2 y = 0` with natural frequency -:math:`\omega_0 = \sqrt{k/m}`, damping ratio :math:`\zeta = c/(2 m \omega_0)`, -spring constant :math:`k`, mass :math:`m`, and damping coefficient :math:`c`. - -Before using :func:`scipy.integrate.solve_ivp`, the 2nd order ODE -needs to be transformed into a system of first-order ODEs. Note that - -.. math:: - - \frac{dy}{dt} = \dot{y} - \frac{d\dot{y}}{dt} = \ddot{y} = -(2 \zeta \omega_0 \dot{y} + \omega_0^2 y) - -If we define :math:`z = [z_0, z_1]` where :math:`z_0 = y` and :math:`z_1 = \dot{y}`, -then the first order equation: - -.. math:: - - \frac{dz}{dt} = - \begin{bmatrix} - \frac{dz_0}{dt} \\ - \frac{dz_1}{dt} - \end{bmatrix} = - \begin{bmatrix} - z_1 \\ - -(2 \zeta \omega_0 z_1 + \omega_0^2 z_0) - \end{bmatrix} - -is equivalent to the original second order equation. - -We set:: - - >>> m = 0.5 # kg - >>> k = 4 # N/m - >>> c = 0.4 # N s/m - >>> zeta = c / (2 * m * np.sqrt(k/m)) - >>> omega = np.sqrt(k / m) - -and define the function that computes :math:`\dot{z} = f(t, z(t))`:: - - >>> def f(t, z, zeta, omega): - ... 
return (z[1], -2.0 * zeta * omega * z[1] - omega**2 * z[0]) - -.. image:: auto_examples/images/sphx_glr_plot_solve_ivp_damped_spring_mass_001.png - :target: auto_examples/plot_solve_ivp_damped_spring_mass.html - :scale: 70 - :align: right - -Integration of the system follows:: - - >>> t_span = (0, 10) - >>> t_eval = np.linspace(*t_span, 100) - >>> z0 = [1, 0] - >>> res = sp.integrate.solve_ivp(f, t_span, z0, t_eval=t_eval, - ... args=(zeta, omega), method='LSODA') - -.. tip:: - - With the option `method='LSODA'`, :func:`scipy.integrate.solve_ivp` uses the LSODA - (Livermore Solver for Ordinary Differential equations with Automatic method switching - for stiff and non-stiff problems). See the `ODEPACK Fortran library`_ for more details. - -.. _`ODEPACK Fortran library` : https://people.sc.fsu.edu/~jburkardt/f77_src/odepack/odepack.html - -.. seealso:: **Partial Differental Equations** - - There is no Partial Differential Equations (PDE) solver in SciPy. - Some Python packages for solving PDE's are available, such as fipy_ - or SfePy_. - -.. _fipy: https://www.ctcms.nist.gov/fipy/ -.. _SfePy: https://sfepy.org/doc/ - -Fast Fourier transforms: :mod:`scipy.fft` ---------------------------------------------- - -The :mod:`scipy.fft` module computes fast Fourier transforms (FFTs) -and offers utilities to handle them. Some important functions are: - -* :func:`scipy.fft.fft` to compute the FFT - -* :func:`scipy.fft.fftfreq` to generate the sampling frequencies - -* :func:`scipy.fft.ifft` to compute the inverse FFT, from frequency - space to signal space - -| - -As an illustration, a (noisy) input signal (``sig``), and its FFT:: - - >>> sig_fft = sp.fft.fft(sig) # doctest:+SKIP - >>> freqs = sp.fft.fftfreq(sig.size, d=time_step) # doctest:+SKIP - - -.. |signal_fig| image:: auto_examples/images/sphx_glr_plot_fftpack_001.png - :target: auto_examples/plot_fftpack.html - :scale: 60 - -.. 
|fft_fig| image:: auto_examples/images/sphx_glr_plot_fftpack_002.png - :target: auto_examples/plot_fftpack.html - :scale: 60 - -===================== ===================== -|signal_fig| |fft_fig| -===================== ===================== -**Signal** **FFT** -===================== ===================== - -As the signal comes from a real-valued function, the Fourier transform is -symmetric. - -The peak signal frequency can be found with ``freqs[power.argmax()]`` - -.. image:: auto_examples/images/sphx_glr_plot_fftpack_003.png - :target: auto_examples/plot_fftpack.html - :scale: 60 - :align: right - - -Setting the Fourier component above this frequency to zero and inverting -the FFT with :func:`scipy.fft.ifft`, gives a filtered signal. - -.. note:: - - The code of this example can be found :ref:`here ` - -.. topic:: `numpy.fft` - - NumPy also has an implementation of FFT (:mod:`numpy.fft`). However, - the SciPy one - should be preferred, as it uses more efficient underlying implementations. - -| - -**Fully worked examples:** - -.. |periodicity_finding| image:: auto_examples/solutions/images/sphx_glr_plot_periodicity_finder_001.png - :scale: 50 - :target: auto_examples/solutions/plot_periodicity_finder.html - -.. 
|image_blur| image:: auto_examples/solutions/images/sphx_glr_plot_image_blur_002.png - :scale: 50 - :target: auto_examples/solutions/plot_image_blur.html - -=================================================================================================================== =================================================================================================================== -Crude periodicity finding (:ref:`link `) Gaussian image blur (:ref:`link `) -=================================================================================================================== =================================================================================================================== -|periodicity_finding| |image_blur| -=================================================================================================================== =================================================================================================================== - -| - -.. topic:: Exercise: Denoise moon landing image - :class: green - - .. image:: ../../data/moonlanding.png - :scale: 70 - - 1. Examine the provided image :download:`moonlanding.png - <../../data/moonlanding.png>`, which is heavily contaminated with periodic - noise. In this exercise, we aim to clean up the noise using the - Fast Fourier Transform. - - 2. Load the image using :func:`matplotlib.pyplot.imread`. - - 3. Find and use the 2-D FFT function in :mod:`scipy.fft`, and plot the - spectrum (Fourier transform of) the image. Do you have any trouble - visualising the spectrum? If so, why? - - 4. The spectrum consists of high and low frequency components. The noise is - contained in the high-frequency part of the spectrum, so set some of - those components to zero (use array slicing). - - 5. Apply the inverse Fourier transform to see the resulting image. - - :ref:`Solution ` - -| - - -Signal processing: :mod:`scipy.signal` --------------------------------------- - -.. 
tip:: - - :mod:`scipy.signal` is for typical signal processing: 1D, - regularly-sampled signals. - -.. image:: auto_examples/images/sphx_glr_plot_resample_001.png - :target: auto_examples/plot_resample.html - :scale: 65 - :align: right - - -**Resampling** :func:`scipy.signal.resample`: resample a signal to `n` -points using FFT. :: - - >>> t = np.linspace(0, 5, 100) - >>> x = np.sin(t) - - >>> x_resampled = sp.signal.resample(x, 25) - - >>> plt.plot(t, x) - [] - >>> plt.plot(t[::4], x_resampled, 'ko') - [] - -.. tip:: - - Notice how on the side of the window the resampling is less accurate - and has a rippling effect. - - This resampling is different from the :ref:`interpolation - ` provided by :mod:`scipy.interpolate` as it - only applies to regularly sampled data. - - -.. image:: auto_examples/images/sphx_glr_plot_detrend_001.png - :target: auto_examples/plot_detrend.html - :scale: 65 - :align: right - -**Detrending** :func:`scipy.signal.detrend`: remove linear trend from signal:: - - >>> t = np.linspace(0, 5, 100) - >>> rng = np.random.default_rng() - >>> x = t + rng.normal(size=100) - - >>> x_detrended = sp.signal.detrend(x) - - >>> plt.plot(t, x) - [] - >>> plt.plot(t, x_detrended) - [] - -.. raw:: html - -
- -**Filtering**: -For non-linear filtering, :mod:`scipy.signal` has filtering (median -filter :func:`scipy.signal.medfilt`, Wiener :func:`scipy.signal.wiener`), -but we will discuss this in the image section. - -.. tip:: - - :mod:`scipy.signal` also has a full-blown set of tools for the design - of linear filter (finite and infinite response filters), but this is - out of the scope of this tutorial. - - -**Spectral analysis**: -:func:`scipy.signal.spectrogram` compute a spectrogram --frequency -spectrums over consecutive time windows--, while -:func:`scipy.signal.welch` comptes a power spectrum density (PSD). - -.. |chirp_fig| image:: auto_examples/images/sphx_glr_plot_spectrogram_001.png - :target: auto_examples/plot_spectrogram.html - :scale: 45 - -.. |spectrogram_fig| image:: auto_examples/images/sphx_glr_plot_spectrogram_002.png - :target: auto_examples/plot_spectrogram.html - :scale: 45 - -.. |psd_fig| image:: auto_examples/images/sphx_glr_plot_spectrogram_003.png - :target: auto_examples/plot_spectrogram.html - :scale: 45 - -|chirp_fig| |spectrogram_fig| |psd_fig| - -Image manipulation: :mod:`scipy.ndimage` ------------------------------------------ - -.. include:: image_processing/image_processing.rst - :start-line: 1 - - -Summary exercises on scientific computing ------------------------------------------ - -The summary exercises use mainly NumPy, SciPy and Matplotlib. They provide some -real-life examples of scientific computing with Python. Now that the basics of -working with NumPy and SciPy have been introduced, the interested user is -invited to try these exercises. - -.. only:: html - - **Exercises:** - -.. toctree:: - :maxdepth: 1 - - summary-exercises/stats-interpolate.rst - summary-exercises/optimize-fit.rst - summary-exercises/image-processing.rst - -.. only:: html - - **Proposed solutions:** - -.. toctree:: - :maxdepth: 1 - - summary-exercises/answers_image_processing.rst - -.. include the gallery. 
Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 - - -.. seealso:: **References to go further** - - * Some chapters of the `advanced `__ and the - `packages and applications `__ parts of the SciPy - lectures - - * The `SciPy cookbook `__ - -.. compile solutions, but don't list them explicitly -.. toctree:: - :hidden: - - solutions.rst diff --git a/intro/scipy/scipy_examples.md b/intro/scipy/scipy_examples.md new file mode 100644 index 000000000..84aeb22cc --- /dev/null +++ b/intro/scipy/scipy_examples.md @@ -0,0 +1,671 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +# Examples for Scipy introduction + +This is a collection of examples for introductory Scipy. See the [Scipy page](scipy) for the main introduction. + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt + +import scipy as sp +``` + +```{code-cell} +:tags: [hide-input] + +# Machinery to store outputs for later use. +# This is for rendering in the Jupyter Book version of these pages. +from myst_nb import glue +``` + +(optimize-example1)= + +## Finding the minimum of a smooth function + + + ++++ + +Demos various methods to find the minimum of a function. 
+ +```{code-cell} +def f(x): + return x**2 + 10 * np.sin(x) + +x = np.arange(-5, 5, 0.1) +plt.plot(x, f(x)); +``` + +```{code-cell} +# Now find the minimum with a few methods +# The default method (BFGS for an unconstrained problem) +print(sp.optimize.minimize(f, x0=0)) +``` + +## Other examples + ++++ + +(connect-measurements)= + +### connect_measurements + + + +Demo connected components + +Extracting and labeling connected components in a 2D array + +```{code-cell} +# Generate some binary data +x, y = np.indices((100, 100)) +sig = ( + np.sin(2 * np.pi * x / 50.0) + * np.sin(2 * np.pi * y / 50.0) + * (1 + x * y / 50.0**2) ** 2 +) +mask = sig > 1 +``` + +```{code-cell} +plt.figure(figsize=(7, 3.5)) +plt.subplot(1, 2, 1) +plt.imshow(sig) +plt.axis("off") +plt.title("sig") + +plt.subplot(1, 2, 2) +plt.imshow(mask, cmap="gray") +plt.axis("off") +plt.title("mask") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9); +``` + +Label connected components + +```{code-cell} +labels, nb = sp.ndimage.label(mask) +``` + +```{code-cell} +plt.figure(figsize=(3.5, 3.5)) +plt.imshow(labels) +plt.title("label") +plt.axis("off") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9); +``` + +```{code-cell} +# Extract the 4th connected component, and crop the array around it +sl = sp.ndimage.find_objects(labels == 4) +plt.figure(figsize=(3.5, 3.5)) +plt.imshow(sig[sl[0]]) +plt.title("Cropped connected component") +plt.axis("off") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.9); +``` + +(image-filters)= + +### image_filters + + + +Plot filtering on images + +Demo filtering for denoising of images. 
+ +```{code-cell} +# Load some data +face = sp.datasets.face(gray=True) +face = face[:512, -512:] # crop out square on right +``` + +```{code-cell} +# Apply a variety of filters +noisy_face = np.copy(face).astype(float) +rng = np.random.default_rng() +noisy_face += face.std() * 0.5 * rng.standard_normal(face.shape) +blurred_face = sp.ndimage.gaussian_filter(noisy_face, sigma=3) +median_face = sp.ndimage.median_filter(noisy_face, size=5) +wiener_face = sp.signal.wiener(noisy_face, (5, 5)) +``` + +```{code-cell} +plt.figure(figsize=(12, 3.5)) +plt.subplot(141) +plt.imshow(noisy_face, cmap="gray") +plt.axis("off") +plt.title("noisy") +plt.subplot(142) +plt.imshow(blurred_face, cmap="gray") +plt.axis("off") +plt.title("Gaussian filter") +plt.subplot(143) +plt.imshow(median_face, cmap="gray") +plt.axis("off") +plt.title("median filter") +plt.subplot(144) +plt.imshow(wiener_face, cmap="gray") +plt.title("Wiener filter") +plt.axis("off") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99); +``` + +(image-transform)= + +### image_transform + + + +Plot geometrical transformations on images + +Demo geometrical transformations of images. 
+ +```{code-cell} +# Load some data +face = sp.datasets.face(gray=True) + +# Apply a variety of transformations +shifted_face = sp.ndimage.shift(face, (50, 50)) +shifted_face2 = sp.ndimage.shift(face, (50, 50), mode="nearest") +rotated_face = sp.ndimage.rotate(face, 30) +cropped_face = face[50:-50, 50:-50] +zoomed_face = sp.ndimage.zoom(face, 2) +zoomed_face.shape +``` + +```{code-cell} +plt.figure(figsize=(15, 3)) +plt.subplot(151) +plt.imshow(shifted_face, cmap="gray") +plt.axis("off") +plt.subplot(152) +plt.imshow(shifted_face2, cmap="gray") +plt.axis("off") +plt.subplot(153) +plt.imshow(rotated_face, cmap="gray") +plt.axis("off") +plt.subplot(154) +plt.imshow(cropped_face, cmap="gray") +plt.axis("off") +plt.subplot(155) +plt.imshow(zoomed_face, cmap="gray") +plt.axis("off") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99); +``` + +(mathematical-morpho)= + +### Mathematical morphology + + + +Demo mathematical morphology + +A basic demo of binary opening and closing. 
+ +```{code-cell} +# Generate some binary data +rng = np.random.default_rng(0) +a = np.zeros((50, 50)) +a[10:-10, 10:-10] = 1 +a += 0.25 * rng.standard_normal(a.shape) +mask = a >= 0.5 +``` + +```{code-cell} +# Apply mathematical morphology +opened_mask = sp.ndimage.binary_opening(mask) +closed_mask = sp.ndimage.binary_closing(opened_mask) +``` + +```{code-cell} +# Plot +plt.figure(figsize=(12, 3.5)) +plt.subplot(141) +plt.imshow(a, cmap="gray") +plt.axis("off") +plt.title("a") +plt.subplot(142) +plt.imshow(mask, cmap="gray") +plt.axis("off") +plt.title("mask") +plt.subplot(143) +plt.imshow(opened_mask, cmap="gray") +plt.axis("off") +plt.title("opened_mask") +plt.subplot(144) +plt.imshow(closed_mask, cmap="gray") +plt.title("closed_mask") +plt.axis("off") +plt.subplots_adjust(wspace=0.05, left=0.01, bottom=0.01, right=0.99, top=0.99); +``` + +(optimize-example2)= + +### optimize_example2 + + + +Minima and roots of a function + +Demos finding minima and roots of a function. + +Define the function: + +```{code-cell} +x = np.arange(-10, 10, 0.1) + +def f(x): + return x**2 + 10 * np.sin(x) +``` + +Find minima: + +```{code-cell} +# Global optimization +grid = (-10, 10, 0.1) +xmin_global = sp.optimize.brute(f, (grid,)) +print(f"Global minima found {xmin_global}") +``` + +```{code-cell} +# Constrained optimization: search only within the bounds (0, 10) +xmin_local = sp.optimize.fminbound(f, 0, 10) +print(f"Local minimum found {xmin_local}") +``` + +Root finding + +```{code-cell} +root = sp.optimize.root(f, 1) # our initial guess is 1 +print(f"First root found {root.x}") +root2 = sp.optimize.root(f, -2.5) +print(f"Second root found {root2.x}") +``` + +Plot function, minima, and roots + +```{code-cell} +fig = plt.figure(figsize=(6, 4)) +ax = fig.add_subplot(111) +# Plot the function +ax.plot(x, f(x), "b-", label="f(x)") +# Plot the minima +xmins = np.array([xmin_global[0], xmin_local]) +ax.plot(xmins, f(xmins), "go", label="Minima") +# Plot the roots +roots = np.array([root.x, root2.x]) +ax.plot(roots, f(roots), 
"kv", label="Roots") +# Decorate the figure +ax.legend(loc="best") +ax.set_xlabel("x") +ax.set_ylabel("f(x)") +ax.axhline(0, color="gray"); +``` + +(scipy-fft-example)= + +### Plotting and manipulating FFTs for filtering + +Plot the power of the FFT of a signal and inverse FFT back to reconstruct +a signal. + +This example demonstrates {func}`scipy.fft.fft`, {func}`scipy.fft.fftfreq` and +{func}`scipy.fft.ifft`. It implements a basic filter that is very suboptimal, +and should not be used. + +#### Generate the signal + +```{code-cell} +# Seed the random number generator +rng = np.random.default_rng(27446968) + +time_step = 0.02 +period = 5.0 + +time_vec = np.arange(0, 20, time_step) +sig = np.sin(2 * np.pi / period * time_vec) + 0.5 * rng.normal(size=time_vec.size) +``` + +```{code-cell} +plt.figure(figsize=(6, 5)) +plt.plot(time_vec, sig, label="Original signal") + +# Store the figure for the book pages. +glue('original_signal_fig', plt.gcf(), display=False) +``` + +#### Compute and plot the power + +```{code-cell} +# The FFT of the signal +sig_fft = sp.fft.fft(sig) + +# And the power (sig_fft is of complex dtype) +power = np.abs(sig_fft) ** 2 + +# The corresponding frequencies +sample_freq = sp.fft.fftfreq(sig.size, d=time_step) +``` + +#### Find the peak frequency + +We can focus on only the positive frequencies. 
+ +```{code-cell} +pos_mask = np.where(sample_freq > 0) +freqs = sample_freq[pos_mask] +peak_freq = freqs[power[pos_mask].argmax()] +``` + +Check that the found peak frequency does indeed correspond to the frequency +that we generate the signal with: + +```{code-cell} +np.allclose(peak_freq, 1.0 / period) +``` + +```{code-cell} +# Plot the FFT power +plt.figure(figsize=(6, 5)) +plt.plot(sample_freq, power) +plt.xlabel("Frequency [Hz]") +plt.ylabel("power") +# An inner plot to show the peak frequency +axes = plt.axes((0.55, 0.3, 0.3, 0.5)) +plt.title("Peak frequency") +plt.plot(freqs[:8], power[pos_mask][:8]) +plt.setp(axes, yticks=[]) + +# Store the figure for the book pages. +glue('fft_of_signal_fig', plt.gcf(), display=False) +``` + +`scipy.signal.find_peaks_cwt` can also be used for more advanced peak +detection. + +#### Remove all the high frequencies + +We now remove all the high frequencies and transform back from frequencies to +signal. + +```{code-cell} +high_freq_fft = sig_fft.copy() +high_freq_fft[np.abs(sample_freq) > peak_freq] = 0 +filtered_sig = sp.fft.ifft(high_freq_fft) +``` + +```{code-cell} +plt.figure(figsize=(6, 5)) +plt.plot(time_vec, sig, label="Original signal") +plt.plot(time_vec, filtered_sig, linewidth=3, label="Filtered signal") +plt.xlabel("Time [s]") +plt.ylabel("Amplitude") +plt.legend(loc="best") + +# Store the figure for the book pages. +glue('fft_filter_fig', plt.gcf(), display=False) +``` + +**Note** This is actually a bad way of creating a filter: such a brutal +cut-off in frequency space does not control distortion on the signal. + +Filters should be created using the SciPy filter design code. + ++++ + +(scipy-spectrogram-example)= + +### Spectrogram, power spectral density + + + +Demo spectrogram and power spectral density on a frequency chirp. 
+ +Generate a chirp signal: + +```{code-cell} +# Seed the random number generator +np.random.seed(0) +``` + +```{code-cell} +time_step = 0.01 +time_vec = np.arange(0, 70, time_step) + +# A signal with a small frequency chirp +sig = np.sin(0.5 * np.pi * time_vec * (1 + 0.1 * time_vec)) +``` + +```{code-cell} +plt.figure(figsize=(8, 5)) +plt.plot(time_vec, sig) + +# Store the figure for the book pages. +glue('chirp_fig', plt.gcf(), display=False) +``` + +Compute and plot the spectrogram + +The spectrum of the signal on consecutive time windows + +```{code-cell} +freqs, times, spectrogram = sp.signal.spectrogram(sig) +``` + +```{code-cell} +plt.figure(figsize=(5, 4)) +plt.imshow(spectrogram, aspect="auto", cmap="hot_r", origin="lower") +plt.title("Spectrogram") +plt.ylabel("Frequency band") +plt.xlabel("Time window") +plt.tight_layout(); + +# Store the figure for the book pages. +glue('spectrogram_fig', plt.gcf(), display=False) +``` + +Next we compute and plot the power spectral density (PSD) + +The power of the signal per frequency band: + +```{code-cell} +freqs, psd = sp.signal.welch(sig) +``` + +```{code-cell} +plt.figure(figsize=(5, 4)) +plt.semilogx(freqs, psd) +plt.title("PSD: power spectral density") +plt.xlabel("Frequency") +plt.ylabel("Power") +plt.tight_layout(); +# Store the figure for the book pages. 
+glue('psd_fig', plt.gcf(), display=False) +``` + +(t-test)= + +### t_test + + + +Comparing 2 sets of samples from Gaussians + +```{code-cell} +# Generates 2 sets of observations +rng = np.random.default_rng(27446968) +samples1 = rng.normal(0, size=1000) +samples2 = rng.normal(1, size=1000) +``` + +```{code-cell} +# Compute a histogram of the sample +bins = np.linspace(-4, 4, 30) +histogram1, bins = np.histogram(samples1, bins=bins, density=True) +histogram2, bins = np.histogram(samples2, bins=bins, density=True) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 4)) +plt.hist(samples1, bins=bins, density=True, label="Samples 1") # type: ignore[arg-type] +plt.hist(samples2, bins=bins, density=True, label="Samples 2") # type: ignore[arg-type] +plt.legend(loc="best"); +``` + +(eg-image-blur)= + +### Simple image blur by convolution with a Gaussian kernel + + + ++++ + +Blur an image ({download}`data/elephant.png`) using a +Gaussian kernel. + +Convolution is easy to perform with FFT: convolving two signals boils +down to multiplying their FFTs (and performing an inverse FFT). 
+ +The original image: + +```{code-cell} +# read image +img = plt.imread("data/elephant.png") +plt.figure() +plt.imshow(img); +``` + +Prepare a Gaussian convolution kernel + +```{code-cell} +# First a 1-D Gaussian +t = np.linspace(-10, 10, 30) +bump = np.exp(-0.1 * t**2) +bump /= np.trapezoid(bump) # normalize the integral to 1 + +# make a 2-D kernel out of it +kernel = bump[:, np.newaxis] * bump[np.newaxis, :] +``` + +Implement convolution via FFT + +```{code-cell} +# Padded Fourier transform, with the same shape as the image +# We use {func}`scipy.fft.fft2` to have a 2D FFT +kernel_ft = sp.fft.fft2(kernel, s=img.shape[:2], axes=(0, 1)) + +# convolve +img_ft = sp.fft.fft2(img, axes=(0, 1)) + +# the 'newaxis' is to match the color direction +img2_ft = kernel_ft[:, :, np.newaxis] * img_ft +img2 = sp.fft.ifft2(img2_ft, axes=(0, 1)).real + +# clip values to range +img2 = np.clip(img2, 0, 1) +``` + +```{code-cell} +# plot output +plt.figure() +plt.imshow(img2); + +# Store figure for use in main page. +glue("blur_fig", plt.gcf(), display=False) +``` + +Further exercise (only if you are familiar with this stuff): + +A "wrapped border" appears in the upper left and top edges of the +image. This is because the padding is not done correctly, and does +not take the kernel size into account (so the convolution "flows out +of bounds of the image"). Try to remove this artifact. + ++++ + +A function to do it: {func}`scipy.signal.fftconvolve` + +The above exercise was only for didactic reasons: there exists a +function in Scipy that will do this for us, and probably do a better +job: {func}`scipy.signal.fftconvolve` + +```{code-cell} +# mode='same' is there to enforce the same output shape as input arrays +# (i.e. avoid border effects). +img3 = sp.signal.fftconvolve(img, kernel[:, :, np.newaxis], mode="same") +plt.figure() +plt.imshow(img3); +``` + +Note that we still have a decay to zero at the border of the image. 
+Using {func}`scipy.ndimage.gaussian_filter` would get rid of this +artifact. + ++++ + +(eg-periodicity-finder)= + +### Crude periodicity finding + + + +Discover the periods in evolution of animal populations +({download}`data/populations.txt`) + +Load the data: + +```{code-cell} +data = np.loadtxt("data/populations.txt") +years = data[:, 0] +populations = data[:, 1:] +``` + +Plot the data: + +```{code-cell} +plt.figure() +plt.plot(years, populations * 1e-3) +plt.xlabel("Year") +plt.ylabel(r"Population number ($\cdot10^3$)") +plt.legend(["hare", "lynx", "carrot"], loc=1); + +# Store figure for use in main page. +glue("periodicity_fig", plt.gcf(), display=False) +``` + +```{code-cell} +# Plot its periods +ft_populations = sp.fft.fft(populations, axis=0) +frequencies = sp.fft.fftfreq(populations.shape[0], years[1] - years[0]) +periods = 1 / frequencies +``` + +```{code-cell} +plt.figure() +plt.plot(periods, abs(ft_populations) * 1e-3, "o") +plt.xlim(0, 22) +plt.xlabel("Period") +plt.ylabel(r"Power ($\cdot10^3$)"); +``` + +There's probably a period of around 10 years (obvious from the +plot), but for this crude a method, there's not enough data to say +much more. diff --git a/intro/scipy/solutions.rst b/intro/scipy/solutions.rst deleted file mode 100644 index 43ec0b4a7..000000000 --- a/intro/scipy/solutions.rst +++ /dev/null @@ -1,105 +0,0 @@ -=========== -Solutions -=========== - - -.. _pi_wallis: - -The Pi Wallis Solution ----------------------- - -Compute the decimals of Pi using the Wallis formula: - -.. literalinclude:: solutions/pi_wallis.py - -.. 
_quick_sort: - -The Quicksort Solution ----------------------- - -Implement the quicksort algorithm, as defined by wikipedia: - -:: - - function quicksort(array) - var list less, greater - if length(array) ≤ 1 - return array - select and remove a pivot value pivot from array - for each x in array - if x ≤ pivot then append x to less - else append x to greater - return concatenate(quicksort(less), pivot, quicksort(greater)) - -.. literalinclude:: solutions/quick_sort.py - -.. _fibonacci: - -Fibonacci sequence ------------------- - -Write a function that displays the ``n`` first terms of the Fibonacci -sequence, defined by: - -* ``u_0 = 1; u_1 = 1`` -* ``u_(n+2) = u_(n+1) + u_n`` - -:: - - >>> def fib(n): - ... """Display the n first terms of Fibonacci sequence""" - ... a, b = 0, 1 - ... i = 0 - ... while i < n: - ... print(b) - ... a, b = b, a+b - ... i +=1 - ... - >>> fib(10) - 1 - 1 - 2 - 3 - 5 - 8 - 13 - 21 - 34 - 55 - -.. _dir_sort: - -The Directory Listing Solution ------------------------------- - -Implement a script that takes a directory name as argument, and -returns the list of '.py' files, sorted by name length. - -**Hint:** try to understand the docstring of list.sort - -.. literalinclude:: solutions/dir_sort.py - -.. _data_file: - -The Data File I/O Solution --------------------------- - -Write a function that will load the column of numbers in ``data.txt`` -and calculate the min, max and sum values. - -Data file: - -.. literalinclude:: solutions/data.txt - -Solution: - -.. literalinclude:: solutions/data_file.py - -.. _path_site: - -The PYTHONPATH Search Solution ------------------------------- - -Write a program to search your PYTHONPATH for the module ``site.py``. - -.. 
literalinclude:: solutions/path_site.py diff --git a/intro/scipy/solutions/data_file.py b/intro/scipy/solutions/data_file.py deleted file mode 100644 index 79614a91f..000000000 --- a/intro/scipy/solutions/data_file.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -=================== -I/O script example -=================== - -Script to read in a column of numbers and calculate the min, max and sum. - -Data is stored in data.txt. -""" - - -def load_data(filename): - fp = open(filename) - data_string = fp.read() - fp.close() - - data = [] - for x in data_string.split(): - # Data is read in as a string. We need to convert it to floats - data.append(float(x)) - - # Could instead use the following one line with list comprehensions! - # data = [float(x) for x in data_string.split()] - return data - - -if __name__ == "__main__": - data = load_data("data.txt") - # Python provides these basic math functions - print(f"min: {min(data):f}") - print(f"max: {max(data):f}") - print(f"sum: {sum(data):f}") diff --git a/intro/scipy/solutions/pi_wallis.py b/intro/scipy/solutions/pi_wallis.py deleted file mode 100644 index 4e9fab6cd..000000000 --- a/intro/scipy/solutions/pi_wallis.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -The correction for the calculation of pi using the Wallis formula. 
-""" - -from functools import reduce - - -pi = 3.14159265358979312 - -my_pi = 1.0 - -for i in range(1, 100000): - my_pi *= 4 * i**2 / (4 * i**2 - 1.0) - -my_pi *= 2 - -print(pi) -print(my_pi) -print(abs(pi - my_pi)) - -############################################################################### -num = 1 -den = 1 -for i in range(1, 100000): - tmp = 4 * i * i - num *= tmp - den *= tmp - 1 - -better_pi = 2 * (num / den) - -print(pi) -print(better_pi) -print(abs(pi - better_pi)) -print(abs(my_pi - better_pi)) - -############################################################################### -# Solution in a single line using more advanced constructs (reduce, lambda, -# list comprehensions -print( - 2 - * reduce( - lambda x, y: x * y, - [float(4 * (i**2)) / ((4 * (i**2)) - 1) for i in range(1, 100000)], - ) -) diff --git a/intro/scipy/solutions/quick_sort.py b/intro/scipy/solutions/quick_sort.py deleted file mode 100644 index 84c4f5f59..000000000 --- a/intro/scipy/solutions/quick_sort.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Implement the quick sort algorithm. 
-""" - - -def qsort(lst): - """Quick sort: returns a sorted copy of the list.""" - if len(lst) <= 1: - return lst - pivot, rest = lst[0], lst[1:] - - # Could use list comprehension: - # less_than = [ lt for lt in rest if lt < pivot ] - - less_than = [] - for lt in rest: - if lt < pivot: - less_than.append(lt) - - # Could use list comprehension: - # greater_equal = [ ge for ge in rest if ge >= pivot ] - - greater_equal = [] - for ge in rest: - if ge >= pivot: - greater_equal.append(ge) - return qsort(less_than) + [pivot] + qsort(greater_equal) - - -# And now check that qsort does sort: -assert qsort(range(10)) == range(10) -assert qsort(range(10)[::-1]) == range(10) -assert qsort([1, 4, 2, 5, 3]) == sorted([1, 4, 2, 5, 3]) diff --git a/intro/scipy/summary-exercises/answers_image_processing.md b/intro/scipy/summary-exercises/answers_image_processing.md new file mode 100644 index 000000000..62419bad1 --- /dev/null +++ b/intro/scipy/summary-exercises/answers_image_processing.md @@ -0,0 +1,136 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(image-answers)= + +# Example of solution for the image processing exercise: unmolten grains in glass + +![](../image_processing/MV_HFV_012.jpg) + + + +## Open the image file + +Open the image file `MV_HFV_012.jpg` and display it. Browse through the +keyword arguments in the docstring of `imshow` to display the image with the +"right" orientation (origin in the bottom left corner, and not the upper left +corner as for standard arrays). + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +import scipy as sp +``` + +```{code-cell} +dat = plt.imread('data/MV_HFV_012.jpg') +``` + +## Crop the image + +to remove the lower panel with measure information. 
+ +```{code-cell} +dat = dat[:-60] +``` + +## Filter + +Slightly filter the image with a median filter in order to refine its +histogram. Check how the histogram changes. + +```{code-cell} +filtdat = sp.ndimage.median_filter(dat, size=(7,7)) +hi_dat = np.histogram(dat, bins=np.arange(256)) +hi_filtdat = np.histogram(filtdat, bins=np.arange(256)) +``` + +![](../image_processing/exo_histos.png) + + + +## Determine thresholds + +Using the histogram of the filtered image, determine thresholds that allow to +define masks for sand pixels, glass pixels and bubble pixels. Other option +(homework): write a function that determines automatically the thresholds from +the minima of the histogram. + +```{code-cell} +void = filtdat <= 50 +sand = np.logical_and(filtdat > 50, filtdat <= 114) +glass = filtdat > 114 +``` + +## Display + +Display an image in which the three phases are colored with three different +colors. + +```{code-cell} +phases = void.astype(int) + 2*glass.astype(int) + 3*sand.astype(int) +``` + +![](../image_processing/three_phases.png) + + + +## Clean + +Use mathematical morphology to clean the different phases. + +```{code-cell} +sand_op = sp.ndimage.binary_opening(sand, iterations=2) +``` + +## Remove small grains + +Attribute labels to all bubbles and sand grains, and remove from the sand mask +grains that are smaller than 10 pixels. To do so, use `sp.ndimage.sum` or +`np.bincount` to compute the grain sizes. + +```{code-cell} +sand_labels, sand_nb = sp.ndimage.label(sand_op) +sand_areas = np.array(sp.ndimage.sum(sand_op, sand_labels, np.arange(sand_labels.max()+1))) +mask = sand_areas > 100 +remove_small_sand = mask[sand_labels.ravel()].reshape(sand_labels.shape) +``` + +![](../image_processing/sands.png) + + + +## Bubble size + +Compute the mean size of bubbles. 
+ +```{code-cell} +bubbles_labels, bubbles_nb = sp.ndimage.label(void) +bubbles_areas = np.bincount(bubbles_labels.ravel())[1:] +mean_bubble_size = bubbles_areas.mean() +median_bubble_size = np.median(bubbles_areas) +mean_bubble_size, median_bubble_size +``` diff --git a/intro/scipy/summary-exercises/answers_image_processing.rst b/intro/scipy/summary-exercises/answers_image_processing.rst deleted file mode 100644 index f95a440d7..000000000 --- a/intro/scipy/summary-exercises/answers_image_processing.rst +++ /dev/null @@ -1,79 +0,0 @@ - -.. only:: html - - >>> import numpy as np - >>> import matplotlib.pyplot as plt - >>> import scipy as sp - -.. _image-answers: - -Example of solution for the image processing exercise: unmolten grains in glass -=============================================================================== - - -.. image:: ../image_processing/MV_HFV_012.jpg - :align: center - -1. Open the image file MV_HFV_012.jpg and display it. Browse through the - keyword arguments in the docstring of ``imshow`` to display the image - with the "right" orientation (origin in the bottom left corner, and not - the upper left corner as for standard arrays). :: - - >>> dat = plt.imread('data/MV_HFV_012.jpg') - -2. Crop the image to remove the lower panel with measure information. :: - - >>> dat = dat[:-60] - -3. Slightly filter the image with a median filter in order to refine its - histogram. Check how the histogram changes. :: - - >>> filtdat = sp.ndimage.median_filter(dat, size=(7,7)) - >>> hi_dat = np.histogram(dat, bins=np.arange(256)) - >>> hi_filtdat = np.histogram(filtdat, bins=np.arange(256)) - - .. image:: ../image_processing/exo_histos.png - :align: center - -4. Using the histogram of the filtered image, determine thresholds that - allow to define masks for sand pixels, glass pixels and bubble pixels. - Other option (homework): write a function that determines automatically - the thresholds from the minima of the histogram. 
:: - - >>> void = filtdat <= 50 - >>> sand = np.logical_and(filtdat > 50, filtdat <= 114) - >>> glass = filtdat > 114 - -5. Display an image in which the three phases are colored with three - different colors. :: - - >>> phases = void.astype(int) + 2*glass.astype(int) + 3*sand.astype(int) - - .. image:: ../image_processing/three_phases.png - :align: center - -6. Use mathematical morphology to clean the different phases. :: - - >>> sand_op = sp.ndimage.binary_opening(sand, iterations=2) - -7. Attribute labels to all bubbles and sand grains, and remove from the - sand mask grains that are smaller than 10 pixels. To do so, use - ``sp.ndimage.sum`` or ``np.bincount`` to compute the grain sizes. :: - - >>> sand_labels, sand_nb = sp.ndimage.label(sand_op) - >>> sand_areas = np.array(sp.ndimage.sum(sand_op, sand_labels, np.arange(sand_labels.max()+1))) - >>> mask = sand_areas > 100 - >>> remove_small_sand = mask[sand_labels.ravel()].reshape(sand_labels.shape) - - .. image:: ../image_processing/sands.png - :align: center - - -8. Compute the mean size of bubbles. 
:: - - >>> bubbles_labels, bubbles_nb = sp.ndimage.label(void) - >>> bubbles_areas = np.bincount(bubbles_labels.ravel())[1:] - >>> mean_bubble_size = bubbles_areas.mean() - >>> median_bubble_size = np.median(bubbles_areas) - >>> mean_bubble_size, median_bubble_size - (np.float64(1699.875), np.float64(65.0)) diff --git a/intro/scipy/summary-exercises/data b/intro/scipy/summary-exercises/data new file mode 120000 index 000000000..a4ced2ff1 --- /dev/null +++ b/intro/scipy/summary-exercises/data @@ -0,0 +1 @@ +../../../data \ No newline at end of file diff --git a/intro/scipy/summary-exercises/examples/plot_cumulative_wind_speed_prediction.py b/intro/scipy/summary-exercises/examples/plot_cumulative_wind_speed_prediction.py index 699268c9f..ae063b689 100644 --- a/intro/scipy/summary-exercises/examples/plot_cumulative_wind_speed_prediction.py +++ b/intro/scipy/summary-exercises/examples/plot_cumulative_wind_speed_prediction.py @@ -3,7 +3,7 @@ ================================ Generate the image cumulative-wind-speed-prediction.png -for the interpolate section of scipy.rst. +for the interpolate section of the Scipy tutorial page. """ import numpy as np diff --git a/intro/scipy/summary-exercises/image-processing.md b/intro/scipy/summary-exercises/image-processing.md new file mode 100644 index 000000000..49f9fa6e2 --- /dev/null +++ b/intro/scipy/summary-exercises/image-processing.md @@ -0,0 +1,45 @@ +--- +orphan: true +--- + +(summary-exercise-image-processing)= + +# Image processing application: counting bubbles and unmolten grains + +![](../image_processing/MV_HFV_012.jpg) + +## Statement of the problem + +1. Open the image file MV_HFV_012.jpg and display it. Browse through the + keyword arguments in the docstring of `imshow` to display the image with + the "right" orientation (origin in the bottom left corner, and not the + upper left corner as for standard arrays). 
+ + This Scanning Electron Microscopy image shows a glass sample (light gray + matrix) with some bubbles (in black) and unmolten sand grains (dark gray). + We wish to determine the fraction of the sample covered by these three + phases, and to estimate the typical size of sand grains and bubbles, their + sizes, etc. + +2. Crop the image to remove the lower panel with measure information. + +3. Slightly filter the image with a median filter in order to refine its + histogram. Check how the histogram changes. + +4. Using the histogram of the filtered image, determine thresholds that allow + to define masks for sand pixels, glass pixels and bubble pixels. Other + option (homework): write a function that determines automatically the + thresholds from the minima of the histogram. + +5. Display an image in which the three phases are colored with three + different colors. + +6. Use mathematical morphology to clean the different phases. + +7. Attribute labels to all bubbles and sand grains, and remove from the sand + mask grains that are smaller than 10 pixels. To do so, use `ndimage.sum` or + `np.bincount` to compute the grain sizes. + +8. Compute the mean size of bubbles. + +See [image processing answers](image-answers) for a proposed solution. diff --git a/intro/scipy/summary-exercises/image-processing.rst b/intro/scipy/summary-exercises/image-processing.rst deleted file mode 100644 index 899b2e635..000000000 --- a/intro/scipy/summary-exercises/image-processing.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. _summary_exercise_image_processing: - -Image processing application: counting bubbles and unmolten grains ------------------------------------------------------------------- - -.. image:: ../image_processing/MV_HFV_012.jpg - :align: center - -.. only:: latex - -Statement of the problem -.......................... - -1. Open the image file MV_HFV_012.jpg and display it. 
Browse through the keyword arguments - in the docstring of ``imshow`` to display the image with the "right" orientation (origin - in the bottom left corner, and not the upper left corner as for standard arrays). - - This Scanning Element Microscopy image shows a glass sample (light gray matrix) with some - bubbles (on black) and unmolten sand grains (dark gray). We wish to determine the - fraction of the sample covered by these three phases, and to estimate the typical size of - sand grains and bubbles, their sizes, etc. - -2. Crop the image to remove the lower panel with measure information. - -3. Slightly filter the image with a median filter in order to refine its - histogram. Check how the histogram changes. - -4. Using the histogram of the filtered image, determine thresholds that allow to define - masks for sand pixels, glass pixels and bubble pixels. Other option (homework): write a - function that determines automatically the thresholds from the minima of the histogram. - -5. Display an image in which the three phases are colored with three - different colors. - -6. Use mathematical morphology to clean the different phases. - -7. Attribute labels to all bubbles and sand grains, and remove from the sand mask grains - that are smaller than 10 pixels. To do so, use ``ndimage.sum`` or ``np.bincount`` to - compute the grain sizes. - -8. Compute the mean size of bubbles. 
diff --git a/intro/scipy/summary-exercises/optimize-fit.md b/intro/scipy/summary-exercises/optimize-fit.md new file mode 100644 index 000000000..703a82880 --- /dev/null +++ b/intro/scipy/summary-exercises/optimize-fit.md @@ -0,0 +1,258 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(summary-exercise-optimize)= + +# Non linear least squares curve fitting: application to point extraction in topographical lidar data + +The goal of this exercise is to fit a model to some data. The data used in this tutorial are lidar data and are described in detail in the following introductory paragraph. If you're impatient and want to practice now, please skip it and go directly to {ref}`opt-fit-first-step`. + +## Introduction + +Lidar systems are optical rangefinders that analyze properties of scattered +light to measure distances. Most of them emit a short light pulse towards +a target and record the reflected signal. This signal is then processed to +extract the distance between the lidar system and the target. + +Topographical lidar systems are such systems embedded in airborne platforms. +They measure distances between the platform and the Earth, so as to deliver +information on the Earth's topography (see [^mallet] for more details). + +[^mallet]: + Mallet, C. and Bretar, F. Full-Waveform Topographic Lidar: + State-of-the-Art. _ISPRS Journal of Photogrammetry and Remote Sensing_ + 64(1), pp.1-16, January 2009 + + +In this tutorial, the goal is to analyze the waveform recorded by the lidar +system [^data]. Such a signal contains peaks whose center and amplitude permit +to compute the position and some characteristics of the hit target. 
When the +footprint of the laser beam is around 1m on the Earth surface, the beam can +hit multiple targets during the two-way propagation (for example the ground +and the top of a tree or building). The sum of the contributions of each +target hit by the laser beam then produces a complex signal with multiple +peaks, each one containing information about one target. + +One state of the art method to extract information from these data is to +decompose them into a sum of Gaussian functions where each function represents +the contribution of a target hit by the laser beam. + +Therefore, we use the {mod}`scipy.optimize` module to fit a waveform to one or +a sum of Gaussian functions. + +(opt-fit-first-step)= + +## Loading and visualization + +Load the first waveform using: + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import scipy as sp +import matplotlib.pyplot as plt +``` + +```{code-cell} +waveform_1 = np.load('examples/waveform_1.npy') + +# Times for samples. +t = np.arange(len(waveform_1)) +``` + +and visualize it: + +```{code-cell} +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_1) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +``` + +As shown above, this waveform is an 80-bin-length signal with a single peak +with an amplitude of approximately 30 in the 15 nanosecond bin. Additionally, +the base level of noise is approximately 3. These values can be used in the +initial solution. + ++++ + +## Fitting a waveform with a simple Gaussian model + +The signal is very simple and can be modeled as a single Gaussian function and +an offset corresponding to the background noise. 
To fit the signal with the +function, we must: + +- define the model +- propose an initial solution +- call `scipy.optimize.leastsq` + +### Model + +A Gaussian function defined by + +$$ +B + A \exp\left\{-\left(\frac{t-\mu}{\sigma}\right)^2\right\} +$$ + +can be defined in python by: + +```{code-cell} +def model(t, coeffs): + return coeffs[0] + coeffs[1] * np.exp( - ((t-coeffs[2])/coeffs[3])**2 ) +``` + +where + +- `coeffs[0]` is $B$ (noise) +- `coeffs[1]` is $A$ (amplitude) +- `coeffs[2]` is $\mu$ (center) +- `coeffs[3]` is $\sigma$ (width) + +### Initial solution + +One possible initial solution that we determine by inspection is: + +```{code-cell} +x0 = np.array([3, 30, 15, 1], dtype=float) +``` + +### Fit + +`scipy.optimize.leastsq` minimizes the sum of squares of the function given as +an argument. Basically, the function to minimize is the residuals (the +difference between the data and the model): + +```{code-cell} +def residuals(coeffs, y, t): + return y - model(t, coeffs) +``` + +So let's get our solution by calling {func}`scipy.optimize.leastsq` with the +following arguments: + +- the function to minimize +- an initial solution +- the additional arguments to pass to the function + +```{code-cell} +t = np.arange(len(waveform_1)) +x, flag = sp.optimize.leastsq(residuals, x0, args=(waveform_1, t)) +x +``` + +And visualize the solution: + +```{code-cell} +:tags: [hide-input] + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_1, t, model(t, x)) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +plt.legend(["Waveform", "Model"]); +``` + +_Remark:_ in fact, you should rather use {func}`scipy.optimize.curve_fit` +which takes the model and the data as arguments, so you don't need to define +the residuals any more. + ++++ + +## Going further + +::: {exercise-start} +:label: opt-complex-lidar-ex +:class: dropdown +::: + +Try with a more complex waveform (for instance `waveform_2.npy`) that contains +three significant peaks. 
You must adapt the model which is now a sum of +Gaussian functions instead of only one Gaussian peak. + +```{code-cell} +waveform_2 = np.load("examples/waveform_2.npy") + +t = np.arange(len(waveform_2)) + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_2) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]"); +``` + +In some cases, writing an explicit function to compute the Jacobian is faster +than letting `leastsq` estimate it numerically. Create a function to compute +the Jacobian of the residuals and use it as an input for `leastsq`. + +When we want to detect very small peaks in the signal, or when the initial +guess is too far from a good solution, the result given by the algorithm is +often not satisfying. Adding constraints to the parameters of the model +enables to overcome such limitations. An example of _a priori_ knowledge we can +add is the sign of our variables (which are all positive). + +::: {exercise-end} +::: + +::: {solution-start} opt-complex-lidar-ex +:class: dropdown +::: + +Generate a chart of the data fitted by Gaussian curve: + +```{code-cell} +def model(t, coeffs): + return ( + coeffs[0] + + coeffs[1] * np.exp(-(((t - coeffs[2]) / coeffs[3]) ** 2)) + + coeffs[4] * np.exp(-(((t - coeffs[5]) / coeffs[6]) ** 2)) + + coeffs[7] * np.exp(-(((t - coeffs[8]) / coeffs[9]) ** 2)) + ) +``` + +```{code-cell} +def residuals(coeffs, y, t): + return y - model(t, coeffs) +``` + +```{code-cell} +waveform_2 = np.load("examples/waveform_2.npy") +t = np.arange(len(waveform_2)) +``` + +```{code-cell} +x0 = np.array([3, 30, 20, 1, 12, 25, 1, 8, 28, 1], dtype=float) +x, flag = sp.optimize.leastsq(residuals, x0, args=(waveform_2, t)) +``` + +```{code-cell} +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_2, t, model(t, x)) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +plt.legend(["Waveform", "Model"]); +``` + +::: {solution-end} +::: + +Further exercise: compare the result of {func}`scipy.optimize.leastsq` and +what you 
can get with {func}`scipy.optimize.fmin_slsqp` when adding boundary +constraints. + +[^data]: + The data used for this tutorial are part of the demonstration data + available for the [FullAnalyze + software](https://fullanalyze.sourceforge.net) and were kindly provided by + the GIS DRAIX. diff --git a/intro/scipy/summary-exercises/optimize-fit.rst b/intro/scipy/summary-exercises/optimize-fit.rst deleted file mode 100644 index cc9e3ea59..000000000 --- a/intro/scipy/summary-exercises/optimize-fit.rst +++ /dev/null @@ -1,178 +0,0 @@ -.. for doctests - >>> import matplotlib.pyplot as plt - - - -.. _summary_exercise_optimize: - -Non linear least squares curve fitting: application to point extraction in topographical lidar data ---------------------------------------------------------------------------------------------------- - -The goal of this exercise is to fit a model to some data. The data used in this tutorial are lidar data and are described in details in the following introductory paragraph. If you're impatient and want to practice now, please skip it and go directly to :ref:`first_step`. - - -Introduction -~~~~~~~~~~~~ - -Lidars systems are optical rangefinders that analyze property of scattered light -to measure distances. Most of them emit a short light impulsion towards a target -and record the reflected signal. This signal is then processed to extract the -distance between the lidar system and the target. - -Topographical lidar systems are such systems embedded in airborne -platforms. They measure distances between the platform and the Earth, so as to -deliver information on the Earth's topography (see [#mallet]_ for more details). - -.. [#mallet] Mallet, C. and Bretar, F. Full-Waveform Topographic Lidar: State-of-the-Art. *ISPRS Journal of Photogrammetry and Remote Sensing* 64(1), pp.1-16, January 2009 http://dx.doi.org/10.1016/j.isprsjprs.2008.09.007 - -In this tutorial, the goal is to analyze the waveform recorded by the lidar -system [#data]_. 
Such a signal contains peaks whose center and amplitude permit to -compute the position and some characteristics of the hit target. When the -footprint of the laser beam is around 1m on the Earth surface, the beam can hit -multiple targets during the two-way propagation (for example the ground and the -top of a tree or building). The sum of the contributions of each target hit by -the laser beam then produces a complex signal with multiple peaks, each one -containing information about one target. - -One state of the art method to extract information from these data is to -decompose them in a sum of Gaussian functions where each function represents the -contribution of a target hit by the laser beam. - -Therefore, we use the :mod:`scipy.optimize` module to fit a waveform to one -or a sum of Gaussian functions. - -.. _first_step: - -Loading and visualization -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Load the first waveform using:: - - >>> import numpy as np - >>> waveform_1 = np.load('intro/scipy/summary-exercises/examples/waveform_1.npy') - -and visualize it:: - - >>> import matplotlib.pyplot as plt - >>> t = np.arange(len(waveform_1)) - >>> plt.plot(t, waveform_1) #doctest: +ELLIPSIS - [] - >>> plt.show() - -As shown below, this waveform is a 80-bin-length signal with a single peak -with an amplitude of approximately 30 in the 15 nanosecond bin. Additionally, the -base level of noise is approximately 3. These values can be used in the initial solution. - -.. figure:: auto_examples/images/sphx_glr_plot_optimize_lidar_data_001.png - :align: center - :target: auto_examples/plot_optimize_lidar_data.html - - -Fitting a waveform with a simple Gaussian model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The signal is very simple and can be modeled as a single Gaussian function and -an offset corresponding to the background noise. 
To fit the signal with the -function, we must: - -* define the model -* propose an initial solution -* call ``scipy.optimize.leastsq`` - - -Model -^^^^^ - -A Gaussian function defined by - -.. math:: - B + A \exp\left\{-\left(\frac{t-\mu}{\sigma}\right)^2\right\} - -can be defined in python by:: - - >>> def model(t, coeffs): - ... return coeffs[0] + coeffs[1] * np.exp( - ((t-coeffs[2])/coeffs[3])**2 ) - -where - -* ``coeffs[0]`` is :math:`B` (noise) -* ``coeffs[1]`` is :math:`A` (amplitude) -* ``coeffs[2]`` is :math:`\mu` (center) -* ``coeffs[3]`` is :math:`\sigma` (width) - - -Initial solution -^^^^^^^^^^^^^^^^ - -One possible initial solution that we determine by inspection is:: - - >>> x0 = np.array([3, 30, 15, 1], dtype=float) - -Fit -^^^ - -``scipy.optimize.leastsq`` minimizes the sum of squares of the function given as -an argument. Basically, the function to minimize is the residuals (the -difference between the data and the model):: - - >>> def residuals(coeffs, y, t): - ... return y - model(t, coeffs) - -So let's get our solution by calling :func:`scipy.optimize.leastsq` with the -following arguments: - -* the function to minimize -* an initial solution -* the additional arguments to pass to the function - -:: - - >>> import scipy as sp - >>> t = np.arange(len(waveform_1)) - >>> x, flag = sp.optimize.leastsq(residuals, x0, args=(waveform_1, t)) - >>> x - array([ 2.70363, 27.82020, 15.47924, 3.05636]) - -And visualize the solution: - -.. literalinclude:: examples/plot_optimize_lidar_data_fit.py - :lines: 29- - -.. figure:: auto_examples/images/sphx_glr_plot_optimize_lidar_data_fit_001.png - :align: center - :target: auto_examples/plot_optimize_lidar_data_fit.html - - -*Remark:* from scipy v0.8 and above, you should rather use :func:`scipy.optimize.curve_fit` which takes the model and the data as arguments, so you don't need to define the residuals any more. 
- - - -Going further -~~~~~~~~~~~~~ - -* Try with a more complex waveform (for instance ``waveform_2.npy``) - that contains three significant peaks. You must adapt the model which is - now a sum of Gaussian functions instead of only one Gaussian peak. - -.. figure:: auto_examples/images/sphx_glr_plot_optimize_lidar_complex_data_001.png - :align: center - :target: auto_examples/plot_optimize_lidar_complex_data.html - - -* In some cases, writing an explicit function to compute the Jacobian is faster - than letting ``leastsq`` estimate it numerically. Create a function to compute - the Jacobian of the residuals and use it as an input for ``leastsq``. - -* When we want to detect very small peaks in the signal, or when the initial - guess is too far from a good solution, the result given by the algorithm is - often not satisfying. Adding constraints to the parameters of the model - enables to overcome such limitations. An example of *a priori* knowledge we can - add is the sign of our variables (which are all positive). - -* See the `solution `_. - -* Further exercise: compare the result of :func:`scipy.optimize.leastsq` and what you can - get with :func:`scipy.optimize.fmin_slsqp` when adding boundary constraints. - - -.. [#data] The data used for this tutorial are part of the demonstration data available for the `FullAnalyze software `_ and were kindly provided by the GIS DRAIX. 
diff --git a/intro/scipy/summary-exercises/stats-interpolate.md b/intro/scipy/summary-exercises/stats-interpolate.md new file mode 100644 index 000000000..01953d952 --- /dev/null +++ b/intro/scipy/summary-exercises/stats-interpolate.md @@ -0,0 +1,232 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +(summary-exercise-stat-interp)= + +# Maximum wind speed prediction at the Sprogø station + +The exercise goal is to predict the maximum wind speed occurring every +50 years even if no measurement exists for such a period. The available +data are only measured over 21 years at the Sprogø meteorological +station located in Denmark. First, the statistical steps will be given +and then illustrated with functions from the `scipy.interpolate` module. At +the end the interested readers are invited to compute results from raw data +and with a slightly different approach. + +## Statistical approach + +The annual maxima are supposed to fit a normal probability density +function. However, such a function is not going to be estimated because +it gives a probability from a wind speed maximum. Finding the maximum wind +speed occurring every 50 years requires the opposite approach: the result +needs to be found from a defined probability. That is the role of the quantile +function, and the exercise goal will be to find it. In the current model, +it is supposed that the maximum wind speed occurring every 50 years is +defined as the upper 2% quantile. + +By definition, the quantile function is the inverse of the cumulative +distribution function. The latter describes the probability distribution +of an annual maximum. In the exercise, the cumulative probability `p_i` +for a given year `i` is defined as `p_i = i/(N+1)` with `N = 21`, +the number of measured years.
Thus it will be possible to calculate +the cumulative probability of every measured wind speed maximum. +From those experimental points, the `scipy.interpolate` module will be +very useful for fitting the quantile function. Finally the 50-year +maximum is going to be evaluated from the cumulative probability +of the 2% quantile. + +## Computing the cumulative probabilities + +The annual wind speed maxima have already been computed and saved in +the NumPy format in the file {download}`examples/max-speeds.npy`, thus they will be loaded +by using NumPy: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +```{code-cell} +max_speeds = np.load('examples/max-speeds.npy') +years_nb = max_speeds.shape[0] +``` + +Following the cumulative probability definition `p_i` from the previous +section, the corresponding values will be: + +```{code-cell} +cprob = (np.arange(years_nb, dtype=np.float32) + 1) / (years_nb + 1) +``` + +and they are assumed to fit the given wind speeds: + +```{code-cell} +sorted_max_speeds = np.sort(max_speeds) +``` + +## Prediction with `UnivariateSpline` + +In this section the quantile function will be estimated by using the +`UnivariateSpline` class which can represent a spline from points. The +default behavior is to build a spline of degree 3 and points can +have different weights according to their reliability. Variants are +`InterpolatedUnivariateSpline` and `LSQUnivariateSpline`, for which +error checking differs. In case a 2D spline is wanted, +the `BivariateSpline` class family is provided. All those classes +for 1D and 2D splines use the FITPACK Fortran subroutines, which is why +lower-level library access is available through the `splrep` and `splev` +functions for respectively representing and evaluating a spline. +Moreover interpolation functions without the use of FITPACK parameters +are also provided for simpler use.
+ +For the Sprogø maximum wind speeds, the `UnivariateSpline` will be +used because a spline of degree 3 seems to correctly fit the data: + +```{code-cell} +import scipy as sp + +quantile_func = sp.interpolate.UnivariateSpline(cprob, sorted_max_speeds) +``` + +The quantile function is now going to be evaluated from the full range +of probabilities: + +```{code-cell} +nprob = np.linspace(0, 1, 100) +fitted_max_speeds = quantile_func(nprob) +``` + +In the current model, the maximum wind speed occurring every 50 years is +defined as the upper 2% quantile. As a result, the cumulative probability +value will be: + +```{code-cell} +fifty_prob = 1. - 0.02 +``` + +So the storm wind speed occurring every 50 years can be estimated by: + +```{code-cell} +fifty_wind = quantile_func(fifty_prob) +fifty_wind +``` + +The results are now gathered on a Matplotlib figure: + +```{code-cell} +:tags: [hide-input] + +plt.plot(sorted_max_speeds, cprob, "o") +plt.plot(fitted_max_speeds, nprob, "g--") +plt.plot([fifty_wind], [fifty_prob], "o", ms=8.0, mfc="y", mec="y") +plt.text(30, 0.05, rf"$V_{{50}} = {fifty_wind:.2f} \, m/s$") +plt.plot([fifty_wind, fifty_wind], [plt.axis()[2], fifty_prob], "k--") +plt.xlabel("Annual wind speed maxima [$m/s$]") +plt.ylabel("Cumulative probability") +``` + +## Exercise with the Gumbel distribution + +The interested readers are now invited to do an exercise by using the wind +speeds measured over 21 years. The measurement period is around 90 minutes +(the original period was around 10 minutes but the file size has been reduced +for making the exercise setup easier). The data are stored in NumPy format +inside the file {download}`examples/sprog-windspeeds.npy`. Do not look at the +source code for the plots until you have completed the exercise. + +::: {exercise-start} +:label: gumbel-max-ex +:class: dropdown +::: + +The **first step** will be to find the annual maxima by using NumPy and plot +them as a Matplotlib bar chart.
+ +::: {exercise-end} +::: + +::: {solution-start} gumbel-max-ex +:class: dropdown +::: + +```{code-cell} +years_nb = 21 +wspeeds = np.load("examples/sprog-windspeeds.npy") +max_speeds = np.array([arr.max() for arr in np.array_split(wspeeds, years_nb)]) +``` + +```{code-cell} +plt.bar(np.arange(years_nb) + 1, max_speeds) +plt.axis("tight") +plt.xlabel("Year") +plt.ylabel("Annual wind speed maxima [$m/s$]"); +``` + +::: {solution-end} +::: + +::: {exercise-start} +:label: gumbel-predict-ex +:class: dropdown +::: + +The **second step** will be to use the Gumbel distribution on cumulative +probabilities `p_i` defined as `-log( -log(p_i) )` for fitting a linear +quantile function (remember that you can define the degree of the +`UnivariateSpline`). Plotting the annual maxima versus the Gumbel +distribution should give you the figure shown in the solution below. + +The **last step** will be to find 34.23 m/s for the maximum wind speed +occurring every 50 years. + +::: {exercise-end} +::: + +::: {solution-start} gumbel-predict-ex +:class: dropdown +::: + +This follows on from the exercise above.
+ +```{code-cell} +def gumbell_dist(arr): + return -np.log(-np.log(arr)) +``` + +```{code-cell} +sorted_max_speeds = np.sort(max_speeds) +cprob = (np.arange(years_nb, dtype=np.float32) + 1) / (years_nb + 1) +gprob = gumbell_dist(cprob) +speed_spline = sp.interpolate.UnivariateSpline(gprob, sorted_max_speeds, k=1) +nprob = gumbell_dist(np.linspace(1e-3, 1 - 1e-3, 100)) +fitted_max_speeds = speed_spline(nprob) +``` + +```{code-cell} +fifty_prob = gumbell_dist(49.0 / 50.0) +fifty_wind = speed_spline(fifty_prob) +``` + +```{code-cell} +plt.plot(sorted_max_speeds, gprob, "o") +plt.plot(fitted_max_speeds, nprob, "g--") +plt.plot([fifty_wind], [fifty_prob], "o", ms=8.0, mfc="y", mec="y") +plt.plot([fifty_wind, fifty_wind], [plt.axis()[2], fifty_prob], "k--") +plt.text(35, -1, rf"$V_{{50}} = {fifty_wind:.2f} \, m/s$") +plt.xlabel("Annual wind speed maxima [$m/s$]") +plt.ylabel("Gumbell cumulative probability"); +``` + +::: {solution-end} +::: diff --git a/intro/scipy/summary-exercises/stats-interpolate.rst b/intro/scipy/summary-exercises/stats-interpolate.rst deleted file mode 100644 index cb531c8c3..000000000 --- a/intro/scipy/summary-exercises/stats-interpolate.rst +++ /dev/null @@ -1,137 +0,0 @@ -.. _summary_exercise_stat_interp: - -Maximum wind speed prediction at the Sprogø station ---------------------------------------------------- -The exercise goal is to predict the maximum wind speed occurring every -50 years even if no measure exists for such a period. The available -data are only measured over 21 years at the Sprogø meteorological -station located in Denmark. First, the statistical steps will be given -and then illustrated with functions from the scipy.interpolate module. -At the end the interested readers are invited to compute results from -raw data and in a slightly different approach. - -Statistical approach -~~~~~~~~~~~~~~~~~~~~ -The annual maxima are supposed to fit a normal probability density -function. 
However such function is not going to be estimated because -it gives a probability from a wind speed maxima. Finding the maximum wind -speed occurring every 50 years requires the opposite approach, the result -needs to be found from a defined probability. That is the quantile function -role and the exercise goal will be to find it. In the current model, -it is supposed that the maximum wind speed occurring every 50 years is -defined as the upper 2% quantile. - -By definition, the quantile function is the inverse of the cumulative -distribution function. The latter describes the probability distribution -of an annual maxima. In the exercise, the cumulative probability ``p_i`` -for a given year ``i`` is defined as ``p_i = i/(N+1)`` with ``N = 21``, -the number of measured years. Thus it will be possible to calculate -the cumulative probability of every measured wind speed maxima. -From those experimental points, the scipy.interpolate module will be -very useful for fitting the quantile function. Finally the 50 years -maxima is going to be evaluated from the cumulative probability -of the 2% quantile. 
- -Computing the cumulative probabilities -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The annual wind speeds maxima have already been computed and saved in -the NumPy format in the file :download:`examples/max-speeds.npy`, thus they will be loaded -by using NumPy:: - - >>> import numpy as np - >>> max_speeds = np.load('intro/scipy/summary-exercises/examples/max-speeds.npy') - >>> years_nb = max_speeds.shape[0] - -Following the cumulative probability definition ``p_i`` from the previous -section, the corresponding values will be:: - - >>> cprob = (np.arange(years_nb, dtype=np.float32) + 1)/(years_nb + 1) - -and they are assumed to fit the given wind speeds:: - - >>> sorted_max_speeds = np.sort(max_speeds) - - -Prediction with UnivariateSpline -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In this section the quantile function will be estimated by using the -``UnivariateSpline`` class which can represent a spline from points. The -default behavior is to build a spline of degree 3 and points can -have different weights according to their reliability. Variants are -``InterpolatedUnivariateSpline`` and ``LSQUnivariateSpline`` on which -errors checking is going to change. In case a 2D spline is wanted, -the ``BivariateSpline`` class family is provided. All those classes -for 1D and 2D splines use the FITPACK Fortran subroutines, that's why a -lower library access is available through the ``splrep`` and ``splev`` -functions for respectively representing and evaluating a spline. -Moreover interpolation functions without the use of FITPACK parameters -are also provided for simpler use. 
- -For the Sprogø maxima wind speeds, the ``UnivariateSpline`` will be -used because a spline of degree 3 seems to correctly fit the data:: - - >>> import scipy as sp - >>> quantile_func = sp.interpolate.UnivariateSpline(cprob, sorted_max_speeds) - -The quantile function is now going to be evaluated from the full range -of probabilities:: - - >>> nprob = np.linspace(0, 1, 100) - >>> fitted_max_speeds = quantile_func(nprob) - -In the current model, the maximum wind speed occurring every 50 years is -defined as the upper 2% quantile. As a result, the cumulative probability -value will be:: - - >>> fifty_prob = 1. - 0.02 - - -So the storm wind speed occurring every 50 years can be guessed by:: - - >>> fifty_wind = quantile_func(fifty_prob) - >>> fifty_wind - array(32.97989825...) - -The results are now gathered on a Matplotlib figure: - -.. figure:: auto_examples/images/sphx_glr_plot_cumulative_wind_speed_prediction_001.png - :align: center - - Solution: :download:`Python source file ` - - -Exercise with the Gumbell distribution -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The interested readers are now invited to make an exercise by using the wind -speeds measured over 21 years. The measurement period is around 90 minutes (the -original period was around 10 minutes but the file size has been reduced for -making the exercise setup easier). The data are stored in NumPy format inside -the file :download:`examples/sprog-windspeeds.npy`. Do not look at -the source code for the plots -until you have completed the exercise. - -* The first step will be to find the annual maxima by using NumPy - and plot them as a matplotlib bar figure. - -.. 
figure:: auto_examples/images/sphx_glr_plot_sprog_annual_maxima_001.png - :align: center - - Solution: :download:`Python source file ` - - - -* The second step will be to use the Gumbell distribution on cumulative - probabilities ``p_i`` defined as ``-log( -log(p_i) )`` for fitting - a linear quantile function (remember that you can define the degree - of the ``UnivariateSpline``). Plotting the annual maxima versus the - Gumbell distribution should give you the following figure. - -.. figure:: auto_examples/images/sphx_glr_plot_gumbell_wind_speed_prediction_001.png - :align: center - - Solution: :download:`Python source file ` - - - -* The last step will be to find 34.23 m/s for the maximum wind speed - occurring every 50 years. diff --git a/intro/scipy/summary-exercises/stats-interpolate_examples.md b/intro/scipy/summary-exercises/stats-interpolate_examples.md new file mode 100644 index 000000000..0a1da199a --- /dev/null +++ b/intro/scipy/summary-exercises/stats-interpolate_examples.md @@ -0,0 +1,280 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +# Examples for intro/scipy/summary-exercises/stats-interpolate.md + +(the-gumbell-distribution-results)= + +## The Gumbell distribution, results + + + ++++ + +Generate the exercise results on the Gumbell distribution + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +``` + +```{code-cell} +years_nb = 21 +wspeeds = np.load("examples/sprog-windspeeds.npy") +max_speeds = np.array([arr.max() for arr in np.array_split(wspeeds, years_nb)]) +``` + +```{code-cell} +plt.figure() +plt.bar(np.arange(years_nb) + 1, max_speeds) +plt.axis("tight") +plt.xlabel("Year") +plt.ylabel("Annual wind speed maxima [$m/s$]") +``` + +(the-gumbell-distribution)= + +## The Gumbell distribution + + + ++++ + +Generate the exercise results on 
the Gumbell distribution + +```{code-cell} +import scipy as sp +``` + +```{code-cell} +def gumbell_dist(arr): + return -np.log(-np.log(arr)) +``` + +```{code-cell} +years_nb = 21 +wspeeds = np.load("examples/sprog-windspeeds.npy") +max_speeds = np.array([arr.max() for arr in np.array_split(wspeeds, years_nb)]) +sorted_max_speeds = np.sort(max_speeds) +``` + +```{code-cell} +cprob = (np.arange(years_nb, dtype=np.float32) + 1) / (years_nb + 1) +gprob = gumbell_dist(cprob) +speed_spline = sp.interpolate.UnivariateSpline(gprob, sorted_max_speeds, k=1) +nprob = gumbell_dist(np.linspace(1e-3, 1 - 1e-3, 100)) +fitted_max_speeds = speed_spline(nprob) +``` + +```{code-cell} +fifty_prob = gumbell_dist(49.0 / 50.0) +fifty_wind = speed_spline(fifty_prob) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure() +plt.plot(sorted_max_speeds, gprob, "o") +plt.plot(fitted_max_speeds, nprob, "g--") +plt.plot([fifty_wind], [fifty_prob], "o", ms=8.0, mfc="y", mec="y") +plt.plot([fifty_wind, fifty_wind], [plt.axis()[2], fifty_prob], "k--") +plt.text(35, -1, rf"$V_{{50}} = {fifty_wind:.2f} \, m/s$") +plt.xlabel("Annual wind speed maxima [$m/s$]") +plt.ylabel("Gumbell cumulative probability") +``` + +## Other examples + ++++ + +(cumulative-wind-speed-prediction)= + +### Cumulative wind speed prediction + + + ++++ + +Generate the image cumulative-wind-speed-prediction.png +for the interpolate section of the Scipy tutorial page. 
+ +```{code-cell} +max_speeds = np.load("examples/max-speeds.npy") +years_nb = max_speeds.shape[0] +``` + +```{code-cell} +cprob = (np.arange(years_nb, dtype=np.float32) + 1) / (years_nb + 1) +sorted_max_speeds = np.sort(max_speeds) +speed_spline = sp.interpolate.UnivariateSpline(cprob, sorted_max_speeds) +nprob = np.linspace(0, 1, 100) +fitted_max_speeds = speed_spline(nprob) +``` + +```{code-cell} +fifty_prob = 1.0 - 0.02 +fifty_wind = speed_spline(fifty_prob) +``` + +```{code-cell} +plt.figure() +plt.plot(sorted_max_speeds, cprob, "o") +plt.plot(fitted_max_speeds, nprob, "g--") +plt.plot([fifty_wind], [fifty_prob], "o", ms=8.0, mfc="y", mec="y") +plt.text(30, 0.05, rf"$V_{{50}} = {fifty_wind:.2f} \, m/s$") +plt.plot([fifty_wind, fifty_wind], [plt.axis()[2], fifty_prob], "k--") +plt.xlabel("Annual wind speed maxima [$m/s$]") +plt.ylabel("Cumulative probability") +``` + +(the-lidar-system-data-2-of-2-datasets)= + +### The lidar system, data (2 of 2 datasets) + + + ++++ + +Generate a chart of more complex data recorded by the lidar system + +```{code-cell} +waveform_2 = np.load("examples/waveform_2.npy") +``` + +```{code-cell} +t = np.arange(len(waveform_2)) +``` + +```{code-cell} +:tags: [hide-input] + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_2) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +``` + +(the-lidar-system-data-and-fit-2-of-2-datasets)= + +### The lidar system, data and fit (2 of 2 datasets) + + + ++++ + +Generate a chart of the data fitted by Gaussian curve + +```{code-cell} +def model(t, coeffs): + return ( + coeffs[0] + + coeffs[1] * np.exp(-(((t - coeffs[2]) / coeffs[3]) ** 2)) + + coeffs[4] * np.exp(-(((t - coeffs[5]) / coeffs[6]) ** 2)) + + coeffs[7] * np.exp(-(((t - coeffs[8]) / coeffs[9]) ** 2)) + ) +``` + +```{code-cell} +def residuals(coeffs, y, t): + return y - model(t, coeffs) +``` + +```{code-cell} +waveform_2 = np.load("examples/waveform_2.npy") +t = np.arange(len(waveform_2)) +``` + +```{code-cell} +x0 = 
np.array([3, 30, 20, 1, 12, 25, 1, 8, 28, 1], dtype=float) +x, flag = sp.optimize.leastsq(residuals, x0, args=(waveform_2, t)) +``` + +```{code-cell} +:tags: [hide-input] + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_2, t, model(t, x)) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +plt.legend(["Waveform", "Model"]) +``` + +(the-lidar-system-data-1-of-2-datasets)= + +### The lidar system, data (1 of 2 datasets) + + + ++++ + +Generate a chart of the data recorded by the lidar system + +```{code-cell} +waveform_1 = np.load("examples/waveform_1.npy") +``` + +```{code-cell} +t = np.arange(len(waveform_1)) +``` + +```{code-cell} +:tags: [hide-input] + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_1) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +``` + +(the-lidar-system-data-and-fit-1-of-2-datasets)= + +### The lidar system, data and fit (1 of 2 datasets) + + + ++++ + +Generate a chart of the data fitted by Gaussian curve + +```{code-cell} +def model(t, coeffs): + return coeffs[0] + coeffs[1] * np.exp(-(((t - coeffs[2]) / coeffs[3]) ** 2)) +``` + +```{code-cell} +def residuals(coeffs, y, t): + return y - model(t, coeffs) +``` + +```{code-cell} +waveform_1 = np.load("examples/waveform_1.npy") +t = np.arange(len(waveform_1)) +``` + +```{code-cell} +x0 = np.array([3, 30, 15, 1], dtype=float) +x, flag = sp.optimize.leastsq(residuals, x0, args=(waveform_1, t)) +x +``` + +```{code-cell} +:tags: [hide-input] + +fig, ax = plt.subplots(figsize=(8, 6)) +plt.plot(t, waveform_1, t, model(t, x)) +plt.xlabel("Time [ns]") +plt.ylabel("Amplitude [bins]") +plt.legend(["Waveform", "Model"]) +``` diff --git a/jl-build-requirements.txt b/jl-build-requirements.txt new file mode 100644 index 000000000..78ba71c9e --- /dev/null +++ b/jl-build-requirements.txt @@ -0,0 +1,5 @@ +# Build requirements +-r requirements.txt +jupyterlite-core +jupyterlite-pyodide-kernel +jupyterlab_server diff --git a/jupytext.toml b/jupytext.toml new file mode 
100644 index 000000000..824408058 --- /dev/null +++ b/jupytext.toml @@ -0,0 +1,3 @@ +# https://jupytext.readthedocs.io/en/latest/config.html +# Pair ipynb notebooks to Myst Markdown text notebooks. +formats = "ipynb,md:myst" diff --git a/packages/index.md b/packages/index.md new file mode 100644 index 000000000..f722b3f8c --- /dev/null +++ b/packages/index.md @@ -0,0 +1,12 @@ +--- +orphan: true +--- + +(applications-part)= + +# Introduction to packages and applications + +This part of the _Scientific Python Lectures_ is dedicated to various +scientific packages useful for extended needs. + +See the "Packages and applications" section in the table of contents. diff --git a/packages/index.rst b/packages/index.rst deleted file mode 100644 index 420817638..000000000 --- a/packages/index.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. _applications_part: - -Packages and applications -========================== - -This part of the *Scientific Python Lectures* is dedicated to various -scientific packages useful for extended needs. - -| - - -.. include:: ../includes/big_toc_css.rst - :start-line: 1 - -.. rst-class:: tune - - .. toctree:: - :maxdepth: 3 - - statistics/index.rst - sympy.rst - scikit-image/index.rst - scikit-learn/index.rst diff --git a/packages/scikit-image/examples/README.txt b/packages/scikit-image/examples/README.txt deleted file mode 100644 index 0b7a16d6a..000000000 --- a/packages/scikit-image/examples/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -Examples for the scikit-image chapter -====================================== diff --git a/packages/scikit-image/examples/plot_boundaries.py b/packages/scikit-image/examples/plot_boundaries.py deleted file mode 100644 index 7c6df30f0..000000000 --- a/packages/scikit-image/examples/plot_boundaries.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Segmentation contours -===================== - -Visualize segmentation contours on original grayscale image. 
-""" - -from skimage import data, segmentation -from skimage import filters -import matplotlib.pyplot as plt -import numpy as np - -coins = data.coins() -mask = coins > filters.threshold_otsu(coins) -clean_border = segmentation.clear_border(mask).astype(int) - -coins_edges = segmentation.mark_boundaries(coins, clean_border) - -plt.figure(figsize=(8, 3.5)) -plt.subplot(121) -plt.imshow(clean_border, cmap="gray") -plt.axis("off") -plt.subplot(122) -plt.imshow(coins_edges) -plt.axis("off") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_camera.py b/packages/scikit-image/examples/plot_camera.py deleted file mode 100644 index 030b6d1ef..000000000 --- a/packages/scikit-image/examples/plot_camera.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Displaying a simple image -========================= - -Load and display an image -""" - -import matplotlib.pyplot as plt -from skimage import data - -camera = data.camera() - - -plt.figure(figsize=(4, 4)) -plt.imshow(camera, cmap="gray", interpolation="nearest") -plt.axis("off") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_camera_uint.py b/packages/scikit-image/examples/plot_camera_uint.py deleted file mode 100644 index bb9253e41..000000000 --- a/packages/scikit-image/examples/plot_camera_uint.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Integers can overflow -====================== - -An illustration of overflow problem arising when working with integers -""" - -import matplotlib.pyplot as plt -from skimage import data - -camera = data.camera() -camera_multiply = 3 * camera - -plt.figure(figsize=(8, 4)) -plt.subplot(121) -plt.imshow(camera, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(122) -plt.imshow(camera_multiply, cmap="gray", interpolation="nearest") -plt.axis("off") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_check.py b/packages/scikit-image/examples/plot_check.py deleted file mode 100644 index 
79e7a5b47..000000000 --- a/packages/scikit-image/examples/plot_check.py +++ /dev/null @@ -1,17 +0,0 @@ -""" -Creating an image -================== - -How to create an image with basic NumPy commands : ``np.zeros``, slicing... - -This examples show how to create a simple checkerboard. -""" - -import numpy as np -import matplotlib.pyplot as plt - -check = np.zeros((8, 8)) -check[::2, 1::2] = 1 -check[1::2, ::2] = 1 -plt.matshow(check, cmap="gray") -plt.show() diff --git a/packages/scikit-image/examples/plot_equalize_hist.py b/packages/scikit-image/examples/plot_equalize_hist.py deleted file mode 100644 index 9696b5e1c..000000000 --- a/packages/scikit-image/examples/plot_equalize_hist.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Equalizing the histogram of an image -===================================== - -Histogram equalizing makes images have a uniform histogram. -""" - -from skimage import data, exposure -import matplotlib.pyplot as plt - -camera = data.camera() -camera_equalized = exposure.equalize_hist(camera) - -plt.figure(figsize=(7, 3)) - -plt.subplot(121) -plt.imshow(camera, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(122) -plt.imshow(camera_equalized, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_features.py b/packages/scikit-image/examples/plot_features.py deleted file mode 100644 index 74cda9f2d..000000000 --- a/packages/scikit-image/examples/plot_features.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Affine transform -================= - -Warping and affine transforms of images. 
-""" - -import matplotlib.pyplot as plt - -from skimage import data -from skimage.feature import corner_harris, corner_subpix, corner_peaks -from skimage.transform import warp, AffineTransform - - -tform = AffineTransform(scale=(1.3, 1.1), rotation=1, shear=0.7, translation=(210, 50)) -image = warp(data.checkerboard(), tform.inverse, output_shape=(350, 350)) - -coords = corner_peaks(corner_harris(image), min_distance=5) -coords_subpix = corner_subpix(image, coords, window_size=13) - -plt.gray() -plt.imshow(image, interpolation="nearest") -plt.plot(coords_subpix[:, 1], coords_subpix[:, 0], "+r", markersize=15, mew=5) -plt.plot(coords[:, 1], coords[:, 0], ".b", markersize=7) -plt.axis("off") -plt.show() diff --git a/packages/scikit-image/examples/plot_filter_coins.py b/packages/scikit-image/examples/plot_filter_coins.py deleted file mode 100644 index f44d8324d..000000000 --- a/packages/scikit-image/examples/plot_filter_coins.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Various denoising filters -========================= - -This example compares several denoising filters available in scikit-image: -a Gaussian filter, a median filter, and total variation denoising. 
-""" - -import numpy as np -import matplotlib.pyplot as plt -from skimage import data -from skimage import filters -from skimage import restoration - -coins = data.coins() -gaussian_filter_coins = filters.gaussian(coins, sigma=2) -med_filter_coins = filters.median(coins, np.ones((3, 3))) -tv_filter_coins = restoration.denoise_tv_chambolle(coins, weight=0.1) - -plt.figure(figsize=(16, 4)) -plt.subplot(141) -plt.imshow(coins[10:80, 300:370], cmap="gray", interpolation="nearest") -plt.axis("off") -plt.title("Image") -plt.subplot(142) -plt.imshow(gaussian_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") -plt.axis("off") -plt.title("Gaussian filter") -plt.subplot(143) -plt.imshow(med_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") -plt.axis("off") -plt.title("Median filter") -plt.subplot(144) -plt.imshow(tv_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") -plt.axis("off") -plt.title("TV filter") -plt.show() diff --git a/packages/scikit-image/examples/plot_labels.py b/packages/scikit-image/examples/plot_labels.py deleted file mode 100644 index 1b99701fd..000000000 --- a/packages/scikit-image/examples/plot_labels.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Labelling connected components of an image -=========================================== - -This example shows how to label connected components of a binary image, using -the dedicated skimage.measure.label function. 
-""" - -from skimage import measure -from skimage import filters -import matplotlib.pyplot as plt -import numpy as np - -n = 12 -l = 256 -rng = np.random.default_rng(27446968) -im = np.zeros((l, l)) -points = l * rng.random((2, n**2)) -im[(points[0]).astype(int), (points[1]).astype(int)] = 1 -im = filters.gaussian(im, sigma=l / (4.0 * n)) -blobs = im > 0.7 * im.mean() - -all_labels = measure.label(blobs) -blobs_labels = measure.label(blobs, background=0) - -plt.figure(figsize=(9, 3.5)) -plt.subplot(131) -plt.imshow(blobs, cmap="gray") -plt.axis("off") -plt.subplot(132) -plt.imshow(all_labels, cmap="nipy_spectral") -plt.axis("off") -plt.subplot(133) -plt.imshow(blobs_labels, cmap="nipy_spectral") -plt.axis("off") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_segmentations.py b/packages/scikit-image/examples/plot_segmentations.py deleted file mode 100644 index 16896c987..000000000 --- a/packages/scikit-image/examples/plot_segmentations.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Watershed and random walker for segmentation -============================================ - -This example compares two segmentation methods in order to separate two -connected disks: the watershed algorithm, and the random walker algorithm. - -Both segmentation methods require seeds, that are pixels belonging -unambigusouly to a reagion. Here, local maxima of the distance map to the -background are used as seeds. 
-""" - -import numpy as np -from skimage.segmentation import watershed -from skimage.feature import peak_local_max -from skimage import measure -from skimage.segmentation import random_walker -import matplotlib.pyplot as plt -import scipy as sp - -# Generate an initial image with two overlapping circles -x, y = np.indices((80, 80)) -x1, y1, x2, y2 = 28, 28, 44, 52 -r1, r2 = 16, 20 -mask_circle1 = (x - x1) ** 2 + (y - y1) ** 2 < r1**2 -mask_circle2 = (x - x2) ** 2 + (y - y2) ** 2 < r2**2 -image = np.logical_or(mask_circle1, mask_circle2) -# Now we want to separate the two objects in image -# Generate the markers as local maxima of the distance -# to the background -distance = sp.ndimage.distance_transform_edt(image) -peak_idx = peak_local_max(distance, footprint=np.ones((3, 3)), labels=image) -peak_mask = np.zeros_like(distance, dtype=bool) -peak_mask[tuple(peak_idx.T)] = True -markers = measure.label(peak_mask) -labels_ws = watershed(-distance, markers, mask=image) - -markers[~image] = -1 -labels_rw = random_walker(image, markers) - -plt.figure(figsize=(12, 3.5)) -plt.subplot(141) -plt.imshow(image, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.title("image") -plt.subplot(142) -plt.imshow(-distance, interpolation="nearest") -plt.axis("off") -plt.title("distance map") -plt.subplot(143) -plt.imshow(labels_ws, cmap="nipy_spectral", interpolation="nearest") -plt.axis("off") -plt.title("watershed segmentation") -plt.subplot(144) -plt.imshow(labels_rw, cmap="nipy_spectral", interpolation="nearest") -plt.axis("off") -plt.title("random walker segmentation") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_sobel.py b/packages/scikit-image/examples/plot_sobel.py deleted file mode 100644 index c1d7a3195..000000000 --- a/packages/scikit-image/examples/plot_sobel.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Computing horizontal gradients with the Sobel filter -===================================================== - -This example 
illustrates the use of the horizontal Sobel filter, to compute -horizontal gradients. -""" - -from skimage import data -from skimage import filters -import matplotlib.pyplot as plt - -text = data.text() -hsobel_text = filters.sobel_h(text) - -plt.figure(figsize=(12, 3)) - -plt.subplot(121) -plt.imshow(text, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(122) -plt.imshow(hsobel_text, cmap="nipy_spectral", interpolation="nearest") -plt.axis("off") -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/examples/plot_threshold.py b/packages/scikit-image/examples/plot_threshold.py deleted file mode 100644 index b03c75df4..000000000 --- a/packages/scikit-image/examples/plot_threshold.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Otsu thresholding -================== - -This example illustrates automatic Otsu thresholding. -""" - -import matplotlib.pyplot as plt -from skimage import data -from skimage import filters -from skimage import exposure - -camera = data.camera() -val = filters.threshold_otsu(camera) - -hist, bins_center = exposure.histogram(camera) - -plt.figure(figsize=(9, 4)) -plt.subplot(131) -plt.imshow(camera, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(132) -plt.imshow(camera < val, cmap="gray", interpolation="nearest") -plt.axis("off") -plt.subplot(133) -plt.plot(bins_center, hist, lw=2) -plt.axvline(val, color="k", ls="--") - -plt.tight_layout() -plt.show() diff --git a/packages/scikit-image/index.md b/packages/scikit-image/index.md new file mode 100644 index 000000000..0e79ae92d --- /dev/null +++ b/packages/scikit-image/index.md @@ -0,0 +1,872 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(scikit-image)= + +# `scikit-image`: image processing + +**Author**: _Emmanuelle Gouillart_ + +```{code-cell} +import numpy as np +import 
scipy as sp +import matplotlib.pyplot as plt +``` + +[scikit-image](https://scikit-image.org/) is a Python package dedicated +to image processing, using NumPy arrays as image objects. +This chapter describes how to use `scikit-image` for various image +processing tasks, and how it relates to other scientific Python +modules such as NumPy and SciPy. + +:::{admonition} See also + +For basic image manipulation, such as image cropping or simple +filtering, a large number of simple operations can be realized with +NumPy and SciPy only. See {ref}`basic-image`. + +Note that you should be familiar with the content of the previous +chapter before reading the current one, as basic operations such as +masking and labeling are a prerequisite. +::: + +## Introduction and concepts + +Images are NumPy arrays (`np.ndarray`) + ++++ + +::: {list-table} **Terms** + +- - Pixels + - array values: `a[2, 3]` + +- - Channels + - array dimensions + +- - Image encoding + - `dtype` (`np.uint8`, `np.uint16`, `np.float64`) + +- - Filters + - functions (`numpy`, `skimage`, `scipy`) + +::: + +```{code-cell} +# This example shows how to create a simple checkerboard. +check = np.zeros((8, 8)) +check[::2, 1::2] = 1 +check[1::2, ::2] = 1 +plt.imshow(check, cmap='gray', interpolation='nearest'); +``` + +### `scikit-image` and the scientific Python ecosystem + +`scikit-image` is packaged in both `pip` and `conda`-based +Python installations, as well as in most Linux distributions. Other +Python packages for image processing & visualization that operate on +NumPy arrays include: + +::: {list-table} **Other packages for working with images** + +- - {mod}`scipy.ndimage` + - For N-dimensional arrays. Basic filtering, mathematical morphology, + region properties +- - [Mahotas](https://mahotas.readthedocs.io) + - With a focus on high-speed implementations. +- - [Napari](https://napari.org) + - A fast, interactive, multi-dimensional image viewer built in Qt. 
+ +::: + +Some powerful C++ image processing libraries also have Python bindings: + +::: {list-table} **C++ libraries with Python bindings** + +- - [OpenCV](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html) + - A highly optimized computer vision library with a focus on real-time + applications. +- - [ITK](https://www.itk.org) + - The Insight ToolKit, especially useful for registration and working with + 3D images. + +::: + +To varying degrees, these C++-based libraries tend to be less Pythonic and +NumPy-friendly. + ++++ + +### What is included in scikit-image + +- Website: <https://scikit-image.org/> +- Gallery of examples: + <https://scikit-image.org/docs/stable/auto_examples/> + +The library contains predominantly image processing algorithms, but +also utility functions to ease data handling and processing. +It contains the following submodules: + +::: {list-table} **Scikit-image submodules** + +- - {mod}`skimage.color` + - Color space conversion. +- - {mod}`skimage.data` + - Test images and example data. +- - {mod}`skimage.draw` + - Drawing primitives (lines, text, etc.) that operate on NumPy arrays. +- - {mod}`skimage.exposure` + - Image intensity adjustment, e.g., histogram equalization, etc. +- - {mod}`skimage.feature` + - Feature detection and extraction, e.g., texture analysis, corners, etc. +- - {mod}`skimage.filters` + - Sharpening, edge finding, rank filters, thresholding, etc. +- - {mod}`skimage.graph` + - Graph-theoretic operations, e.g., shortest paths. +- - {mod}`skimage.io` + - Reading, saving, and displaying images and video. +- - {mod}`skimage.measure` + - Measurement of image properties, e.g., region properties and contours. +- - {mod}`skimage.metrics` + - Metrics corresponding to images, e.g. distance metrics, similarity, etc. +- - {mod}`skimage.morphology` + - Morphological operations, e.g., opening or skeletonization. +- - {mod}`skimage.restoration` + - Restoration algorithms, e.g., deconvolution algorithms, denoising, etc. +- - {mod}`skimage.segmentation` + - Partitioning an image into multiple regions. 
+- - {mod}`skimage.transform` + - Geometric and other transforms, e.g., rotation or the Radon transform. +- - {mod}`skimage.util` + - Generic utilities. + +::: + + + +## Importing + +We import `scikit-image` using the convention: + +```{code-cell} +import skimage as ski +``` + +Most functionality lives in subpackages, e.g.: + +```{code-cell} +image = ski.data.cat() +``` + +You can list all submodules with: + +```{code-cell} +for m in dir(ski): print(m) +``` + +Most `scikit-image` functions take NumPy `ndarrays` as arguments + +```{code-cell} +camera = ski.data.camera() +camera.dtype +``` + +```{code-cell} +camera.shape +``` + +```{code-cell} +filtered_camera = ski.filters.gaussian(camera, sigma=1) +type(filtered_camera) +``` + +## Example data + +To start off, we need example images to work with. +The library ships with a few of these: + +{mod}`skimage.data` + +```{code-cell} +image = ski.data.cat() +image.shape +``` + +## Input/output, data types and colorspaces + +I/O: {mod}`skimage.io` + +Save an image to disk: {func}`skimage.io.imsave` + +```{code-cell} +ski.io.imsave("cat.png", image) +``` + +Reading from files: {func}`skimage.io.imread` + +```{code-cell} +cat = ski.io.imread("cat.png") +``` + +```{code-cell} +camera = ski.data.camera() + +plt.figure(figsize=(4, 4)) +plt.imshow(camera, cmap="gray", interpolation="nearest") +plt.axis("off") + +plt.tight_layout() +``` + +This works with many data formats supported by the +[ImageIO](https://imageio.readthedocs.io) library. + +Loading also works with URLs: + +```{code-cell} +logo = ski.io.imread('https://scikit-image.org/_static/img/logo.png') +``` + +### Data types + +Image ndarrays can be represented either by integers (signed or unsigned) or +floats. 
+ +Careful with overflows with integer data types + +```{code-cell} +camera = ski.data.camera() +camera.dtype +camera_multiply = 3 * camera +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(8, 4)) +plt.subplot(121) +plt.imshow(camera, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.subplot(122) +plt.imshow(camera_multiply, cmap="gray", interpolation="nearest") +plt.axis("off") + +plt.tight_layout() +``` + +Different integer sizes are possible: 8-, 16- or 32-bit, signed or +unsigned. + +**Warning**: An important (if questionable) `skimage` **convention**: float +images are supposed to lie in [-1, 1] (in order to have comparable contrast +for all float images): + +```{code-cell} +camera_float = ski.util.img_as_float(camera) +camera.max(), camera_float.max() +``` + +Some image processing routines need to work with float arrays, and may +hence output an array with a different type and data range than the +input array + +```{code-cell} +camera_sobel = ski.filters.sobel(camera) +camera_sobel.max() +``` + +Utility functions are provided in {mod}`skimage` to convert both the +dtype and the data range, following skimage's conventions: +`util.img_as_float`, `util.img_as_ubyte`, etc. + +See the [user guide](https://scikit-image.org/docs/stable/user_guide/data_types.html) for +more details. + ++++ + +### Colorspaces + +Color images are of shape (N, M, 3) or (N, M, 4) (when an alpha channel +encodes transparency) + +```{code-cell} +face = sp.datasets.face() +face.shape +``` + +Routines converting between different colorspaces (RGB, HSV, LAB etc.) +are available in {mod}`skimage.color` : `color.rgb2hsv`, `color.lab2rgb`, +etc. Check the docstring for the expected dtype (and data range) of input +images. + +:::{admonition} 3D images + +Most functions of `skimage` can take 3D images as input arguments. +Check the docstring to know if a function can be used on 3D images +(for example MRI or CT images). 
+ +::: + +::: {exercise-start} +:label: ski-grayscale-ex +:class: dropdown +::: + +Open a color image on your disk as a NumPy array. + +Find a skimage function computing the histogram of an image and +plot the histogram of each color channel + +Convert the image to grayscale and plot its histogram. + +::: {exercise-end} +::: + ++++ + +## Image preprocessing / enhancement + +Goals: denoising, feature (edges) extraction, ... + +### Local filters + +Local filters replace the value of pixels by a function of the +values of neighboring pixels. The function can be linear or non-linear. + +Neighbourhood: square (choose size), disk, or more complicated +_structuring element_. + +![](../../advanced/image_processing/kernels.png) + +Example : horizontal Sobel filter + +```{code-cell} +text = ski.data.text() +hsobel_text = ski.filters.sobel_h(text) +``` + +Uses the following linear kernel for computing horizontal gradients: + +``` + 1 2 1 + 0 0 0 +-1 -2 -1 +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(12, 3)) + +plt.subplot(121) +plt.imshow(text, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.subplot(122) +plt.imshow(hsobel_text, cmap="nipy_spectral", interpolation="nearest") +plt.axis("off") +plt.tight_layout() +``` + +### Non-local filters + +Non-local filters use a large region of the image (or all the image) to +transform the value of one pixel: + +```{code-cell} +camera = ski.data.camera() +camera_equalized = ski.exposure.equalize_hist(camera) +``` + +Enhances contrast in large almost uniform regions. + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(7, 3)) + +plt.subplot(121) +plt.imshow(camera, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.subplot(122) +plt.imshow(camera_equalized, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.tight_layout() +``` + +### Mathematical morphology + +See [wikipedia](https://en.wikipedia.org/wiki/Mathematical_morphology) +for an introduction on mathematical morphology. 
+ +Probe an image with a simple shape (a **structuring element**), and +modify this image according to how the shape locally fits or misses the +image. + +Default structuring element: 4-connectivity of a pixel + +```{code-cell} +# Import structuring elements to make them more easily accessible +from skimage.morphology import disk, diamond +``` + +```{code-cell} +diamond(1) +``` + +![](../../advanced/image_processing/diamond_kernel.png) + +**Erosion** = minimum filter. Replace the value of a pixel by the minimal value covered by the structuring element: + +```{code-cell} +a = np.zeros((7,7), dtype=np.uint8) +a[1:6, 2:5] = 1 +a +``` + +```{code-cell} +ski.morphology.binary_erosion(a, diamond(1)).astype(np.uint8) +``` + +```{code-cell} +#Erosion removes objects smaller than the structure +ski.morphology.binary_erosion(a, diamond(2)).astype(np.uint8) +``` + +**Dilation**: maximum filter: + +```{code-cell} +a = np.zeros((5, 5)) +a[2, 2] = 1 +a +``` + +```{code-cell} +ski.morphology.binary_dilation(a, diamond(1)).astype(np.uint8) +``` + +**Opening**: erosion + dilation: + +```{code-cell} +a = np.zeros((5,5), dtype=int) +a[1:4, 1:4] = 1; a[4, 4] = 1 +a +``` + +```{code-cell} +ski.morphology.binary_opening(a, diamond(1)).astype(np.uint8) +``` + +Opening removes small objects and smoothes corners. + +:::{admonition} Grayscale mathematical morphology + +Mathematical morphology operations are also available for +(non-binary) grayscale images (int or float type). Erosion and dilation +correspond to minimum (resp. maximum) filters. + +::: + +Higher-level mathematical morphology operations are available: tophat, +skeletonization, etc. + +:::{admonition} See also + +Basic mathematical morphology is also implemented in +{mod}`scipy.ndimage`. The `scipy.ndimage` implementation +works on arbitrary-dimensional arrays. 
+ +::: + +--- + +### Example of filters comparison: image denoising + +```{code-cell} +coins = ski.data.coins() +coins_zoom = coins[10:80, 300:370] +median_coins = ski.filters.median( + coins_zoom, disk(1) +) +tv_coins = ski.restoration.denoise_tv_chambolle( + coins_zoom, weight=0.1 +) +gaussian_filter_coins = ski.filters.gaussian(coins, sigma=2) +med_filter_coins = ski.filters.median(coins, np.ones((3, 3))) +tv_filter_coins = ski.restoration.denoise_tv_chambolle(coins, weight=0.1) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(16, 4)) +plt.subplot(141) +plt.imshow(coins[10:80, 300:370], cmap="gray", interpolation="nearest") +plt.axis("off") +plt.title("Image") +plt.subplot(142) +plt.imshow(gaussian_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") +plt.axis("off") +plt.title("Gaussian filter") +plt.subplot(143) +plt.imshow(med_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") +plt.axis("off") +plt.title("Median filter") +plt.subplot(144) +plt.imshow(tv_filter_coins[10:80, 300:370], cmap="gray", interpolation="nearest") +plt.axis("off") +plt.title("TV filter") +``` + +## Image segmentation + +Image segmentation is the attribution of different labels to different +regions of the image, for example in order to extract the pixels of an +object of interest. + +### Binary segmentation: foreground + background + +#### Histogram-based method: **Otsu thresholding** + +::: {note} +:class: dropdown + +The [Otsu method](https://en.wikipedia.org/wiki/Otsu%27s_method) is a +simple heuristic to find a threshold to separate the foreground from +the background. +::: + +:::{sidebar} Earlier scikit-image versions +{mod}`skimage.filters` is called {mod}`skimage.filter` in earlier +versions of scikit-image +::: + +```{code-cell} +camera = ski.data.camera() +val = ski.filters.threshold_otsu(camera) +mask = camera < val +``` + +```{code-cell} +# The histogram from which Otsu calculated the threshold. 
+hist, bins_center = ski.exposure.histogram(camera) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(9, 4)) +plt.subplot(131) +plt.imshow(camera, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.subplot(132) +plt.imshow(mask, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.subplot(133) +plt.plot(bins_center, hist, lw=2) +plt.axvline(val, color="k", ls="--") + +plt.tight_layout() +``` + +#### Labeling connected components of a discrete image + +::: {note} +:class: dropdown + +Once you have separated foreground objects, it is useful to separate them +from each other. For this, we can assign a different integer label to +each one. +::: + +Synthetic data: + +```{code-cell} +n = 20 +l = 256 +im = np.zeros((l, l)) +rng = np.random.default_rng() +points = l * rng.random((2, n ** 2)) +im[(points[0]).astype(int), (points[1]).astype(int)] = 1 +im = ski.filters.gaussian(im, sigma=l / (4. * n)) +blobs = im > im.mean() +``` + +Label all connected components: + +```{code-cell} +all_labels = ski.measure.label(blobs) +``` + +Label only foreground connected components: + +```{code-cell} +blobs_labels = ski.measure.label(blobs, background=0) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(9, 3.5)) +plt.subplot(131) +plt.imshow(blobs, cmap="gray") +plt.axis("off") +plt.subplot(132) +plt.imshow(all_labels, cmap="nipy_spectral") +plt.axis("off") +plt.subplot(133) +plt.imshow(blobs_labels, cmap="nipy_spectral") +plt.axis("off") + +plt.tight_layout() +``` + +:::{admonition} See also + +{func}`scipy.ndimage.find_objects` is useful to return slices of +objects in an image. +::: + ++++ + +### Marker based methods + +If you have markers inside a set of regions, you can use these to segment +the regions. 
+ ++++ + +#### _Watershed_ segmentation + +The Watershed ({func}`skimage.segmentation.watershed`) is a region-growing +approach that fills "basins" in the image + +```{code-cell} +# Generate an initial image with two overlapping circles +x, y = np.indices((80, 80)) +x1, y1, x2, y2 = 28, 28, 44, 52 +r1, r2 = 16, 20 +mask_circle1 = (x - x1) ** 2 + (y - y1) ** 2 < r1 ** 2 +mask_circle2 = (x - x2) ** 2 + (y - y2) ** 2 < r2 ** 2 +image = np.logical_or(mask_circle1, mask_circle2) +# Now we want to separate the two objects in image +# Generate the markers as local maxima of the distance +# to the background +# Use scipy.ndimage.distance_transform_edt +distance = sp.ndimage.distance_transform_edt(image) +peak_idx = ski.feature.peak_local_max( + distance, footprint=np.ones((3, 3)), labels=image +) +peak_mask = np.zeros_like(distance, dtype=bool) +peak_mask[tuple(peak_idx.T)] = True +markers = ski.morphology.label(peak_mask) +labels_ws = ski.segmentation.watershed( + -distance, markers, mask=image +) +``` + +#### _Random walker_ segmentation + +The random walker algorithm ({func}`skimage.segmentation.random_walker`) +is similar to the Watershed, but with a more "probabilistic" approach. 
It +is based on the idea of the diffusion of labels in the image: + +```{code-cell} +# Transform markers image so that 0-valued pixels are to +# be labelled, and -1-valued pixels represent background +markers[~image] = -1 +labels_rw = ski.segmentation.random_walker(image, markers) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(12, 3.5)) +plt.subplot(141) +plt.imshow(image, cmap="gray", interpolation="nearest") +plt.axis("off") +plt.title("image") +plt.subplot(142) +plt.imshow(-distance, interpolation="nearest") +plt.axis("off") +plt.title("distance map") +plt.subplot(143) +plt.imshow(labels_ws, cmap="nipy_spectral", interpolation="nearest") +plt.axis("off") +plt.title("watershed segmentation") +plt.subplot(144) +plt.imshow(labels_rw, cmap="nipy_spectral", interpolation="nearest") +plt.axis("off") +plt.title("random walker segmentation") + +plt.tight_layout() +``` + +:::{admonition} Postprocessing label images +`skimage` provides several utility functions that can be used on +label images (i.e., images where different discrete values identify +different regions). Function names are often self-explanatory: +{func}`skimage.segmentation.clear_border`, +{func}`skimage.segmentation.relabel_sequential`, +{func}`skimage.morphology.remove_small_objects`, etc. +::: + +::: {exercise-start} +:label: ski-coins-otsu-ex +:class: dropdown +::: + +- Load the `coins` image from the `data` submodule. +- Separate the coins from the background by testing several + segmentation methods: Otsu thresholding, adaptive thresholding, and + watershed or random walker segmentation. +- If necessary, use a postprocessing function to improve the coins / + background segmentation. 
+ +::: {exercise-end} +::: + ++++ + +## Measuring regions' properties + +Example: compute the size and perimeter of the two segmented regions: + +```{code-cell} +properties = ski.measure.regionprops(labels_rw) +[float(prop.area) for prop in properties] +``` + +```{code-cell} +[prop.perimeter for prop in properties] +``` + +:::{admonition} See also + +For some properties, functions are available as well in +{mod}`scipy.ndimage` with a different API (a list is +returned). +::: + +::: {exercise-start} +:label: ski-coin-labels-ex +:class: dropdown +::: + +- Use the binary image of the coins and background from the previous + exercise. +- Compute an image of labels for the different coins. +- Compute the size and eccentricity of all coins. + +::: {exercise-end} +::: + ++++ + +## Data visualization and interaction + +Meaningful visualizations are useful when testing a given processing +pipeline. + +Some image processing operations: + +```{code-cell} +coins = ski.data.coins() +mask = coins > ski.filters.threshold_otsu(coins) +clean_border = ski.segmentation.clear_border(mask) +``` + +Visualize binary result: + +```{code-cell} +plt.figure() +plt.imshow(clean_border, cmap='gray') +``` + +Visualize contour + +```{code-cell} +plt.figure() +plt.imshow(coins, cmap='gray') +plt.contour(clean_border, [0.5]) +``` + +Use the dedicated `skimage` utility function: + +```{code-cell} +coins_edges = ski.segmentation.mark_boundaries( + coins, clean_border.astype(int) +) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(8, 3.5)) +plt.subplot(121) +plt.imshow(clean_border, cmap="gray") +plt.axis("off") +plt.subplot(122) +plt.imshow(coins_edges) +plt.axis("off") + +plt.tight_layout() +``` + +## Feature extraction for computer vision + +Geometric or textural descriptors can be extracted from images in order to + +- classify parts of the image (e.g. sky vs. buildings) +- match parts of different images (e.g. 
for object detection) +- and many other applications of + [Computer Vision](https://en.wikipedia.org/wiki/Computer_vision) + +Example: detecting corners using Harris detector + +```{code-cell} +tform = ski.transform.AffineTransform( + scale=(1.3, 1.1), rotation=1, shear=0.7, + translation=(210, 50) +) + +image = ski.transform.warp( + ski.data.checkerboard(), tform.inverse, output_shape=(350, 350) +) + +coords = ski.feature.corner_peaks( + ski.feature.corner_harris(image), min_distance=5 +) +coords_subpix = ski.feature.corner_subpix( + image, coords, window_size=13 +) +``` + +```{code-cell} +:tags: [hide-input] + +plt.gray() +plt.imshow(image, interpolation="nearest") +plt.plot(coords_subpix[:, 1], coords_subpix[:, 0], "+r", markersize=15, mew=5) +plt.plot(coords[:, 1], coords[:, 0], ".b", markersize=7) +plt.axis("off") +``` + +(this example is taken from the [plot_corner](https://scikit-image.org/docs/stable/auto_examples/features_detection/plot_corner.html) +example in scikit-image) + +Points of interest such as corners can then be used to match objects in +different images, as described in the [plot_matching](https://scikit-image.org/docs/stable/auto_examples/transform/plot_matching.html) +example of scikit-image. diff --git a/packages/scikit-image/index.rst b/packages/scikit-image/index.rst deleted file mode 100644 index d7b5e7a3e..000000000 --- a/packages/scikit-image/index.rst +++ /dev/null @@ -1,781 +0,0 @@ -.. for doctests - >>> import numpy as np - >>> import scipy as sp - >>> import matplotlib.pyplot as plt - -.. _scikit_image: - -================================== -``scikit-image``: image processing -================================== - -.. currentmodule:: skimage - - -**Author**: *Emmanuelle Gouillart* - -`scikit-image `_ is a Python package dedicated -to image processing, using NumPy arrays as image objects. 
-This chapter describes how to use ``scikit-image`` for various image -processing tasks, and how it relates to other scientific Python -modules such as NumPy and SciPy. - -.. seealso:: - - For basic image manipulation, such as image cropping or simple - filtering, a large number of simple operations can be realized with - NumPy and SciPy only. See :ref:`basic_image`. - - Note that you should be familiar with the content of the previous - chapter before reading the current one, as basic operations such as - masking and labeling are a prerequisite. - -.. contents:: Chapters contents - :local: - :depth: 2 - - -Introduction and concepts -========================= - -Images are NumPy's arrays ``np.ndarray`` - -:image: - - ``np.ndarray`` - -:pixels: - - array values: ``a[2, 3]`` - -:channels: - - array dimensions - -:image encoding: - - ``dtype`` (``np.uint8``, ``np.uint16``, ``np.float``) - -:filters: - - functions (``numpy``, ``skimage``, ``scipy``) - - -:: - - >>> import numpy as np - >>> check = np.zeros((8, 8)) - >>> check[::2, 1::2] = 1 - >>> check[1::2, ::2] = 1 - >>> import matplotlib.pyplot as plt - >>> plt.imshow(check, cmap='gray', interpolation='nearest') - - - -.. image:: auto_examples/images/sphx_glr_plot_check_001.png - :scale: 60 - :target: auto_examples/plot_check.html - :align: center - -``scikit-image`` and the scientific Python ecosystem ----------------------------------------------------- - -``scikit-image`` is packaged in both ``pip`` and ``conda``-based -Python installations, as well as in most Linux distributions. Other -Python packages for image processing & visualization that operate on -NumPy arrays include: - -:mod:`scipy.ndimage` - For N-dimensional arrays. Basic filtering, - mathematical morphology, regions properties - -`Mahotas `_ - With a focus on high-speed implementations. - -`Napari `_ - A fast, interactive, multi-dimensional image viewer built in Qt. 
- -Some powerful C++ image processing libraries also have Python bindings: - -`OpenCV `_ - A highly optimized computer vision library with a focus on real-time - applications. - -`ITK `_ - The Insight ToolKit, especially useful for registration and - working with 3D images. - -To varying degrees, these tend to be less Pythonic and NumPy-friendly. - -What is included in scikit-image --------------------------------- - -* Website: https://scikit-image.org/ - -* Gallery of examples: - https://scikit-image.org/docs/stable/auto_examples/ - -The library contains predominantly image processing algorithms, but -also utility functions to ease data handling and processing. -It contains the following submodules: - -:mod:`color` - Color space conversion. - -:mod:`data` - Test images and example data. - -:mod:`draw` - Drawing primitives (lines, text, etc.) that operate on NumPy - arrays. - -:mod:`exposure` - Image intensity adjustment, e.g., histogram equalization, etc. - -:mod:`feature` - Feature detection and extraction, e.g., texture analysis corners, etc. - -:mod:`filters` - Sharpening, edge finding, rank filters, thresholding, etc. - -:mod:`graph` - Graph-theoretic operations, e.g., shortest paths. - -:mod:`io` - Reading, saving, and displaying images and video. - -:mod:`measure` - Measurement of image properties, e.g., region properties and contours. - -:mod:`metrics` - Metrics corresponding to images, e.g. distance metrics, similarity, etc. - -:mod:`morphology` - Morphological operations, e.g., opening or skeletonization. - -:mod:`restoration` - Restoration algorithms, e.g., deconvolution algorithms, denoising, etc. - -:mod:`segmentation` - Partitioning an image into multiple regions. - -:mod:`transform` - Geometric and other transforms, e.g., rotation or the Radon transform. - -:mod:`util` - Generic utilities. - -.. TODO Edit this section with a more refined discussion of the various - package features. 
- -Importing -========= - -We import ``scikit-image`` using the convention:: - - >>> import skimage as ski - -Most functionality lives in subpackages, e.g.:: - - >>> image = ski.data.cat() - -You can list all submodules with:: - - >>> for m in dir(ski): print(m) - __version__ - color - data - draw - exposure - feature - filters - future - graph - io - measure - metrics - morphology - registration - restoration - segmentation - transform - util - -Most ``scikit-image`` functions take NumPy ``ndarrays`` as arguments :: - - >>> camera = ski.data.camera() - >>> camera.dtype - dtype('uint8') - >>> camera.shape - (512, 512) - >>> filtered_camera = ski.filters.gaussian(camera, sigma=1) - >>> type(filtered_camera) - - -Example data -============ - -To start off, we need example images to work with. -The library ships with a few of these: - -:mod:`skimage.data` :: - - >>> image = ski.data.cat() - >>> image.shape - (300, 451, 3) - -Input/output, data types and colorspaces -======================================== - -I/O: :mod:`skimage.io` - -Save an image to disk: :func:`skimage.io.imsave` :: - - >>> ski.io.imsave("cat.png", image) - -Reading from files: :func:`skimage.io.imread` :: - - >>> cat = ski.io.imread("cat.png") - -.. image:: auto_examples/images/sphx_glr_plot_camera_001.png - :width: 50% - :target: auto_examples/plot_camera.html - :align: center - -This works with many data formats supported by the -`ImageIO `__ library. - -Loading also works with URLs:: - - >>> logo = ski.io.imread('https://scikit-image.org/_static/img/logo.png') - -Data types ------------ - - -.. image:: auto_examples/images/sphx_glr_plot_camera_uint_001.png - :align: right - :width: 50% - :target: auto_examples/plot_camera_uint.html - -Image ndarrays can be represented either by integers (signed or unsigned) or -floats. 
- -Careful with overflows with integer data types - -:: - - >>> camera = ski.data.camera() - >>> camera.dtype - dtype('uint8') - >>> camera_multiply = 3 * camera - -Different integer sizes are possible: 8-, 16- or 32-bytes, signed or -unsigned. - -.. warning:: - - An important (if questionable) ``skimage`` **convention**: float images - are supposed to lie in [-1, 1] (in order to have comparable contrast for - all float images) :: - - >>> camera_float = ski.util.img_as_float(camera) - >>> camera.max(), camera_float.max() - (np.uint8(255), np.float64(1.0)) - -Some image processing routines need to work with float arrays, and may -hence output an array with a different type and the data range from the -input array :: - - >>> camera_sobel = ski.filters.sobel(camera) - >>> camera_sobel.max() - np.float64(0.644...) - - -Utility functions are provided in :mod:`skimage` to convert both the -dtype and the data range, following skimage's conventions: -``util.img_as_float``, ``util.img_as_ubyte``, etc. - -See the `user guide -`_ for -more details. - -Colorspaces ------------- - -Color images are of shape (N, M, 3) or (N, M, 4) (when an alpha channel -encodes transparency) :: - - >>> face = sp.datasets.face() - >>> face.shape - (768, 1024, 3) - - -Routines converting between different colorspaces (RGB, HSV, LAB etc.) -are available in :mod:`skimage.color` : ``color.rgb2hsv``, ``color.lab2rgb``, -etc. Check the docstring for the expected dtype (and data range) of input -images. - -.. topic:: 3D images - - Most functions of ``skimage`` can take 3D images as input arguments. - Check the docstring to know if a function can be used on 3D images - (for example MRI or CT images). - - - -.. topic:: Exercise - :class: green - - Open a color image on your disk as a NumPy array. - - Find a skimage function computing the histogram of an image and - plot the histogram of each color channel - - Convert the image to grayscale and plot its histogram. 
- -Image preprocessing / enhancement -================================== - -Goals: denoising, feature (edges) extraction, ... - - -Local filters --------------- - -Local filters replace the value of pixels by a function of the -values of neighboring pixels. The function can be linear or non-linear. - -Neighbourhood: square (choose size), disk, or more complicated -*structuring element*. - -.. image:: ../../advanced/image_processing/kernels.png - :width: 80% - :align: center - -Example : horizontal Sobel filter :: - - >>> text = ski.data.text() - >>> hsobel_text = ski.filters.sobel_h(text) - - -Uses the following linear kernel for computing horizontal gradients:: - - 1 2 1 - 0 0 0 - -1 -2 -1 - -.. image:: auto_examples/images/sphx_glr_plot_sobel_001.png - :width: 70% - :target: auto_examples/plot_sobel.html - :align: center - - -Non-local filters ------------------ - -Non-local filters use a large region of the image (or all the image) to -transform the value of one pixel:: - - >>> camera = ski.data.camera() - >>> camera_equalized = ski.exposure.equalize_hist(camera) - -Enhances contrast in large almost uniform regions. - -.. image:: auto_examples/images/sphx_glr_plot_equalize_hist_001.png - :width: 70% - :target: auto_examples/plot_equalize_hist.html - :align: center - -Mathematical morphology ------------------------ - -See `wikipedia `_ -for an introduction on mathematical morphology. - -Probe an image with a simple shape (a **structuring element**), and -modify this image according to how the shape locally fits or misses the -image. - -Default structuring element: 4-connectivity of a pixel :: - - >>> # Import structuring elements to make them more easily accessible - >>> from skimage.morphology import disk, diamond - - >>> diamond(1) - array([[0, 1, 0], - [1, 1, 1], - [0, 1, 0]], dtype=uint8) - - -.. image:: ../../advanced/image_processing/diamond_kernel.png - :align: center - -**Erosion** = minimum filter. 
Replace the value of a pixel by the minimal value covered by the structuring element.:: - - >>> a = np.zeros((7,7), dtype=np.uint8) - >>> a[1:6, 2:5] = 1 - >>> a - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=uint8) - >>> ski.morphology.binary_erosion(a, diamond(1)).astype(np.uint8) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=uint8) - >>> #Erosion removes objects smaller than the structure - >>> ski.morphology.binary_erosion(a, diamond(2)).astype(np.uint8) - array([[0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], dtype=uint8) - -**Dilation**: maximum filter:: - - >>> a = np.zeros((5, 5)) - >>> a[2, 2] = 1 - >>> a - array([[0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 1., 0., 0.], - [0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0.]]) - >>> ski.morphology.binary_dilation(a, diamond(1)).astype(np.uint8) - array([[0, 0, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 1, 1, 1, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 0]], dtype=uint8) - -**Opening**: erosion + dilation:: - - >>> a = np.zeros((5,5), dtype=int) - >>> a[1:4, 1:4] = 1; a[4, 4] = 1 - >>> a - array([[0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 1]]) - >>> ski.morphology.binary_opening(a, diamond(1)).astype(np.uint8) - array([[0, 0, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 1, 1, 1, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 0, 0]], dtype=uint8) - -Opening removes small objects and smoothes corners. - -.. topic:: Grayscale mathematical morphology - - Mathematical morphology operations are also available for - (non-binary) grayscale images (int or float type). 
Erosion and dilation - correspond to minimum (resp. maximum) filters. - -Higher-level mathematical morphology are available: tophat, -skeletonization, etc. - -.. seealso:: - - Basic mathematical morphology is also implemented in - :mod:`scipy.ndimage.morphology`. The ``scipy.ndimage`` implementation - works on arbitrary-dimensional arrays. - ---------------------- - -.. topic:: Example of filters comparison: image denoising - - :: - - >>> coins = ski.data.coins() - >>> coins_zoom = coins[10:80, 300:370] - >>> median_coins = ski.filters.median( - ... coins_zoom, disk(1) - ... ) - >>> tv_coins = ski.restoration.denoise_tv_chambolle( - ... coins_zoom, weight=0.1 - ... ) - >>> gaussian_coins = ski.filters.gaussian(coins, sigma=2) - - .. image:: auto_examples/images/sphx_glr_plot_filter_coins_001.png - :width: 99% - :target: auto_examples/plot_filter_coins.html - -Image segmentation -=================== - -Image segmentation is the attribution of different labels to different -regions of the image, for example in order to extract the pixels of an -object of interest. - -Binary segmentation: foreground + background ---------------------------------------------- - -Histogram-based method: **Otsu thresholding** -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. tip:: - - The `Otsu method `_ is a - simple heuristic to find a threshold to separate the foreground from - the background. - -.. sidebar:: Earlier scikit-image versions - - :mod:`skimage.filters` is called :mod:`skimage.filter` in earlier - versions of scikit-image - -:: - - camera = ski.data.camera() - val = ski.filters.threshold_otsu(camera) - mask = camera < val - -.. image:: auto_examples/images/sphx_glr_plot_threshold_001.png - :width: 70% - :target: auto_examples/plot_threshold.html - :align: center - -Labeling connected components of a discrete image -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. tip:: - - Once you have separated foreground objects, it is use to separate them - from each other. 
For this, we can assign a different integer labels to - each one. - -Synthetic data:: - - >>> n = 20 - >>> l = 256 - >>> im = np.zeros((l, l)) - >>> rng = np.random.default_rng() - >>> points = l * rng.random((2, n ** 2)) - >>> im[(points[0]).astype(int), (points[1]).astype(int)] = 1 - >>> im = ski.filters.gaussian(im, sigma=l / (4. * n)) - >>> blobs = im > im.mean() - -Label all connected components:: - - >>> all_labels = ski.measure.label(blobs) - -Label only foreground connected components:: - - >>> blobs_labels = ski.measure.label(blobs, background=0) - - -.. image:: auto_examples/images/sphx_glr_plot_labels_001.png - :width: 90% - :target: auto_examples/plot_labels.html - :align: center - -.. seealso:: - - :func:`scipy.ndimage.find_objects` is useful to return slices on - object in an image. - -Marker based methods ---------------------------------------------- - -If you have markers inside a set of regions, you can use these to segment -the regions. - -*Watershed* segmentation -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Watershed (:func:`skimage.segmentation.watershed`) is a region-growing -approach that fills "basins" in the image :: - - >>> # Generate an initial image with two overlapping circles - >>> x, y = np.indices((80, 80)) - >>> x1, y1, x2, y2 = 28, 28, 44, 52 - >>> r1, r2 = 16, 20 - >>> mask_circle1 = (x - x1) ** 2 + (y - y1) ** 2 < r1 ** 2 - >>> mask_circle2 = (x - x2) ** 2 + (y - y2) ** 2 < r2 ** 2 - >>> image = np.logical_or(mask_circle1, mask_circle2) - >>> # Now we want to separate the two objects in image - >>> # Generate the markers as local maxima of the distance - >>> # to the background - >>> import scipy as sp - >>> distance = sp.ndimage.distance_transform_edt(image) - >>> peak_idx = ski.feature.peak_local_max( - ... distance, footprint=np.ones((3, 3)), labels=image - ... 
) - >>> peak_mask = np.zeros_like(distance, dtype=bool) - >>> peak_mask[tuple(peak_idx.T)] = True - >>> markers = ski.morphology.label(peak_mask) - >>> labels_ws = ski.segmentation.watershed( - ... -distance, markers, mask=image - ... ) - -*Random walker* segmentation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The random walker algorithm (:func:`skimage.segmentation.random_walker`) -is similar to the Watershed, but with a more "probabilistic" approach. It -is based on the idea of the diffusion of labels in the image:: - - >>> # Transform markers image so that 0-valued pixels are to - >>> # be labelled, and -1-valued pixels represent background - >>> markers[~image] = -1 - >>> labels_rw = ski.segmentation.random_walker(image, markers) - -.. image:: auto_examples/images/sphx_glr_plot_segmentations_001.png - :width: 90% - :target: auto_examples/plot_segmentations.html - :align: center - - -.. topic:: Postprocessing label images - - ``skimage`` provides several utility functions that can be used on - label images (ie images where different discrete values identify - different regions). Functions names are often self-explaining: - :func:`skimage.segmentation.clear_border`, - :func:`skimage.segmentation.relabel_from_one`, - :func:`skimage.morphology.remove_small_objects`, etc. - - -.. topic:: Exercise - :class: green - - * Load the ``coins`` image from the ``data`` submodule. - - * Separate the coins from the background by testing several - segmentation methods: Otsu thresholding, adaptive thresholding, and - watershed or random walker segmentation. - - * If necessary, use a postprocessing function to improve the coins / - background segmentation. 
- - -Measuring regions' properties -============================== - -Example: compute the size and perimeter of the two segmented regions:: - - >>> properties = ski.measure.regionprops(labels_rw) - >>> [float(prop.area) for prop in properties] - [770.0, 1168.0] - >>> [prop.perimeter for prop in properties] - [np.float64(100.91...), np.float64(126.81...)] - -.. seealso:: - - for some properties, functions are available as well in - :mod:`scipy.ndimage.measurements` with a different API (a list is - returned). - - -.. topic:: Exercise (continued) - :class: green - - * Use the binary image of the coins and background from the previous - exercise. - - * Compute an image of labels for the different coins. - - * Compute the size and eccentricity of all coins. - -Data visualization and interaction -=================================== - -Meaningful visualizations are useful when testing a given processing -pipeline. - -Some image processing operations:: - - >>> coins = ski.data.coins() - >>> mask = coins > ski.filters.threshold_otsu(coins) - >>> clean_border = ski.segmentation.clear_border(mask) - -Visualize binary result:: - - >>> plt.figure() -
- >>> plt.imshow(clean_border, cmap='gray') - - -Visualize contour :: - - >>> plt.figure() -
- >>> plt.imshow(coins, cmap='gray') - - >>> plt.contour(clean_border, [0.5]) - - -Use ``skimage`` dedicated utility function:: - - >>> coins_edges = ski.segmentation.mark_boundaries( - ... coins, clean_border.astype(int) - ... ) - -.. image:: auto_examples/images/sphx_glr_plot_boundaries_001.png - :width: 90% - :target: auto_examples/plot_boundaries.html - :align: center - -Feature extraction for computer vision -======================================= - -Geometric or textural descriptor can be extracted from images in order to - -* classify parts of the image (e.g. sky vs. buildings) - -* match parts of different images (e.g. for object detection) - -* and many other applications of - `Computer Vision `_ - -Example: detecting corners using Harris detector :: - - tform = ski.transform.AffineTransform( - scale=(1.3, 1.1), rotation=1, shear=0.7, - translation=(210, 50) - ) - image = ski.transform.warp( - data.checkerboard(), tform.inverse, output_shape=(350, 350) - ) - - coords = ski.feature.corner_peaks( - ski.feature.corner_harris(image), min_distance=5 - ) - coords_subpix = ski.feature.corner_subpix( - image, coords, window_size=13 - ) - -.. image:: auto_examples/images/sphx_glr_plot_features_001.png - :width: 90% - :target: auto_examples/plot_features.html - :align: center - -(this example is taken from the `plot_corner -`_ -example in scikit-image) - -Points of interest such as corners can then be used to match objects in -different images, as described in the `plot_matching -`_ -example of scikit-image. - -Full code examples -================== - -.. include the gallery. Skip the first line to avoid the "orphan" - declaration - -.. 
include:: auto_examples/index.rst - :start-line: 1 diff --git a/packages/scikit-learn/examples/plot_svm_non_linear.py b/packages/scikit-learn/examples/plot_svm_non_linear.py index f5c246b42..1d73914c1 100644 --- a/packages/scikit-learn/examples/plot_svm_non_linear.py +++ b/packages/scikit-learn/examples/plot_svm_non_linear.py @@ -14,8 +14,7 @@ rng = np.random.default_rng(27446968) -############################################################################## -# data that is linearly separable +# Data that is linearly separable def linear_model(rseed=42, n_samples=30): @@ -62,8 +61,7 @@ def linear_model(rseed=42, n_samples=30): ) -############################################################################## -# data with a non-linear separation +# Data with a non-linear separation def nonlinear_model(rseed=27446968, n_samples=30): diff --git a/packages/scikit-learn/index.md b/packages/scikit-learn/index.md new file mode 100644 index 000000000..2a0862d53 --- /dev/null +++ b/packages/scikit-learn/index.md @@ -0,0 +1,2130 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(scikit-learn-chapter)= + +# `scikit-learn`: machine learning in Python + +```{code-cell} +:tags: [hide-input] + +import numpy as np +import matplotlib.pyplot as plt +``` + +**Authors**: _Gael Varoquaux_ + +![](images/scikit-learn-logo.png) + +:::{admonition} Prerequisites + +- {ref}`numpy ` +- {ref}`scipy ` +- {ref}`matplotlib (optional) ` +- {ref}`ipython (the enhancements come in handy) ` + ::: + +:::{sidebar} Acknowledgements + +This chapter is adapted from [a +tutorial](https://www.youtube.com/watch?v=r4bRUvvlaBw) given by Gaël +Varoquaux, Jake Vanderplas, Olivier Grisel. 
+ +::: + +:::{admonition} See also + +**Data science in Python** + +- The {ref}`statistics` chapter may also be of interest + for readers looking into machine learning. +- The [documentation of scikit-learn](https://scikit-learn.org) is + very complete and didactic. + +::: + +## Introduction: problem settings + +### What is machine learning? + +::: {note} +:class: dropdown + +Machine Learning is about building programs with **tunable +parameters** that are adjusted automatically so as to improve their +behavior by **adapting to previously seen data.** + +Machine Learning can be considered a subfield of **Artificial +Intelligence** since those algorithms can be seen as building blocks +to make computers learn to behave more intelligently by somehow +**generalizing** rather than just storing and retrieving data items +like a database system would do. +::: + +```{code-cell} +:tags: [hide-input] + +from sklearn.linear_model import SGDClassifier +from sklearn.datasets import make_blobs + +# we create 50 separable synthetic points +X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) + +# fit the model +clf = SGDClassifier(loss="hinge", alpha=0.01, fit_intercept=True) +clf.fit(X, Y) + +# plot the line, the points, and the nearest vectors to the plane +xx = np.linspace(-1, 5, 10) +yy = np.linspace(-1, 5, 10) +X1, X2 = np.meshgrid(xx, yy) +Z = np.empty(X1.shape) +for (i, j), val in np.ndenumerate(X1): + x1 = val + x2 = X2[i, j] + p = clf.decision_function([[x1, x2]]) + Z[i, j] = p[0] + +plt.figure(figsize=(4, 3)) +ax = plt.axes() +ax.contour( + X1, X2, Z, [-1.0, 0.0, 1.0], colors="k", linestyles=["dashed", "solid", "dashed"] +) +ax.scatter(X[:, 0], X[:, 1], c=Y, cmap="Paired") +ax.set_title('A classification problem') +ax.axis("tight"); +``` + +We'll take a look at two very simple machine learning tasks here.
The +first is a **classification** task: the figure shows a collection of +two-dimensional data, colored according to two different class labels. A +classification algorithm may be used to draw a dividing boundary between +the two clusters of points: + +By drawing this separating line, we have learned a model which can +**generalize** to new data: if you were to drop another point onto the +plane which is unlabeled, this algorithm could now **predict** whether +it's a blue or a red point. + +```{code-cell} +:tags: [hide-input] + +from sklearn.linear_model import LinearRegression + +# x from 0 to 30 +rng = np.random.default_rng() +x = 30 * rng.random((20, 1)) +# y = a*x + b with noise +y = 0.5 * x + 1.0 + rng.normal(size=x.shape) + +# create a linear regression model +model = LinearRegression() +model.fit(x, y) + +# predict y from the data +x_new = np.linspace(0, 30, 100) +y_new = model.predict(x_new[:, np.newaxis]) + +# plot the results +plt.figure(figsize=(4, 3)) +ax = plt.axes() +ax.scatter(x, y) +ax.plot(x_new, y_new) +ax.set_xlabel("x") +ax.set_ylabel("y") +ax.axis("tight") +ax.set_title('A regression problem'); +``` + +The next simple task we'll look at is a **regression** task: a simple +best-fit line to a set of data. + +Again, this is an example of fitting a model to data, but our focus here +is that the model can make generalizations about new data. The model has +been **learned** from the training data, and can be used to predict the +result of test data: here, we might be given an x-value, and the model +would allow us to predict the y value. + ++++ + +### Data in Scikit-learn + +#### The data matrix + +Machine learning algorithms implemented in scikit-learn expect data +to be stored in a **two-dimensional array or matrix**. The arrays can be +either `numpy` arrays, or in some cases `scipy.sparse` matrices. 
The +size of the array is expected to be `[n_samples, n_features]` + +- **n_samples:** The number of samples: each sample is an item to + process (e.g. classify). A sample can be a document, a picture, a + sound, a video, an astronomical object, a row in a database or CSV + file, or whatever you can describe with a fixed set of quantitative + traits. +- **n_features:** The number of features or distinct traits that can + be used to describe each item in a quantitative manner. Features are + generally real-valued, but may be boolean or discrete-valued in some + cases. + +::: {note} +:class: dropdown + +The number of features must be fixed in advance. However it can be +very high dimensional (e.g. millions of features) with most of them +being zeros for a given sample. This is a case where `scipy.sparse` +matrices can be useful, in that they are much more memory-efficient +than NumPy arrays. +::: + +#### A Simple Example: the Iris Dataset + +##### The application problem + +As an example of a simple dataset, let us take a look at the +iris data stored by scikit-learn. Suppose we want to recognize species of +irises. The data consists of measurements of +three different species of irises: + +| ![](images/iris_setosa.jpg) | ![](images/iris_versicolor.jpg) | ![](images/iris_virginica.jpg) | +| :-------------------------: | :-----------------------------: | :----------------------------: | +| Setosa Iris | Versicolor Iris | Virginica Iris | + +:::{admonition} Quick Question: + +**If we want to design an algorithm to recognize iris species, what +might the data be?** + +Remember: we need a 2D array of size `[n_samples x n_features]`. + +- What would the `n_samples` refer to? +- What might the `n_features` refer to? + ::: + +Remember that there must be a **fixed** number of features for each +sample, and feature number `i` must be a similar kind of quantity for +each sample.
+ +##### Loading the Iris Data with Scikit-learn + +Scikit-learn has a very straightforward set of data on these iris +species. The data consist of the following: + +- Features in the Iris dataset: + + - sepal length (cm) + - sepal width (cm) + - petal length (cm) + - petal width (cm) + +- Target classes to predict: + + - Setosa + - Versicolour + - Virginica + +{mod}`scikit-learn` embeds a copy of the iris CSV file along with a +function to load it into NumPy arrays: + +```{code-cell} +from sklearn.datasets import load_iris +iris = load_iris() +``` + +:::{note} +**Import sklearn** Note that scikit-learn is imported as {mod}`sklearn` +::: + +The features of each sample flower are stored in the `data` attribute +of the dataset: + +```{code-cell} +iris.data.shape +``` + +```{code-cell} +n_samples, n_features = iris.data.shape +n_samples +``` + +```{code-cell} +n_features +``` + +```{code-cell} +iris.data[0] +``` + +The information about the class of each sample is stored in the +`target` attribute of the dataset: + +```{code-cell} +iris.target.shape +``` + +```{code-cell} +iris.target +``` + +The names of the classes are stored in the last attribute, namely +`target_names`: + +```{code-cell} +iris.target_names +``` + +This data is four-dimensional, but we can visualize two of the +dimensions at a time using a scatter plot: + +::: {note} + +A more elaborate visualization of this dataset is detailed in the +{ref}`statistics` chapter.
+ +```{code-cell} +:tags: [hide-input] + +from matplotlib import ticker + +# The indices of the features that we are plotting +x_index = 0 +y_index = 1 + +# this formatter will label the colorbar with the correct target names +formatter = ticker.FuncFormatter(lambda i, *args: iris.target_names[int(i)]) + +plt.figure(figsize=(5, 4)) +plt.scatter(iris.data[:, x_index], iris.data[:, y_index], c=iris.target) +plt.colorbar(ticks=[0, 1, 2], format=formatter) +plt.xlabel(iris.feature_names[x_index]) +plt.ylabel(iris.feature_names[y_index]) +plt.tight_layout() +``` + +::: {exercise-start} +:label: chose-two-features +:class: dropdown +::: + +Can you choose 2 features to find a plot where it is easier to +separate the different classes of irises? + +**Hint**: click on the figure above to see the code that generates it, +and modify this code. + +::: {exercise-end} +::: + +## Basic principles of machine learning with Scikit-learn + +### Introducing the Scikit-learn estimator object + +Every algorithm is exposed in Scikit-learn via an "Estimator" object. For +instance a linear regression is: +{class}`sklearn.linear_model.LinearRegression` + +```{code-cell} +from sklearn.linear_model import LinearRegression +``` + +**Estimator parameters**: All the parameters of an estimator can be set +when it is instantiated: + +```{code-cell} +model = LinearRegression(n_jobs=1) +model +``` + +#### Fitting on data + +Let's create some simple data with {ref}`numpy `: + +```{code-cell} +import numpy as np +x = np.array([0, 1, 2]) +y = np.array([0, 1, 2]) +``` + +```{code-cell} +# The input data for sklearn is 2D: (samples == 3 x features == 1) +X = x[:, np.newaxis] +X +``` + +```{code-cell} +model.fit(X, y) +``` + +**Estimated parameters**: When data is fitted with an estimator, +parameters are estimated from the data at hand. 
All the estimated +parameters are attributes of the estimator object ending with an +underscore: + +```{code-cell} +model.coef_ +``` + +### Supervised Learning: Classification and regression + +In **Supervised Learning**, we have a dataset consisting of both +features and labels. The task is to construct an estimator which is able +to predict the label of an object given the set of features. A +relatively simple example is predicting the species of iris given a set +of measurements of its flower. This is a relatively simple task. Some +more complicated examples are: + +- given a multicolor image of an object through a telescope, determine + whether that object is a star, a quasar, or a galaxy. +- given a photograph of a person, identify the person in the photo. +- given a list of movies a person has watched and their personal rating + of the movie, recommend a list of movies they would like (So-called + _recommender systems_: a famous example is the [Netflix + Prize](https://en.wikipedia.org/wiki/Netflix_prize)). + +::: {note} +:class: dropdown + +What these tasks have in common is that there are one or more unknown +quantities associated with the object which need to be determined from +other observed quantities. +::: + +Supervised learning is further broken down into two categories, +**classification** and **regression**. In classification, the label is +discrete, while in regression, the label is continuous. For example, in +astronomy, the task of determining whether an object is a star, a +galaxy, or a quasar is a classification problem: the label is from three +distinct categories. On the other hand, we might wish to estimate the +age of an object based on such observations: this would be a regression +problem, because the label (age) is a continuous quantity.
+ +**Classification**: K nearest neighbors (kNN) is one of the simplest +learning strategies: given a new, unknown observation, look up in your +reference database which ones have the closest features and assign the +predominant class. Let's try it out on our iris classification problem: + +```{code-cell} +from sklearn import neighbors, datasets + +X, y = iris.data, iris.target +knn = neighbors.KNeighborsClassifier(n_neighbors=1) +knn.fit(X, y) + +# What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal? +iris.target_names[knn.predict([[3, 5, 4, 2]])] +``` + +```{code-cell} +:tags: [hide-input] + +from matplotlib.colors import ListedColormap + +# Create color maps for 3-class classification problem, as with iris +cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"]) +cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"]) + +X = iris.data[:, :2] # we only take the first two features. We could +# avoid this ugly slicing by using a two-dim dataset +y = iris.target + +knn = neighbors.KNeighborsClassifier(n_neighbors=1) +knn.fit(X, y) + +x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1 +y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1 +xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, +100)) +Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) + +# Put the result into a color plot +Z = Z.reshape(xx.shape) +plt.figure() +plt.pcolormesh(xx, yy, Z, cmap=cmap_light) +# Plot also the training points +plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) +plt.xlabel("sepal length (cm)") +plt.ylabel("sepal width (cm)") +plt.axis("tight") +plt.title('Sepal space and the prediction of the KNN'); +``` + +**Regression**: The simplest possible regression setting is the linear +regression one: + +```{code-cell} +from sklearn.linear_model import LinearRegression + +# x from 0 to 30 +rng = np.random.default_rng() +x = 30 * rng.random((20, 1)) + +# y = a*x + b with noise +y = 0.5 * x + 1.0 + rng.normal(size=x.shape) + +# create a 
linear regression model +model = LinearRegression() +model.fit(x, y) +``` + +```{code-cell} +# predict y from the data +x_new = np.linspace(0, 30, 100) +y_new = model.predict(x_new[:, np.newaxis]) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(4, 3)) +ax = plt.axes() +ax.scatter(x, y) +ax.plot(x_new, y_new) +ax.set_xlabel("x") +ax.set_ylabel("y") +ax.axis("tight"); +ax.set_title('A plot of a simple linear regression'); +``` + +### A recap on Scikit-learn's estimator interface + +Scikit-learn strives to have a uniform interface across all methods, and +we’ll see examples of these below. Given a scikit-learn _estimator_ +object named `model`, the following methods are available: + +::: {list-table} **Estimator interfaces** + +- - All Estimators + - - `model.fit()` : fit training data. For supervised learning + applications, this accepts two arguments: the data `X` and the labels + `y` (e.g. `model.fit(X, y)`). For unsupervised learning + applications, this accepts only a single argument, the data `X` (e.g. + `model.fit(X)`). +- - Supervised estimators + - - `model.predict()` : given a trained model, predict the label of a new + set of data. This method accepts one argument, the new data `X_new` + (e.g. `model.predict(X_new)`), and returns the learned label for each + object in the array. + - `model.predict_proba()` : For classification problems, some estimators + also provide this method, which returns the probability that a new + observation has each categorical label. In this case, the label with the + highest probability is returned by `model.predict()`. + - `model.score()` : for classification or regression problems, most + (all?) estimators implement a score method. Scores are between 0 and 1, + with a larger score indicating a better fit. +- - Unsupervised estimators + - - `model.transform()` : given an unsupervised model, transform new data + into the new basis. 
This also accepts one argument `X_new`, and + returns the new representation of the data based on the unsupervised + model. + - `model.fit_transform()` : some estimators implement this method, which + more efficiently performs a fit and a transform on the same input data. + +::: + ++++ + +### Regularization: what it is and why it is necessary + +#### Preferring simpler models + +**Train errors** Suppose you are using a 1-nearest neighbor estimator. +How many errors do you expect on your train set? + +- Train set error is not a good measurement of prediction performance. + You need to leave out a test set. +- In general, we should accept errors on the train set. + +**An example of regularization** The core idea behind regularization is that +we are going to prefer models that are simpler, for a certain definition of +"simpler", even if they lead to more errors on the train set. + +As an example, let's generate data with a 9th order polynomial, with noise: + +```{code-cell} +:tags: [hide-input] + +from sklearn import linear_model + +rng = np.random.default_rng(27446968) +x = 2 * rng.random(100) - 1 + +f = lambda t: 1.2 * t**2 + 0.1 * t**3 - 0.4 * t**5 - 0.5 * t**9 +y = f(x) + 0.4 * rng.normal(size=100) + +x_test = np.linspace(-1, 1, 100) +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4); +``` + +And now, let's fit a 4th order and a 9th order polynomial to the data.
+ +```{code-cell} +:tags: [hide-input] + +# Fitting 4th and 9th order polynomials +# +# For this we need to engineer features: the n_th powers of x: +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4) + +X = np.array([x**i for i in range(5)]).T +X_test = np.array([x_test**i for i in range(5)]).T +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(x_test, regr.predict(X_test), label="4th order") + +X = np.array([x**i for i in range(10)]).T +X_test = np.array([x_test**i for i in range(10)]).T +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(x_test, regr.predict(X_test), label="9th order") + +plt.legend(loc="best") +plt.axis("tight") +plt.title("Fitting a 4th and a 9th order polynomial"); +``` + +With your naked eye, which model do you prefer, the 4th order one, or the 9th +order one? + +Let's look at the ground truth: + +```{code-cell} +:tags: [hide-input] + +# Ground truth +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4) +plt.plot(x_test, f(x_test), label="truth") +plt.axis("tight") +plt.title("Ground truth (9th order polynomial)"); +``` + +::: {note} +:class: dropdown + +Regularization is ubiquitous in machine learning. Most scikit-learn +estimators have a parameter to tune the amount of regularization. For +instance, with k-NN, it is 'k', the number of nearest neighbors used to +make the decision. k=1 amounts to no regularization: 0 error on the +training set, whereas large k will push toward smoother decision +boundaries in the feature space. +::: + +#### Simple versus complex models for classification + +```{code-cell} +:tags: [hide-input] + +# This is an example plot from the tutorial which accompanies an explanation +# of the support vector machine GUI. 
+ +from sklearn import svm + +rng = np.random.default_rng(27446968) + +# Data that is linearly separable +def linear_model(rng, n_samples=30): + "Generate data according to a linear model" + data = rng.normal(0, 10, (n_samples, 2)) + data[: n_samples // 2] -= 15 + data[n_samples // 2 :] += 15 + + labels = np.ones(n_samples) + labels[: n_samples // 2] = -1 + + return data, labels + +X, y = linear_model(rng) +clf = svm.SVC(kernel="linear") +clf.fit(X, y) + +fig, axes = plt.subplots(1, 2, figsize=(10, 5)) +ax = axes[0] +ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bone") +ax.scatter( + clf.support_vectors_[:, 0], + clf.support_vectors_[:, 1], + s=80, + edgecolors="k", + facecolors="none", +) +delta = 1 +y_min, y_max = -50, 50 +x_min, x_max = -50, 50 +x = np.arange(x_min, x_max + delta, delta) +y = np.arange(y_min, y_max + delta, delta) +X1, X2 = np.meshgrid(x, y) +Z = clf.decision_function(np.c_[X1.ravel(), X2.ravel()]) +Z = Z.reshape(X1.shape) +ax.contour( + X1, X2, Z, [-1.0, 0.0, 1.0], colors="k", linestyles=["dashed", "solid", "dashed"] +) +ax.set_title("A linear separation") + +# Data with a non-linear separation + +def nonlinear_model(rng, n_samples=30): + radius = 40 * rng.random(n_samples) + far_pts = radius > 20 + radius[far_pts] *= 1.2 + radius[~far_pts] *= 1.1 + + theta = rng.random(n_samples) * np.pi * 2 + + data = np.empty((n_samples, 2)) + data[:, 0] = radius * np.cos(theta) + data[:, 1] = radius * np.sin(theta) + + labels = np.ones(n_samples) + labels[far_pts] = -1 + + return data, labels + +rng = np.random.default_rng(27446968) + +X, y = nonlinear_model(rng) +clf = svm.SVC(kernel="rbf", gamma=0.001, coef0=0, degree=3) +clf.fit(X, y) + +ax = axes[1] +ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bone", zorder=2) +ax.scatter( + clf.support_vectors_[:, 0], + clf.support_vectors_[:, 1], + s=80, + edgecolors="k", + facecolors="none", +) +delta = 1 +y_min, y_max = -50, 50 +x_min, x_max = -50, 50 +x = np.arange(x_min, x_max + delta, delta) +y = np.arange(y_min, y_max + 
delta, delta) +X1, X2 = np.meshgrid(x, y) +Z = clf.decision_function(np.c_[X1.ravel(), X2.ravel()]) +Z = Z.reshape(X1.shape) +ax.contour( + X1, + X2, + Z, + [-1.0, 0.0, 1.0], + colors="k", + linestyles=["dashed", "solid", "dashed"], + zorder=1, +) +ax.set_title("A non-linear separation"); +``` + +::: {note} +:class: dropdown + +For classification models, the decision boundary that separates the +classes expresses the complexity of the model. For instance, a linear +model, which makes a decision based on a linear combination of +features, is less complex than a non-linear one. +::: + +## Supervised Learning: Classification of Handwritten Digits + +### The nature of the data + +In this section we'll apply scikit-learn to the classification of +handwritten digits. This will go a bit beyond the iris classification we +saw before: we'll discuss some of the metrics which can be used in +evaluating the effectiveness of a classification model. + +```{code-cell} +from sklearn.datasets import load_digits +digits = load_digits() +``` + +Let us visualize the data and remind us what we're looking at: + +```{code-cell} +:tags: [hide-input] + +fig = plt.figure(figsize=(6, 6)) # figure size in inches +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) +for i in range(64): + ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) + ax.imshow(digits.images[i], cmap="binary", interpolation="nearest") + # label the image with the target value + ax.text(0, 7, str(digits.target[i])) +``` + +### Visualizing the Data on its principal components + +A good first-step for many problems is to visualize the data using a +_Dimensionality Reduction_ technique. We'll start with the most +straightforward one, [Principal Component Analysis (PCA)](https://en.wikipedia.org/wiki/Principal_component_analysis).
+ +PCA seeks orthogonal linear combinations of the features which show the +greatest variance, and as such, can help give you a good idea of the +structure of the data set. + +```{code-cell} +from sklearn.decomposition import PCA +pca = PCA(n_components=2) +proj = pca.fit_transform(digits.data) +plt.scatter(proj[:, 0], proj[:, 1], c=digits.target, cmap='Paired') +plt.colorbar(); +``` + +:::{admonition} Question + +Given these projections of the data, which numbers do you think a +classifier might have trouble distinguishing? +::: + +### Gaussian Naive Bayes Classification + +For most classification problems, it's nice to have a simple, fast +method to provide a quick baseline classification. If the simple +and fast method is sufficient, then we don't have to waste CPU cycles on +more complex models. If not, we can use the results of the simple method +to give us clues about our data. + +One good method to keep in mind is Gaussian Naive Bayes +({class}`sklearn.naive_bayes.GaussianNB`). + +:::{sidebar} Old scikit-learn versions +{func}`~sklearn.model_selection.train_test_split` is imported from +`sklearn.cross_validation` +::: + +::: {note} +:class: dropdown + +Gaussian Naive Bayes fits a Gaussian distribution to each training label +independently on each feature, and uses this to quickly give a rough +classification. It is generally not sufficiently accurate for real-world +data, but can perform surprisingly well, for instance on text data. 
+::: + +```{code-cell} +from sklearn.naive_bayes import GaussianNB +from sklearn.model_selection import train_test_split +``` + +```{code-cell} +# split the data into training and validation sets +X_train, X_test, y_train, y_test = train_test_split( + digits.data, digits.target, random_state=42) +``` + +```{code-cell} +# train the model +clf = GaussianNB() +clf.fit(X_train, y_train) +``` + +```{code-cell} +# use the model to predict the labels of the test data +predicted = clf.predict(X_test) +predicted +``` + +```{code-cell} +expected = y_test +expected +``` + +As above, we plot the digits with the predicted labels to get an idea of +how well the classification is working. + +```{code-cell} +:tags: [hide-input] + +# plot the digits: each image is 8x8 pixels +fig = plt.figure(figsize=(6, 6)) # figure size in inches +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) +for i in range(64): + ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) + ax.imshow(X_test.reshape(-1, 8, 8)[i], cmap="binary", interpolation="nearest") + + # label the image with the target value + if predicted[i] == expected[i]: + ax.text(0, 7, str(predicted[i]), color="green") + else: + ax.text(0, 7, str(predicted[i]), color="red") +``` + +:::{admonition} Question + +Why did we split the data into training and validation sets? + +::: + ++++ + +### Quantitative Measurement of Performance + +We'd like to measure the performance of our estimator without having to +resort to plotting examples. A simple method might be to simply compare +the number of matches: + +```{code-cell} +# The number of correct matches +matches = (predicted == expected) +matches.sum() +``` + +```{code-cell} +# The total number of data points +len(matches) +``` + +```{code-cell} +# The ratio of correct predictions +matches.sum() / float(len(matches)) +``` + +We see that more than 80% of the 450 predictions match the input. 
But +there are other more sophisticated metrics that can be used to judge the +performance of a classifier: several are available in the +{mod}`sklearn.metrics` submodule. + +One of the most useful metrics is the `classification_report`, which +combines several measures and prints a table with the results: + +```{code-cell} +from sklearn import metrics + +print(metrics.classification_report(expected, predicted)) +``` + +Another enlightening metric for this sort of multi-class classification +is a _confusion matrix_: it helps us visualize which labels are being +interchanged in the classification errors: + +```{code-cell} +metrics.confusion_matrix(expected, predicted) +``` + +We see here that in particular, the numbers 1, 2, 3, and 9 are often +being labeled 8. + ++++ + +## Supervised Learning: Regression of Housing Data + +Here we'll do a short example of a regression problem: learning a +continuous value from a set of features. + ++++ + +### A quick look at the data + ++++ + +We'll use the California house prices set, available in Scikit-learn. +This records measurements of 8 attributes of housing markets in +California, as well as the median price. The question is: can you predict +the price of a new market given its attributes?: + +```{code-cell} +from sklearn.datasets import fetch_california_housing + +data = fetch_california_housing(as_frame=True) +data.data.shape +``` + +```{code-cell} +data.target.shape +``` + +We can see that there are just over 20000 data points. + +The `DESCR` variable has a long description of the dataset: + +```{code-cell} +print(data.DESCR) +``` + +It often helps to quickly visualize pieces of the data using histograms, +scatter plots, or other plot types.
With matplotlib, let us show a +histogram of the target values: the median price in each neighborhood: + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(4, 3)) +plt.hist(data.target) +plt.xlabel("price ($100k)") +plt.ylabel("count") +plt.tight_layout() +``` + +Let's have a quick look to see if some features are more relevant than +others for our problem: + +```{code-cell} +fig, axes = plt.subplots(3, 3, figsize=(8, 8)) +axes = axes.ravel() +for index, feature_name in enumerate(data.feature_names): + ax = axes[index] + ax.scatter(data.data[feature_name], data.target) + ax.set_ylabel("Price", size=15) + ax.set_xlabel(feature_name, size=15) +axes[-1].axis('off') +plt.tight_layout() +``` + +This is a manual version of a technique called **feature selection**. + +::: {note} +:class: dropdown + +Sometimes, in Machine Learning it is useful to use feature selection to +decide which features are the most useful for a particular problem. +Automated methods exist which quantify this sort of exercise of choosing +the most informative features. +::: + +### Predicting Home Prices: a Simple Linear Regression + +Now we'll use `scikit-learn` to perform a simple linear regression on +the housing data. There are many possibilities of regressors to use. A +particularly simple one is `LinearRegression`: this is basically a +wrapper around an ordinary least squares calculation. 
+ +```{code-cell} +from sklearn.linear_model import LinearRegression + +X_train, X_test, y_train, y_test = train_test_split(data.data, data.target) +clf = LinearRegression() +clf.fit(X_train, y_train) +``` + +```{code-cell} +predicted = clf.predict(X_test) +expected = y_test +print("RMS: %s" % np.sqrt(np.mean((predicted - expected) ** 2))) +``` + +We can plot the error: expected as a function of predicted: + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(4, 3)) +plt.scatter(expected, predicted) +plt.plot([0, 8], [0, 8], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +::: {note} +:class: dropdown + +The prediction at least correlates with the true price, though there are +clearly some biases. We could imagine evaluating the performance of the +regressor by, say, computing the RMS residuals between the true and +predicted price. There are some subtleties in this, however, which we'll +cover in a later section. +::: + +::: {exercise-start} +:label: gradient-boost-tree-ex +:class: dropdown +::: + +There are many other types of regressors available in scikit-learn: +we'll try a more powerful one here. + +**Use the GradientBoostingRegressor class to fit the housing data**. + +**hint** You can copy and paste some of the above code, replacing +{class}`~sklearn.linear_model.LinearRegression` with +{class}`~sklearn.ensemble.GradientBoostingRegressor`: + +```{code-cell} +from sklearn.ensemble import GradientBoostingRegressor +# Instantiate the model, fit the results, and scatter in vs. 
out +``` + +::: {exercise-end} +::: + +::: {solution-start} gradient-boost-tree-ex +:class: dropdown +::: + +```{code-cell} +clf = GradientBoostingRegressor() +clf.fit(X_train, y_train) +``` + +```{code-cell} +predicted = clf.predict(X_test) +expected = y_test +``` + +```{code-cell} +plt.figure(figsize=(4, 3)) +plt.scatter(expected, predicted) +plt.plot([0, 5], [0, 5], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +```{code-cell} +# Print the error rate +print(f"RMS: {np.sqrt(np.mean((predicted - expected) ** 2))!r} ") +``` + +::: {solution-end} +::: + ++++ + +## Measuring prediction performance + +### A quick test on the K-neighbors classifier + +Here we'll continue to look at the digits data, but we'll switch to the +K-Neighbors classifier. The K-neighbors classifier is an instance-based +classifier. The K-neighbors classifier predicts the label of +an unknown point based on the labels of the _K_ nearest points in the +parameter space. + +```{code-cell} +# Get the data (again) +digits = load_digits() +X = digits.data +y = digits.target +``` + +```{code-cell} +# Instantiate and train the classifier +from sklearn.neighbors import KNeighborsClassifier +clf = KNeighborsClassifier(n_neighbors=1) +clf.fit(X, y) +``` + +```{code-cell} +# Check the results using metrics +y_pred = clf.predict(X) + +print(metrics.confusion_matrix(y_pred, y)) +``` + +Apparently, we've found a perfect classifier! But this is misleading for +the reasons we saw before: the classifier essentially "memorizes" all the +samples it has already seen. To really test how well this algorithm +does, we need to try some samples it _hasn't_ yet seen. + +This problem also occurs with regression models. 
In the following we +fit an other instance-based model named "decision tree" to the California +Housing price dataset we introduced previously: + +```{code-cell} +from sklearn.tree import DecisionTreeRegressor +``` + +```{code-cell} +data = fetch_california_housing(as_frame=True) +clf = DecisionTreeRegressor().fit(data.data, data.target) +predicted = clf.predict(data.data) +expected = data.target +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(4, 3)) +plt.scatter(expected, predicted) +plt.plot([0, 5], [0, 5], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +Here again the predictions are seemingly perfect as the model was able to +perfectly memorize the training set. + +:::{warning} +**Performance on test set** + +Performance on test set does not measure overfit (as described above) +::: + ++++ + +### A correct approach: Using a validation set + +Learning the parameters of a prediction function and testing it on the +same data is a methodological mistake: a model that would just repeat the +labels of the samples that it has just seen would have a perfect score +but would fail to predict anything useful on yet-unseen data. 
+ +To avoid over-fitting, we have to define two different sets: + +- a training set X_train, y_train which is used for learning the + parameters of a predictive model +- a testing set X_test, y_test which is used for evaluating the fitted + predictive model + +In scikit-learn such a random split can be quickly computed with the +{func}`~sklearn.model_selection.train_test_split` function: + +```{code-cell} +from sklearn import model_selection + +X = digits.data +y = digits.target +``` + +```{code-cell} +X_train, X_test, y_train, y_test = model_selection.train_test_split( + X, y, test_size=0.25, random_state=0) +``` + +```{code-cell} +print("%r, %r, %r" % (X.shape, X_train.shape, X_test.shape)) +``` + +Now we train on the training data, and test on the testing data: + +```{code-cell} +clf = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train) +y_pred = clf.predict(X_test) +``` + +```{code-cell} +print(metrics.confusion_matrix(y_test, y_pred)) +``` + +```{code-cell} +print(metrics.classification_report(y_test, y_pred)) +``` + +The averaged f1-score is often used as a convenient measure of the +overall performance of an algorithm. It appears in the bottom row +of the classification report; it can also be accessed directly: + +```{code-cell} +metrics.f1_score(y_test, y_pred, average="macro") +``` + +The over-fitting we saw previously can be quantified by computing the +f1-score on the training data itself: + +```{code-cell} +metrics.f1_score(y_train, clf.predict(X_train), average="macro") +``` + +:::{note} +**Regression metrics** In the case of regression models, we +need to use different metrics, such as explained variance. +::: + +### Model Selection via Validation + +::: {note} +:class: dropdown + +We have applied Gaussian Naive Bayes, support vector machines, and +K-nearest neighbors classifiers to the digits dataset. Now that we +have these validation tools in place, we can ask quantitatively which +of the three estimators works best for this dataset.
+::: + +With the default hyper-parameters for each estimator, which gives the best f1 +score on the **validation set**? Recall that hyperparameters are the +parameters set when you instantiate the classifier: for example, the +`n_neighbors` in `clf = KNeighborsClassifier(n_neighbors=1)` + +```{code-cell} +from sklearn.naive_bayes import GaussianNB +from sklearn.svm import LinearSVC +``` + +```{code-cell} +X = digits.data +y = digits.target + +X_train, X_test, y_train, y_test = model_selection.train_test_split( + X, y, test_size=0.25, random_state=0) +``` + +```{code-cell} +for Model in [GaussianNB(), KNeighborsClassifier(), LinearSVC(dual=False)]: + clf = Model.fit(X_train, y_train) + y_pred = clf.predict(X_test) + print('%s: %s' % + (Model.__class__.__name__, metrics.f1_score(y_test, y_pred, average="macro"))) +``` + +::: {exercise-start} +:label: which-hyper-ex +:class: dropdown +::: + +For each classifier, which value for the hyperparameters gives the best +results for the digits data? For {class}`~sklearn.svm.LinearSVC`, use +`loss='hinge'` and `loss='squared_hinge'`. For +{class}`~sklearn.neighbors.KNeighborsClassifier` we use `n_neighbors` between +1 and 10. Note that {class}`~sklearn.naive_bayes.GaussianNB` does not have any +adjustable hyperparameters. 
Your results should look something like this: + +```text +LinearSVC(loss='hinge'): 0.9369152611313591 +LinearSVC(loss='squared_hinge'): 0.9323387371152745 +------------------- +KNeighbors(n_neighbors=1): 0.9913675218842191 +KNeighbors(n_neighbors=2): 0.9848442068835102 +KNeighbors(n_neighbors=3): 0.9867753449543099 +KNeighbors(n_neighbors=4): 0.9803719053818863 +KNeighbors(n_neighbors=5): 0.9804562804949924 +KNeighbors(n_neighbors=6): 0.9757924194139573 +KNeighbors(n_neighbors=7): 0.9780645792142071 +KNeighbors(n_neighbors=8): 0.9780645792142071 +KNeighbors(n_neighbors=9): 0.9780645792142071 +KNeighbors(n_neighbors=10): 0.9755550897728812 +``` + +::: {exercise-end} +::: + +::: {solution-start} which-hyper-ex +:class: dropdown +::: + +```{code-cell} +# test SVC loss +for loss in ["hinge", "squared_hinge"]: + clf = LinearSVC(loss=loss).fit(X_train, y_train) + y_pred = clf.predict(X_test) + print( + f"LinearSVC(loss='{loss}'): {metrics.f1_score(y_test, y_pred, average='macro')}" + ) +print("-------------------") +# test the number of neighbors +for n_neighbors in range(1, 11): + clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train) + y_pred = clf.predict(X_test) + print( + f"KNeighbors(n_neighbors={n_neighbors}): {metrics.f1_score(y_test, y_pred, average='macro')}" + ) +``` + +::: {solution-end} +::: + +### Cross-validation + +Cross-validation consists in repeatedly splitting the data in pairs of +train and test sets, called 'folds'. Scikit-learn comes with a function +to automatically compute score on all these folds. Here we do +{class}`~sklearn.model_selection.KFold` with k=5. 
+ +```{code-cell} +from sklearn.model_selection import cross_val_score + +clf = KNeighborsClassifier() +cross_val_score(clf, X, y, cv=5) +``` + +We can use different splitting strategies, such as random splitting: + +```{code-cell} +from sklearn.model_selection import ShuffleSplit + +cv = ShuffleSplit(n_splits=5) +cross_val_score(clf, X, y, cv=cv) +``` + +::: {note} +:class: dropdown + +There exist [many different cross-validation +strategies](https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators) +in scikit-learn. They are often useful to take into account non-i.i.d. datasets. +::: + ++++ + +### Hyperparameter optimization with cross-validation + +Consider regularized linear models, such as _Ridge Regression_, which uses l2 +regularization, and _Lasso Regression_, which uses l1 regularization. Choosing +their regularization parameter is important. + +Let us set these parameters on the Diabetes dataset, a simple regression +problem. The diabetes data consists of 10 physiological variables (age, +sex, weight, blood pressure) measured on 442 patients, and an indication +of disease progression after one year: + +```{code-cell} +from sklearn.datasets import load_diabetes + +data = load_diabetes() +X, y = data.data, data.target +X.shape +``` + +With the default hyper-parameters, we compute the cross-validation score: + +```{code-cell} +from sklearn.linear_model import Ridge, Lasso + +for Model in [Ridge, Lasso]: + model = Model() + print(f"{Model.__name__}: {cross_val_score(model, X, y).mean()}") +``` + +#### Basic Hyperparameter Optimization + +We compute the cross-validation score as a function of alpha, the +strength of the regularization for {class}`~sklearn.linear_model.Lasso` +and {class}`~sklearn.linear_model.Ridge`.
We choose 30 values of alpha +between 0.001 and 0.1: + +```{code-cell} +alphas = np.logspace(-3, -1, 30) +``` + +```{code-cell} +plt.figure(figsize=(5, 3)) +for Model in [Lasso, Ridge]: + scores = [cross_val_score(Model(alpha), X, y, cv=3).mean() + for alpha in alphas] + plt.plot(alphas, scores, label=Model.__name__) +plt.legend(loc="lower left") +plt.xlabel("alpha") +plt.ylabel("cross validation score") +plt.tight_layout() +``` + +:::{admonition} Question + +Can we trust our results to be actually useful? +::: + ++++ + +#### Automatically Performing Grid Search + +{class}`sklearn.model_selection.GridSearchCV` is constructed with an +estimator, as well as a dictionary of parameter values to be searched. +We can find the optimal parameters this way: + +```{code-cell} +from sklearn.model_selection import GridSearchCV + +for Model in [Ridge, Lasso]: + gscv = GridSearchCV(Model(), dict(alpha=alphas), cv=3).fit(X, y) + print('%s: %s' % (Model.__name__, gscv.best_params_)) +``` + +#### Built-in Hyperparameter Search + +For some models within scikit-learn, cross-validation can be performed +more efficiently on large datasets. In this case, a cross-validated +version of the particular model is included. The cross-validated +versions of {class}`~sklearn.linear_model.Ridge` and +{class}`~sklearn.linear_model.Lasso` are +{class}`~sklearn.linear_model.RidgeCV` and +{class}`~sklearn.linear_model.LassoCV`, respectively. Parameter search +on these estimators can be performed as follows: + +```{code-cell} +from sklearn.linear_model import RidgeCV, LassoCV +for Model in [RidgeCV, LassoCV]: + model = Model(alphas=alphas, cv=3).fit(X, y) + print('%s: %s' % (Model.__name__, model.alpha_)) +``` + +We see that the results match those returned by GridSearchCV. + ++++ + +#### Nested cross-validation + +How do we measure the performance of these estimators? We have used data +to set the hyperparameters, so we need to test on actually new data.
We +can do this by running {func}`~sklearn.model_selection.cross_val_score` +on our CV objects. Here there are 2 cross-validation loops going on, this +is called _'nested cross validation'_: + +```{code-cell} +for Model in [RidgeCV, LassoCV]: + scores = cross_val_score(Model(alphas=alphas, cv=3), X, y, cv=3) + print(Model.__name__, np.mean(scores)) +``` + +:::{note} +Note that these results do not match the best results of our curves +above, and {class}`~sklearn.linear_model.LassoCV` seems to +under-perform {class}`~sklearn.linear_model.RidgeCV`. The reason is +that setting the hyper-parameter is harder for Lasso, thus the +estimation error on this hyper-parameter is larger. +::: + ++++ + +## Unsupervised Learning: Dimensionality Reduction and Visualization + +Unsupervised learning is applied on X without y: data without labels. A +typical use case is to find hidden structure in the data. + +### Dimensionality Reduction: PCA + +Dimensionality reduction derives a set of new artificial features smaller +than the original feature set. Here we'll use [Principal Component +Analysis (PCA)](https://en.wikipedia.org/wiki/Principal_component_analysis), a +dimensionality reduction that strives to retain most of the variance of +the original data. We'll use {class}`sklearn.decomposition.PCA` on the +iris dataset: + +```{code-cell} +X = iris.data +y = iris.target +``` + +::: {note} +:class: dropdown + +{class}`~sklearn.decomposition.PCA` computes linear combinations of +the original features using a truncated Singular Value Decomposition +of the matrix X, to project the data onto a base of the top singular +vectors. 
+::: + +```{code-cell} +from sklearn.decomposition import PCA + +pca = PCA(n_components=2, whiten=True) +pca.fit(X) +``` + +Once fitted, {class}`~sklearn.decomposition.PCA` exposes the singular +vectors in the `components_` attribute: + +```{code-cell} +pca.components_ +``` + +Other attributes are available as well: + +```{code-cell} +pca.explained_variance_ratio_ +``` + +Let us project the iris dataset along those first two dimensions: + +```{code-cell} +X_pca = pca.transform(X) +X_pca.shape +``` + +{class}`~sklearn.decomposition.PCA` `normalizes` and `whitens` the data, which +means that the data is now centered on both components with unit variance: + +```{code-cell} +X_pca.mean(axis=0) +``` + +```{code-cell} +X_pca.std(axis=0, ddof=1) +``` + +Furthermore, the sample components no longer carry any linear +correlation: + +```{code-cell} +np.corrcoef(X_pca.T) +``` + +With a number of retained components 2 or 3, PCA is useful to visualize +the dataset: + +```{code-cell} +plt.figure(figsize=(6, 5)) +target_ids = range(len(iris.target_names)) +for i, c, label in zip(target_ids, 'rgbcmykw', iris.target_names): + plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], + c=c, label=label) +plt.legend(); +``` + +::: {note} +:class: dropdown + +Note that this projection was determined _without_ any information +about the labels (represented by the colors): this is the sense in +which the learning is **unsupervised**. Nevertheless, we see that the +projection gives us insight into the distribution of the different +flowers in parameter space: notably, _iris setosa_ is much more +distinct than the other two species. +::: + ++++ + +### Visualization with a non-linear embedding: tSNE + +For visualization, more complex embeddings can be useful (for statistical +analysis, they are harder to control). {class}`sklearn.manifold.TSNE` is +such a powerful manifold learning method. We apply it to the _digits_ +dataset, as the digits are vectors of dimension 8\*8 = 64.
Embedding them +in 2D enables visualization: + +```{code-cell} +# Take the first 500 data points: it's hard to see 1500 points +X = digits.data[:500] +y = digits.target[:500] +``` + +```{code-cell} +# Fit and transform with a TSNE +from sklearn.manifold import TSNE + +tsne = TSNE(n_components=2, random_state=0) +X_2d = tsne.fit_transform(X) +``` + +```{code-cell} +# Visualize the data +target_ids = range(len(digits.target_names)) +plt.figure(figsize=(6, 5)) +colors = "r", "g", "b", "c", "m", "y", "k", "w", "orange", "purple" +for i, c, label in zip(target_ids, colors, digits.target_names, strict=True): + plt.scatter(X_2d[y == i, 0], X_2d[y == i, 1], c=c, label=label) +plt.legend(); +``` + +:::{admonition} fit_transform +As {class}`~sklearn.manifold.TSNE` cannot be applied to new data, we +need to use its `fit_transform` method. +::: + +{class}`sklearn.manifold.TSNE` separates quite well the different classes +of digits even though it had no access to the class information. + ++++ + +::: {exercise-start} +:label: digit-dims-ex +:class: dropdown +::: + +{mod}`sklearn.manifold` has many other non-linear embeddings. Try +them out on the digits dataset. Could you judge their quality without +knowing the labels `y`? + +```{code-cell} +from sklearn.datasets import load_digits +digits = load_digits() +# ... +``` + +::: {exercise-end} +::: + ++++ + +## Parameter selection, Validation, and Testing + +### Hyperparameters, Over-fitting, and Under-fitting + +:::{admonition} See also + +This section is adapted from [Andrew Ng's excellent +Coursera course](https://www.coursera.org/course/ml) +::: + +The issues associated with validation and cross-validation are some of +the most important aspects of the practice of machine learning. +Selecting the optimal model for your data is vital, and is a piece of +the problem that is not often appreciated by machine learning +practitioners. 
+ +The central question is: **If our estimator is underperforming, how +should we move forward?** + +- Use a simpler or more complicated model? +- Add more features to each observed data point? +- Add more training samples? + +The answer is often counter-intuitive. In particular, **Sometimes using +a more complicated model will give worse results.** Also, **Sometimes +adding training data will not improve your results.** The ability to +determine what steps will improve your model is what separates the +successful machine learning practitioners from the unsuccessful. + +#### Bias-variance trade-off: illustration on a simple regression problem + +Let us start with a simple 1D regression problem. This +will help us to easily visualize the data and the model, and the results +generalize easily to higher-dimensional datasets. We'll explore a simple +**linear regression** problem, with {mod}`sklearn.linear_model`. + +We consider the situation where we have only 2 data points: + +```{code-cell} +X = np.c_[0.5, 1].T +y = [0.5, 1] +X_test = np.c_[0, 2].T +``` + +Without noise, linear regression fits the data perfectly: + +```{code-cell} +from sklearn import linear_model +``` + +```{code-cell} +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(X, y, "o") +plt.plot(X_test, regr.predict(X_test)) +``` + +In real-life situations, we have noise (e.g. measurement noise) in our data: + +```{code-cell} +rng = np.random.default_rng(27446968) +for _ in range(6): + noisy_X = X + rng.normal(loc=0, scale=0.1, size=X.shape) + plt.plot(noisy_X, y, "o") + regr.fit(noisy_X, y) + plt.plot(X_test, regr.predict(X_test)) +``` + +As we can see, our linear model captures and amplifies the noise in the +data. It displays a lot of variance. + +We can use another linear estimator that uses regularization, the +{class}`~sklearn.linear_model.Ridge` estimator.
This estimator regularizes the +coefficients by shrinking them to zero, under the assumption that very high +correlations are often spurious. The alpha parameter controls the amount of +shrinkage used. + +```{code-cell} +regr = linear_model.Ridge(alpha=0.1) +for _ in range(6): + noisy_X = X + rng.normal(loc=0, scale=0.1, size=X.shape) + plt.plot(noisy_X, y, "o") + regr.fit(noisy_X, y) + plt.plot(X_test, regr.predict(X_test)) +``` + +As we can see, the estimator displays much less variance. However it +systematically under-estimates the coefficient. It displays a biased +behavior. + +This is a typical example of **bias/variance trade-off**: non-regularized +estimator are not biased, but they can display a lot of variance. +Highly-regularized models have little variance, but high bias. This bias +is not necessarily a bad thing: what matters is choosing the +trade-off between bias and variance that leads to the best prediction +performance. For a specific dataset there is a sweet spot corresponding +to the highest complexity that the data can support, depending on the +amount of noise and of observations available. + ++++ + +### Visualizing the Bias/Variance Tradeoff + +::: {note} +:class: dropdown + +Given a particular dataset and a model (e.g. a polynomial), we'd like to +understand whether bias (underfit) or variance limits prediction, and how +to tune the _hyperparameter_ (here `d`, the degree of the polynomial) +to give the best fit. 
+::: + +On a given data, let us fit a simple polynomial regression model with +varying degrees: + +```{code-cell} +:tags: [hide-input] + +# A polynomial regression +from sklearn.pipeline import make_pipeline +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import PolynomialFeatures + +n_samples = 8 + +def generating_func(x, rng=None, error=0.5): + rng = np.random.default_rng(rng) + return rng.normal(10 - 1.0 / (x + 0.1), error) + +rng = np.random.default_rng(27446968) +x = 10 ** np.linspace(-2, 0, n_samples) +y = generating_func(x, rng=rng) + +x_test = np.linspace(-0.2, 1.2, 1000) + +titles = ["d = 1 (under-fit; high bias)", "d = 2", "d = 6 (over-fit; high variance)"] +degrees = [1, 2, 6] + +fig, axes = plt.subplots(1, 3, figsize=(9, 3.5)) +fig.subplots_adjust(left=0.06, right=0.98, bottom=0.15, top=0.85, wspace=0.05) +for i, d in enumerate(degrees): + ax = axes[i] + ax.scatter(x, y, marker="x", c="k", s=50) + + model = make_pipeline(PolynomialFeatures(d), LinearRegression()) + model.fit(x[:, np.newaxis], y) + + ax.plot(x_test, model.predict(x_test[:, np.newaxis]), "-b") + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_xlim(-0.2, 1.2) + ax.set_ylim(0, 12) + ax.set_xlabel("house size") + if i == 0: + ax.set_ylabel("price") + ax.set_title(titles[i]) +``` + +::: {note} +:class: dropdown + +In the above figure, we see fits for three different values of `d`. +For `d = 1`, the data is under-fit. This means that the model is too +simplistic: no straight line will ever be a good fit to this data. In +this case, we say that the model suffers from high bias. The model +itself is biased, and this will be reflected in the fact that the data +is poorly fit. At the other extreme, for `d = 6` the data is over-fit. +This means that the model has too many free parameters (6 in this case) +which can be adjusted to perfectly fit the training data. 
If we add a +new point to this plot, though, chances are it will be very far from the +curve representing the degree-6 fit. In this case, we say that the model +suffers from high variance. The reason for the term "high variance" is +that if any of the input points are varied slightly, it could result in +a very different model. + +In the middle, for `d = 2`, we have found a good mid-point. It fits +the data fairly well, and does not suffer from the bias and variance +problems seen in the figures on either side. What we would like is a way +to quantitatively identify bias and variance, and optimize the +metaparameters (in this case, the polynomial degree d) in order to +determine the best algorithm. +::: + +#### Polynomial regression with scikit-learn + +A polynomial regression is built by pipelining +{class}`~sklearn.preprocessing.PolynomialFeatures` +and a {class}`~sklearn.linear_model.LinearRegression`: + +```{code-cell} +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures +from sklearn.linear_model import LinearRegression +model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression()) +``` + +#### Validation Curves + +Let us create a dataset like in the example above: + +```{code-cell} +# randomly sample more data +rng = np.random.default_rng(27446968) +x = rng.random(size=200) +y = generating_func(x, rng=rng, error=1.) +``` + +Central to quantify bias and variance of a model is to apply it on _test +data_, sampled from the same distribution as the train, but that will +capture independent noise: + +```{code-cell} +# split into training, validation, and testing sets. 
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4) +``` + +```{code-cell} +:tags: [hide-input] + +# show the training and validation sets +plt.figure(figsize=(6, 4)) +plt.scatter(x_train, y_train, color="red", label="Training set") +plt.scatter(x_test, y_test, color="blue", label="Test set") +plt.title("The data") +plt.legend(loc="best") +``` + +**Validation curve** A validation curve consists in varying a model parameter +that controls its complexity (here the degree of the +polynomial) and measuring the error of the model both on training data and on +test data (_e.g._ with cross-validation). The model parameter is then +adjusted so that the test error is minimized. + +We use {func}`sklearn.model_selection.validation_curve` to compute train +and test error, and plot it: + +```{code-cell} +from sklearn.model_selection import validation_curve +``` + +```{code-cell} +degrees = np.arange(1, 21) +model = make_pipeline(PolynomialFeatures(), LinearRegression()) +``` + +```{code-cell} +# Vary the "degrees" on the pipeline step "polynomialfeatures" +train_scores, validation_scores = validation_curve( + model, x[:, np.newaxis], y, + param_name='polynomialfeatures__degree', + param_range=degrees) +``` + +Plot the mean train score and validation score across folds: + +```{code-cell} +plt.figure(figsize=(6, 4)) +plt.plot(degrees, validation_scores.mean(axis=1), lw=2, label="cross-validation") +plt.plot(degrees, train_scores.mean(axis=1), lw=2, label="training") +plt.legend(loc="best") +plt.xlabel("degree of fit") +plt.ylabel("explained variance") +plt.title("Validation curve") +plt.tight_layout() +``` + +This figure shows why validation is important. On the left side of the +plot, we have very low-degree polynomials, which under-fit the data. This +leads to a low explained variance for both the training set and the +validation set. On the far right side of the plot, we have a very high +degree polynomial, which over-fits the data. 
This can be seen in the fact +that the training explained variance is very high, while on the +validation set, it is low. Choosing `d` around 4 or 5 gets us the best +trade-off. + +::: {note} +:class: dropdown + +The astute reader will realize that something is amiss here: in the +above plot, `d` around 7 gives the best results. But in the previous plot, +we found that `d = 6` vastly over-fits the data. What’s going on here? +The difference is the **number of training points** used. In the +previous example, there were only eight training points. In this +example, we have 200. As a general rule of thumb, the more training +points used, the more complicated the model that can be used. But how can you +determine for a given model whether more training points will be +helpful? A useful diagnostic for this is the learning curve. +::: + ++++ + +#### Learning Curves + +A learning curve shows the training and validation score as a +function of the number of training points. Note that when we train on a +subset of the training data, the training score is computed using +this subset, not the full training set. This curve gives a +quantitative view into how beneficial it will be to add training +samples. + +:::{admonition} Questions: + +- As the number of training samples is increased, what do you expect + to see for the training score? For the validation score? +- Would you expect the training score to be higher or lower than the + validation score? Would you ever expect this to change? 
+ ::: + +Scikit-learn provides {func}`sklearn.model_selection.learning_curve`: + ++++ + +Here is the pattern for using a learning curve, here with an order 1 polynomial and linear regression: + +```{code-cell} +from sklearn.model_selection import learning_curve + +model = make_pipeline(PolynomialFeatures(degree=1), LinearRegression()) + +train_sizes, train_scores, validation_scores = learning_curve( + model, x[:, np.newaxis], y, train_sizes=np.logspace(-1, 0, 20)) +``` + +```{code-cell} +:tags: [hide-input] + +# Plot the mean train score and validation score across folds +def plot_model(d): + plt.figure(figsize=(6, 4)) + model = make_pipeline(PolynomialFeatures(degree=d), LinearRegression()) + train_sizes, train_scores, validation_scores = learning_curve( + model, x[:, np.newaxis], y, train_sizes=np.logspace(-1, 0, 20)) + plt.plot(train_sizes, validation_scores.mean(axis=1), lw=2, label='cross-validation') + plt.plot(train_sizes, train_scores.mean(axis=1), lw=2, label='training') + plt.ylim(ymin=-0.1, ymax=1) + plt.legend(loc="best") + plt.xlabel("number of train samples") + plt.ylabel("explained variance") + plt.title(f"Learning curve (degree={d})") + plt.tight_layout() + +plot_model(1) +``` + + + +Note that the validation score _generally increases_ with a growing +training set, while the training score _generally decreases_ with a +growing training set. As the training size +increases, they will converge to a single value. + +From the above discussion, we know that `d = 1` is a high-bias +estimator which under-fits the data. This is indicated by the fact that +both the training and validation scores are low. When confronted +with this type of learning curve, we can expect that adding more +training data will not help: both lines converge to a +relatively low score. + +**When the learning curves have converged to a low score, we have a +high bias model.** + +A high-bias model can be improved by: + +- Using a more sophisticated model (i.e. 
in this case, increase `d`) +- Gather more features for each sample. +- Decrease regularization in a regularized model. + +Increasing the number of samples, however, does not improve a high-bias +model. + +Now let's look at a high-variance (i.e. over-fit) model: + +```{code-cell} +:tags: [hide-input] + +plot_model(15) +``` + +Here we show the learning curve for `d = 15`. From the above +discussion, we know that `d = 15` is a **high-variance** estimator +which **over-fits** the data. This is indicated by the fact that the +training score is much higher than the validation score. As we add more +samples to this training set, the training score will continue to +decrease, while the cross-validation score will continue to increase, until they +meet in the middle. + +**Learning curves that have not yet converged with the full training +set indicate a high-variance, over-fit model.** + +A high-variance model can be improved by: + +- Gathering more training samples. +- Using a less-sophisticated model (i.e. in this case, make `d` + smaller) +- Increasing regularization. + +In particular, gathering more features for each sample will not help the +results. + +### Summary on model selection + +We’ve seen above that an under-performing algorithm can be due to two +possible situations: high bias (under-fitting) and high variance +(over-fitting). In order to evaluate our algorithm, we set aside a +portion of our training data for cross-validation. Using the technique +of learning curves, we can train on progressively larger subsets of the +data, evaluating the training error and cross-validation error to +determine whether our algorithm has high variance or high bias. But what +do we do with this information? + +#### High Bias + +If a model shows high **bias**, the following actions might help: + +- **Add more features**. 
In our example of predicting home prices, it + may be helpful to make use of information such as the neighborhood + the house is in, the year the house was built, the size of the lot, + etc. Adding these features to the training and test sets can improve + a high-bias estimator +- **Use a more sophisticated model**. Adding complexity to the model + can help improve on bias. For a polynomial fit, this can be + accomplished by increasing the degree d. Each learning technique has + its own methods of adding complexity. +- **Use fewer samples**. Though this will not improve the + classification, a high-bias algorithm can attain nearly the same + error with a smaller training sample. For algorithms which are + computationally expensive, reducing the training sample size can lead + to very large improvements in speed. +- **Decrease regularization**. Regularization is a technique used to + impose simplicity in some machine learning models, by adding a + penalty term that depends on the characteristics of the parameters. + If a model has high bias, decreasing the effect of regularization can + lead to better results. + +#### High Variance + +If a model shows **high variance**, the following actions might +help: + +- **Use fewer features**. Using a feature selection technique may be + useful, and decrease the over-fitting of the estimator. +- **Use a simpler model**. Model complexity and over-fitting go + hand-in-hand. +- **Use more training samples**. Adding training samples can reduce the + effect of over-fitting, and lead to improvements in a high variance + estimator. +- **Increase Regularization**. Regularization is designed to prevent + over-fitting. In a high-variance model, increasing regularization can + lead to better results. + +These choices become very important in real-world situations. 
For +example, due to limited telescope time, astronomers must seek a balance +between observing a large number of objects, and observing a large +number of features for each object. Determining which is more important +for a particular learning task can inform the observing strategy that +the astronomer employs. + +### A last word of caution: separate validation and test set + +Using validation schemes to determine hyper-parameters means that we are +fitting the hyper-parameters to the particular validation set. In the +same way that parameters can be over-fit to the training set, +hyperparameters can be over-fit to the validation set. Because of this, +the validation error tends to under-predict the classification error of +new data. + +For this reason, it is recommended to split the data into three sets: + +- The **training set**, used to train the model (usually ~60% of the + data) +- The **validation set**, used to validate the model (usually ~20% of + the data) +- The **test set**, used to evaluate the expected error of the + validated model (usually ~20% of the data) + +Many machine learning practitioners do not separate test set and +validation set. But if your goal is to gauge the error of a model on +unknown data, using an independent test set is vital. + +:::{admonition} See also + +**Going further** + +- The [documentation of scikit-learn](https://scikit-learn.org) is + very complete and didactic. +- [Introduction to Machine Learning with Python](https://shop.oreilly.com/product/0636920030515.do), + by Sarah Guido, Andreas Müller + ([notebooks available here](https://github.com/amueller/introduction_to_ml_with_python)). + ::: diff --git a/packages/scikit-learn/index.rst b/packages/scikit-learn/index.rst deleted file mode 100644 index eeabed66e..000000000 --- a/packages/scikit-learn/index.rst +++ /dev/null @@ -1,1756 +0,0 @@ -.. 
_scikit-learn_chapter: - -======================================== -scikit-learn: machine learning in Python -======================================== - -**Authors**: *Gael Varoquaux* - -.. image:: images/scikit-learn-logo.png - :scale: 40 - :align: right - -.. topic:: Prerequisites - - .. rst-class:: horizontal - - * :ref:`numpy ` - * :ref:`scipy ` - * :ref:`matplotlib (optional) ` - * :ref:`ipython (the enhancements come handy) ` - -.. sidebar:: **Acknowledgements** - - This chapter is adapted from `a tutorial - `__ given by Gaël - Varoquaux, Jake Vanderplas, Olivier Grisel. - -.. seealso:: **Data science in Python** - - * The :ref:`statistics` chapter may also be of interest - for readers looking into machine learning. - - * The `documentation of scikit-learn `_ is - very complete and didactic. - -.. contents:: Chapters contents - :local: - :depth: 1 - -.. For doctests - >>> import numpy as np - >>> # For doctest on headless environments - >>> import matplotlib.pyplot as plt - -.. currentmodule:: sklearn - -Introduction: problem settings -============================== - -What is machine learning? -------------------------- - -.. tip:: - - Machine Learning is about building programs with **tunable - parameters** that are adjusted automatically so as to improve their - behavior by **adapting to previously seen data.** - - Machine Learning can be considered a subfield of **Artificial - Intelligence** since those algorithms can be seen as building blocks - to make computers learn to behave more intelligently by somehow - **generalizing** rather that just storing and retrieving data items - like a database system would do. - -.. figure:: auto_examples/images/sphx_glr_plot_separator_001.png - :align: right - :target: auto_examples/plot_separator.html - :width: 350 - - A classification problem - -We'll take a look at two very simple machine learning tasks here. 
The -first is a **classification** task: the figure shows a collection of -two-dimensional data, colored according to two different class labels. A -classification algorithm may be used to draw a dividing boundary between -the two clusters of points: - -By drawing this separating line, we have learned a model which can -**generalize** to new data: if you were to drop another point onto the -plane which is unlabeled, this algorithm could now **predict** whether -it's a blue or a red point. - -.. raw:: html - -
- -.. figure:: auto_examples/images/sphx_glr_plot_linear_regression_001.png - :align: right - :target: auto_examples/plot_linear_regression.html - :width: 350 - - A regression problem - -| - -The next simple task we'll look at is a **regression** task: a simple -best-fit line to a set of data. - -Again, this is an example of fitting a model to data, but our focus here -is that the model can make generalizations about new data. The model has -been **learned** from the training data, and can be used to predict the -result of test data: here, we might be given an x-value, and the model -would allow us to predict the y value. - -Data in scikit-learn --------------------- - -The data matrix -~~~~~~~~~~~~~~~ - -Machine learning algorithms implemented in scikit-learn expect data -to be stored in a **two-dimensional array or matrix**. The arrays can be -either ``numpy`` arrays, or in some cases ``scipy.sparse`` matrices. The -size of the array is expected to be ``[n_samples, n_features]`` - -- **n\_samples:** The number of samples: each sample is an item to - process (e.g. classify). A sample can be a document, a picture, a - sound, a video, an astronomical object, a row in database or CSV - file, or whatever you can describe with a fixed set of quantitative - traits. -- **n\_features:** The number of features or distinct traits that can - be used to describe each item in a quantitative manner. Features are - generally real-valued, but may be boolean or discrete-valued in some - cases. - -.. tip:: - - The number of features must be fixed in advance. However it can be - very high dimensional (e.g. millions of features) with most of them - being zeros for a given sample. This is a case where ``scipy.sparse`` - matrices can be useful, in that they are much more memory-efficient - than NumPy arrays. - -A Simple Example: the Iris Dataset -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The application problem -....................... 
- -As an example of a simple dataset, let us a look at the -iris data stored by scikit-learn. Suppose we want to recognize species of -irises. The data consists of measurements of -three different species of irises: - -.. |setosa_picture| image:: images/iris_setosa.jpg - -.. |versicolor_picture| image:: images/iris_versicolor.jpg - -.. |virginica_picture| image:: images/iris_virginica.jpg - -===================== ===================== ===================== -|setosa_picture| |versicolor_picture| |virginica_picture| -===================== ===================== ===================== -Setosa Iris Versicolor Iris Virginica Iris -===================== ===================== ===================== - - -.. topic:: **Quick Question:** - :class: green - - **If we want to design an algorithm to recognize iris species, what - might the data be?** - - Remember: we need a 2D array of size ``[n_samples x n_features]``. - - - What would the ``n_samples`` refer to? - - - What might the ``n_features`` refer to? - -Remember that there must be a **fixed** number of features for each -sample, and feature number ``i`` must be a similar kind of quantity for -each sample. - -Loading the Iris Data with Scikit-learn -....................................... - -Scikit-learn has a very straightforward set of data on these iris -species. The data consist of the following: - -- Features in the Iris dataset: - - .. rst-class:: horizontal - - * sepal length (cm) - * sepal width (cm) - * petal length (cm) - * petal width (cm) - -- Target classes to predict: - - .. rst-class:: horizontal - - * Setosa - * Versicolour - * Virginica - -:mod:`scikit-learn` embeds a copy of the iris CSV file along with a -function to load it into NumPy arrays:: - - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - -.. 
note:: - - **Import sklearn** Note that scikit-learn is imported as :mod:`sklearn` - -The features of each sample flower are stored in the ``data`` attribute -of the dataset:: - - >>> print(iris.data.shape) - (150, 4) - >>> n_samples, n_features = iris.data.shape - >>> print(n_samples) - 150 - >>> print(n_features) - 4 - >>> print(iris.data[0]) - [5.1 3.5 1.4 0.2] - -The information about the class of each sample is stored in the -``target`` attribute of the dataset:: - - >>> print(iris.target.shape) - (150,) - >>> print(iris.target) - [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 - 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 - 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 - 2 2] - -The names of the classes are stored in the last attribute, namely -``target_names``:: - - >>> print(iris.target_names) - ['setosa' 'versicolor' 'virginica'] - -This data is four-dimensional, but we can visualize two of the -dimensions at a time using a scatter plot: - -.. image:: auto_examples/images/sphx_glr_plot_iris_scatter_001.png - :align: left - :target: auto_examples/plot_iris_scatter.html - -.. topic:: **Exercise**: - :class: green - - Can you choose 2 features to find a plot where it is easier to - separate the different classes of irises? - - **Hint**: click on the figure above to see the code that generates it, - and modify this code. - - -Basic principles of machine learning with scikit-learn -====================================================== - -Introducing the scikit-learn estimator object ----------------------------------------------- - -Every algorithm is exposed in scikit-learn via an ''Estimator'' object. 
-For instance a linear regression is: :class:`sklearn.linear_model.LinearRegression` :: - - >>> from sklearn.linear_model import LinearRegression - -**Estimator parameters**: All the parameters of an estimator can be set -when it is instantiated:: - - >>> model = LinearRegression(n_jobs=1) - >>> print(model) - LinearRegression(n_jobs=1) - -Fitting on data -~~~~~~~~~~~~~~~ - -Let's create some simple data with :ref:`numpy `:: - - >>> import numpy as np - >>> x = np.array([0, 1, 2]) - >>> y = np.array([0, 1, 2]) - - >>> X = x[:, np.newaxis] # The input data for sklearn is 2D: (samples == 3 x features == 1) - >>> X - array([[0], - [1], - [2]]) - - >>> model.fit(X, y) - LinearRegression(n_jobs=1) - -**Estimated parameters**: When data is fitted with an estimator, -parameters are estimated from the data at hand. All the estimated -parameters are attributes of the estimator object ending by an -underscore:: - - >>> model.coef_ - array([1.]) - -Supervised Learning: Classification and regression --------------------------------------------------- - -In **Supervised Learning**, we have a dataset consisting of both -features and labels. The task is to construct an estimator which is able -to predict the label of an object given the set of features. A -relatively simple example is predicting the species of iris given a set -of measurements of its flower. This is a relatively simple task. Some -more complicated examples are: - -- given a multicolor image of an object through a telescope, determine - whether that object is a star, a quasar, or a galaxy. -- given a photograph of a person, identify the person in the photo. -- given a list of movies a person has watched and their personal rating - of the movie, recommend a list of movies they would like (So-called - *recommender systems*: a famous example is the `Netflix - Prize `__). - -.. 
tip:: - - What these tasks have in common is that there is one or more unknown - quantities associated with the object which needs to be determined from - other observed quantities. - -Supervised learning is further broken down into two categories, -**classification** and **regression**. In classification, the label is -discrete, while in regression, the label is continuous. For example, in -astronomy, the task of determining whether an object is a star, a -galaxy, or a quasar is a classification problem: the label is from three -distinct categories. On the other hand, we might wish to estimate the -age of an object based on such observations: this would be a regression -problem, because the label (age) is a continuous quantity. - -**Classification**: K nearest neighbors (kNN) is one of the simplest -learning strategies: given a new, unknown observation, look up in your -reference database which ones have the closest features and assign the -predominant class. Let's try it out on our iris classification problem:: - - from sklearn import neighbors, datasets - iris = datasets.load_iris() - X, y = iris.data, iris.target - knn = neighbors.KNeighborsClassifier(n_neighbors=1) - knn.fit(X, y) - # What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal? - print(iris.target_names[knn.predict([[3, 5, 4, 2]])]) - - -.. figure:: auto_examples/images/sphx_glr_plot_iris_knn_001.png - :align: center - :target: auto_examples/plot_iris_knn.html - - A plot of the sepal space and the prediction of the KNN - -**Regression**: The simplest possible regression setting is the linear -regression one: - -.. literalinclude:: examples/plot_linear_regression.py - :start-after: import matplotlib.pyplot as plt - :end-before: plot the results - -.. figure:: auto_examples/images/sphx_glr_plot_linear_regression_001.png - :align: center - :target: auto_examples/plot_linear_regression.html - - A plot of a simple linear regression. 
- -A recap on Scikit-learn's estimator interface ---------------------------------------------- - -Scikit-learn strives to have a uniform interface across all methods, and -we’ll see examples of these below. Given a scikit-learn *estimator* -object named ``model``, the following methods are available: - -:In **all Estimators**: - - - ``model.fit()`` : fit training data. For supervised learning - applications, this accepts two arguments: the data ``X`` and the - labels ``y`` (e.g. ``model.fit(X, y)``). For unsupervised learning - applications, this accepts only a single argument, the data ``X`` - (e.g. ``model.fit(X)``). - -:In **supervised estimators**: - - - ``model.predict()`` : given a trained model, predict the label of a - new set of data. This method accepts one argument, the new data - ``X_new`` (e.g. ``model.predict(X_new)``), and returns the learned - label for each object in the array. - - ``model.predict_proba()`` : For classification problems, some - estimators also provide this method, which returns the probability - that a new observation has each categorical label. In this case, the - label with the highest probability is returned by - ``model.predict()``. - - ``model.score()`` : for classification or regression problems, most - (all?) estimators implement a score method. Scores are between 0 and - 1, with a larger score indicating a better fit. - -:In **unsupervised estimators**: - - - ``model.transform()`` : given an unsupervised model, transform new - data into the new basis. This also accepts one argument ``X_new``, - and returns the new representation of the data based on the - unsupervised model. - - ``model.fit_transform()`` : some estimators implement this method, - which more efficiently performs a fit and a transform on the same - input data. 
- -Regularization: what it is and why it is necessary --------------------------------------------------- - -Preferring simpler models -~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Train errors** Suppose you are using a 1-nearest neighbor estimator. -How many errors do you expect on your train set? - -- Train set error is not a good measurement of prediction performance. - You need to leave out a test set. -- In general, we should accept errors on the train set. - -**An example of regularization** The core idea behind regularization is -that we are going to prefer models that are simpler, for a certain -definition of ''simpler'', even if they lead to more errors on the train -set. - -As an example, let's generate with a 9th order polynomial, with noise: - -.. figure:: auto_examples/images/sphx_glr_plot_polynomial_regression_001.png - :align: center - :scale: 90 - :target: auto_examples/plot_polynomial_regression.html - -And now, let's fit a 4th order and a 9th order polynomial to the data. - -.. figure:: auto_examples/images/sphx_glr_plot_polynomial_regression_002.png - :align: center - :scale: 90 - :target: auto_examples/plot_polynomial_regression.html - -With your naked eyes, which model do you prefer, the 4th order one, or -the 9th order one? - -Let's look at the ground truth: - -.. figure:: auto_examples/images/sphx_glr_plot_polynomial_regression_003.png - :align: center - :scale: 90 - :target: auto_examples/plot_polynomial_regression.html - -.. tip:: - - Regularization is ubiquitous in machine learning. Most scikit-learn - estimators have a parameter to tune the amount of regularization. For - instance, with k-NN, it is 'k', the number of nearest neighbors used to - make the decision. k=1 amounts to no regularization: 0 error on the - training set, whereas large k will push toward smoother decision - boundaries in the feature space. - -Simple versus complex models for classification -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
|linear| image:: auto_examples/images/sphx_glr_plot_svm_non_linear_001.png - :width: 400 - :target: auto_examples/plot_svm_non_linear.html - -.. |nonlinear| image:: auto_examples/images/sphx_glr_plot_svm_non_linear_002.png - :width: 400 - :target: auto_examples/plot_svm_non_linear.html - -========================== ========================== -|linear| |nonlinear| -========================== ========================== -A linear separation A non-linear separation -========================== ========================== - -.. tip:: - - For classification models, the decision boundary, that separates the - class expresses the complexity of the model. For instance, a linear - model, that makes a decision based on a linear combination of - features, is more complex than a non-linear one. - - -Supervised Learning: Classification of Handwritten Digits -========================================================= - -The nature of the data ------------------------ - -.. sidebar:: Code and notebook - - Python code and Jupyter notebook for this section are found - :ref:`here ` - - -In this section we'll apply scikit-learn to the classification of -handwritten digits. This will go a bit beyond the iris classification we -saw before: we'll discuss some of the metrics which can be used in -evaluating the effectiveness of a classification model. :: - - >>> from sklearn.datasets import load_digits - >>> digits = load_digits() - -.. 
image:: auto_examples/images/sphx_glr_plot_digits_simple_classif_001.png - :target: auto_examples/plot_digits_simple_classif.html - :align: center - -Let us visualize the data and remind us what we're looking at (click on -the figure for the full code):: - - # plot the digits: each image is 8x8 pixels - for i in range(64): - ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) - ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest') - -Visualizing the Data on its principal components -------------------------------------------------- - -A good first-step for many problems is to visualize the data using a -*Dimensionality Reduction* technique. We'll start with the most -straightforward one, `Principal Component Analysis (PCA) -`_. - -PCA seeks orthogonal linear combinations of the features which show the -greatest variance, and as such, can help give you a good idea of the -structure of the data set. :: - - >>> from sklearn.decomposition import PCA - >>> pca = PCA(n_components=2) - >>> proj = pca.fit_transform(digits.data) - >>> plt.scatter(proj[:, 0], proj[:, 1], c=digits.target) - - >>> plt.colorbar() - - -.. image:: auto_examples/images/sphx_glr_plot_digits_simple_classif_002.png - :align: center - :target: auto_examples/plot_digits_simple_classif.html - -.. topic:: **Question** - :class: green - - Given these projections of the data, which numbers do you think a - classifier might have trouble distinguishing? - -Gaussian Naive Bayes Classification ------------------------------------ - -For most classification problems, it's nice to have a simple, fast -method to provide a quick baseline classification. If the simple -and fast method is sufficient, then we don't have to waste CPU cycles on -more complex models. If not, we can use the results of the simple method -to give us clues about our data. - -One good method to keep in mind is Gaussian Naive Bayes -(:class:`sklearn.naive_bayes.GaussianNB`). - -.. 
sidebar:: Old scikit-learn versions - - :func:`~sklearn.model_selection.train_test_split` is imported from - ``sklearn.cross_validation`` - -.. tip:: - - Gaussian Naive Bayes fits a Gaussian distribution to each training label - independently on each feature, and uses this to quickly give a rough - classification. It is generally not sufficiently accurate for real-world - data, but can perform surprisingly well, for instance on text data. - -:: - - >>> from sklearn.naive_bayes import GaussianNB - >>> from sklearn.model_selection import train_test_split - - >>> # split the data into training and validation sets - >>> X_train, X_test, y_train, y_test = train_test_split( - ... digits.data, digits.target, random_state=42) - - >>> # train the model - >>> clf = GaussianNB() - >>> clf.fit(X_train, y_train) - GaussianNB() - - >>> # use the model to predict the labels of the test data - >>> predicted = clf.predict(X_test) - >>> expected = y_test - >>> print(predicted) - [6 9 3 7 2 2 5 8 5 2 1 1 7 0 4 8 3 7 8 8 4 3 9 7 5 6 3 5 6 3...] - >>> print(expected) - [6 9 3 7 2 1 5 2 5 2 1 9 4 0 4 2 3 7 8 8 4 3 9 7 5 6 3 5 6 3...] - -As above, we plot the digits with the predicted labels to get an idea of -how well the classification is working. - -.. image:: auto_examples/images/sphx_glr_plot_digits_simple_classif_003.png - :align: center - :target: auto_examples/plot_digits_simple_classif.html - - -.. topic:: **Question** - :class: green - - Why did we split the data into training and validation sets? - -Quantitative Measurement of Performance ---------------------------------------- - -We'd like to measure the performance of our estimator without having to -resort to plotting examples. A simple method might be to simply compare -the number of matches:: - - >>> matches = (predicted == expected) - >>> print(matches.sum()) - 385 - >>> print(len(matches)) - 450 - >>> matches.sum() / float(len(matches)) - np.float64(0.8555...) 
- -We see that more than 80% of the 450 predictions match the input. But -there are other more sophisticated metrics that can be used to judge the -performance of a classifier: several are available in the -:mod:`sklearn.metrics` submodule. - -One of the most useful metrics is the ``classification_report``, which -combines several measures and prints a table with the results:: - - >>> from sklearn import metrics - >>> print(metrics.classification_report(expected, predicted)) - precision recall f1-score support - - 0 1.00 0.95 0.98 43 - 1 0.85 0.78 0.82 37 - 2 0.85 0.61 0.71 38 - 3 0.97 0.83 0.89 46 - 4 0.98 0.84 0.90 55 - 5 0.90 0.95 0.93 59 - 6 0.90 0.96 0.92 45 - 7 0.71 0.98 0.82 41 - 8 0.60 0.89 0.72 38 - 9 0.90 0.73 0.80 48 - - accuracy 0.86 450 - macro avg 0.87 0.85 0.85 450 - weighted avg 0.88 0.86 0.86 450 - - - -Another enlightening metric for this sort of multi-label classification -is a *confusion matrix*: it helps us visualize which labels are being -interchanged in the classification errors:: - - >>> print(metrics.confusion_matrix(expected, predicted)) - [[41 0 0 0 0 1 0 1 0 0] - [ 0 29 2 0 0 0 0 0 4 2] - [ 0 2 23 0 0 0 1 0 12 0] - [ 0 0 1 38 0 1 0 0 5 1] - [ 0 0 0 0 46 0 2 7 0 0] - [ 0 0 0 0 0 56 1 1 0 1] - [ 0 0 0 0 1 1 43 0 0 0] - [ 0 0 0 0 0 1 0 40 0 0] - [ 0 2 0 0 0 0 0 2 34 0] - [ 0 1 1 1 0 2 1 5 2 35]] - -We see here that in particular, the numbers 1, 2, 3, and 9 are often -being labeled 8. - - -Supervised Learning: Regression of Housing Data -=============================================== - -Here we'll do a short example of a regression problem: learning a -continuous value from a set of features. - -A quick look at the data -------------------------- - -.. sidebar:: Code and notebook - - Python code and Jupyter notebook for this section are found - :ref:`here ` - - - -We'll use the California house prices set, available in scikit-learn. -This records measurements of 8 attributes of housing markets in -California, as well as the median price. 
The question is: can you predict -the price of a new market given its attributes?:: - - >>> from sklearn.datasets import fetch_california_housing - >>> data = fetch_california_housing(as_frame=True) - >>> print(data.data.shape) - (20640, 8) - >>> print(data.target.shape) - (20640,) - -We can see that there are just over 20000 data points. - -The ``DESCR`` variable has a long description of the dataset:: - - >>> print(data.DESCR) - .. _california_housing_dataset: - - California Housing dataset - -------------------------- - - **Data Set Characteristics:** - - :Number of Instances: 20640 - - :Number of Attributes: 8 numeric, predictive attributes and the target - - :Attribute Information: - - MedInc median income in block group - - HouseAge median house age in block group - - AveRooms average number of rooms per household - - AveBedrms average number of bedrooms per household - - Population block group population - - AveOccup average number of household members - - Latitude block group latitude - - Longitude block group longitude - - :Missing Attribute Values: None - - This dataset was obtained from the StatLib repository. - https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html - - The target variable is the median house value for California districts, - expressed in hundreds of thousands of dollars ($100,000). - - This dataset was derived from the 1990 U.S. census, using one row per census - block group. A block group is the smallest geographical unit for which the U.S. - Census Bureau publishes sample data (a block group typically has a population - of 600 to 3,000 people). - - A household is a group of people residing within a home. Since the average - number of rooms and bedrooms in this dataset are provided per household, these - columns may take surprisingly large values for block groups with few households - and many empty houses, such as vacation resorts. 
- - It can be downloaded/loaded using the - :func:`sklearn.datasets.fetch_california_housing` function. - - .. rubric:: References - - - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions, - Statistics and Probability Letters, 33 (1997) 291-297 - - -It often helps to quickly visualize pieces of the data using histograms, -scatter plots, or other plot types. With matplotlib, let us show a -histogram of the target values: the median price in each neighborhood:: - - >>> plt.hist(data.target) - (array([... - -.. image:: auto_examples/images/sphx_glr_plot_california_prediction_001.png - :align: center - :target: auto_examples/plot_california_prediction.html - :scale: 70 - - - -Let's have a quick look to see if some features are more relevant than -others for our problem:: - - >>> for index, feature_name in enumerate(data.feature_names): - ... plt.figure() - ... plt.scatter(data.data[feature_name], data.target) -
>> from sklearn.model_selection import train_test_split - >>> X_train, X_test, y_train, y_test = train_test_split(data.data, data.target) - >>> from sklearn.linear_model import LinearRegression - >>> clf = LinearRegression() - >>> clf.fit(X_train, y_train) - LinearRegression() - >>> predicted = clf.predict(X_test) - >>> expected = y_test - >>> print("RMS: %s" % np.sqrt(np.mean((predicted - expected) ** 2))) - RMS: 0.7... - -.. image:: auto_examples/images/sphx_glr_plot_california_prediction_010.png - :align: right - :target: auto_examples/plot_california_prediction.html - -We can plot the error: expected as a function of predicted:: - - >>> plt.scatter(expected, predicted) - - -.. tip:: - - The prediction at least correlates with the true price, though there are - clearly some biases. We could imagine evaluating the performance of the - regressor by, say, computing the RMS residuals between the true and - predicted price. There are some subtleties in this, however, which we'll - cover in a later section. - -.. topic:: **Exercise: Gradient Boosting Tree Regression** - :class: green - - There are many other types of regressors available in scikit-learn: - we'll try a more powerful one here. - - **Use the GradientBoostingRegressor class to fit the housing data**. - - **hint** You can copy and paste some of the above code, replacing - :class:`~sklearn.linear_model.LinearRegression` with - :class:`~sklearn.ensemble.GradientBoostingRegressor`:: - - from sklearn.ensemble import GradientBoostingRegressor - # Instantiate the model, fit the results, and scatter in vs. out - - **Solution** The solution is found in :ref:`the code of this chapter ` - - - -Measuring prediction performance -================================ - -A quick test on the K-neighbors classifier ------------------------------------------- - -Here we'll continue to look at the digits data, but we'll switch to the -K-Neighbors classifier. The K-neighbors classifier is an instance-based -classifier. 
The K-neighbors classifier predicts the label of -an unknown point based on the labels of the *K* nearest points in the -parameter space. :: - - >>> # Get the data - >>> from sklearn.datasets import load_digits - >>> digits = load_digits() - >>> X = digits.data - >>> y = digits.target - - >>> # Instantiate and train the classifier - >>> from sklearn.neighbors import KNeighborsClassifier - >>> clf = KNeighborsClassifier(n_neighbors=1) - >>> clf.fit(X, y) - KNeighborsClassifier(...) - - >>> # Check the results using metrics - >>> from sklearn import metrics - >>> y_pred = clf.predict(X) - - >>> print(metrics.confusion_matrix(y_pred, y)) - [[178 0 0 0 0 0 0 0 0 0] - [ 0 182 0 0 0 0 0 0 0 0] - [ 0 0 177 0 0 0 0 0 0 0] - [ 0 0 0 183 0 0 0 0 0 0] - [ 0 0 0 0 181 0 0 0 0 0] - [ 0 0 0 0 0 182 0 0 0 0] - [ 0 0 0 0 0 0 181 0 0 0] - [ 0 0 0 0 0 0 0 179 0 0] - [ 0 0 0 0 0 0 0 0 174 0] - [ 0 0 0 0 0 0 0 0 0 180]] - -Apparently, we've found a perfect classifier! But this is misleading for -the reasons we saw before: the classifier essentially "memorizes" all the -samples it has already seen. To really test how well this algorithm -does, we need to try some samples it *hasn't* yet seen. - -This problem also occurs with regression models. In the following we -fit an other instance-based model named "decision tree" to the California -Housing price dataset we introduced previously:: - - >>> from sklearn.datasets import fetch_california_housing - >>> from sklearn.tree import DecisionTreeRegressor - - >>> data = fetch_california_housing(as_frame=True) - >>> clf = DecisionTreeRegressor().fit(data.data, data.target) - >>> predicted = clf.predict(data.data) - >>> expected = data.target - - >>> plt.scatter(expected, predicted) - - >>> plt.plot([0, 50], [0, 50], '--k') - [>> from sklearn import model_selection - >>> X = digits.data - >>> y = digits.target - - >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, - ... 
test_size=0.25, random_state=0) - - >>> print("%r, %r, %r" % (X.shape, X_train.shape, X_test.shape)) - (1797, 64), (1347, 64), (450, 64) - -Now we train on the training data, and test on the testing data:: - - >>> clf = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train) - >>> y_pred = clf.predict(X_test) - - >>> print(metrics.confusion_matrix(y_test, y_pred)) - [[37 0 0 0 0 0 0 0 0 0] - [ 0 43 0 0 0 0 0 0 0 0] - [ 0 0 43 1 0 0 0 0 0 0] - [ 0 0 0 45 0 0 0 0 0 0] - [ 0 0 0 0 38 0 0 0 0 0] - [ 0 0 0 0 0 47 0 0 0 1] - [ 0 0 0 0 0 0 52 0 0 0] - [ 0 0 0 0 0 0 0 48 0 0] - [ 0 0 0 0 0 0 0 0 48 0] - [ 0 0 0 1 0 1 0 0 0 45]] - >>> print(metrics.classification_report(y_test, y_pred)) - precision recall f1-score support - - 0 1.00 1.00 1.00 37 - 1 1.00 1.00 1.00 43 - 2 1.00 0.98 0.99 44 - 3 0.96 1.00 0.98 45 - 4 1.00 1.00 1.00 38 - 5 0.98 0.98 0.98 48 - 6 1.00 1.00 1.00 52 - 7 1.00 1.00 1.00 48 - 8 1.00 1.00 1.00 48 - 9 0.98 0.96 0.97 47 - - accuracy 0.99 450 - macro avg 0.99 0.99 0.99 450 - weighted avg 0.99 0.99 0.99 450 - - -The averaged f1-score is often used as a convenient measure of the -overall performance of an algorithm. It appears in the bottom row -of the classification report; it can also be accessed directly:: - - >>> metrics.f1_score(y_test, y_pred, average="macro") - 0.991367... - -The over-fitting we saw previously can be quantified by computing the -f1-score on the training data itself:: - - >>> metrics.f1_score(y_train, clf.predict(X_train), average="macro") - 1.0 - -.. note:: - - **Regression metrics** In the case of regression models, we - need to use different metrics, such as explained variance. - -Model Selection via Validation ------------------------------- - -.. tip:: - - We have applied Gaussian Naives, support vectors machines, and - K-nearest neighbors classifiers to the digits dataset. Now that we - have these validation tools in place, we can ask quantitatively which - of the three estimators works best for this dataset. 
- -* With the default hyper-parameters for each estimator, which gives the - best f1 score on the **validation set**? Recall that hyperparameters - are the parameters set when you instantiate the classifier: for - example, the ``n_neighbors`` in ``clf = - KNeighborsClassifier(n_neighbors=1)`` :: - - >>> from sklearn.naive_bayes import GaussianNB - >>> from sklearn.neighbors import KNeighborsClassifier - >>> from sklearn.svm import LinearSVC - - >>> X = digits.data - >>> y = digits.target - >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, - ... test_size=0.25, random_state=0) - - >>> for Model in [GaussianNB(), KNeighborsClassifier(), LinearSVC(dual=False)]: - ... clf = Model.fit(X_train, y_train) - ... y_pred = clf.predict(X_test) - ... print('%s: %s' % - ... (Model.__class__.__name__, metrics.f1_score(y_test, y_pred, average="macro"))) - GaussianNB: 0.8... - KNeighborsClassifier: 0.9... - LinearSVC: 0.9... - -* For each classifier, which value for the hyperparameters gives the best - results for the digits data? For :class:`~sklearn.svm.LinearSVC`, use - ``loss='l2'`` and ``loss='l1'``. For - :class:`~sklearn.neighbors.KNeighborsClassifier` we use - ``n_neighbors`` between 1 and 10. Note that - :class:`~sklearn.naive_bayes.GaussianNB` does not have any adjustable - hyperparameters. 
:: - - LinearSVC(loss='l1'): 0.930570687535 - LinearSVC(loss='l2'): 0.933068826918 - ------------------- - KNeighbors(n_neighbors=1): 0.991367521884 - KNeighbors(n_neighbors=2): 0.984844206884 - KNeighbors(n_neighbors=3): 0.986775344954 - KNeighbors(n_neighbors=4): 0.980371905382 - KNeighbors(n_neighbors=5): 0.980456280495 - KNeighbors(n_neighbors=6): 0.975792419414 - KNeighbors(n_neighbors=7): 0.978064579214 - KNeighbors(n_neighbors=8): 0.978064579214 - KNeighbors(n_neighbors=9): 0.978064579214 - KNeighbors(n_neighbors=10): 0.975555089773 - - **Solution:** :ref:`code source ` - - -Cross-validation ----------------- - -Cross-validation consists in repeatedly splitting the data in pairs of -train and test sets, called 'folds'. Scikit-learn comes with a function -to automatically compute score on all these folds. Here we do -:class:`~sklearn.model_selection.KFold` with k=5. :: - - >>> clf = KNeighborsClassifier() - >>> from sklearn.model_selection import cross_val_score - >>> cross_val_score(clf, X, y, cv=5) #doctest: +ELLIPSIS - array([0.947..., 0.955..., 0.966..., 0.980..., 0.963... ]) - -We can use different splitting strategies, such as random splitting:: - - >>> from sklearn.model_selection import ShuffleSplit - >>> cv = ShuffleSplit(n_splits=5) - >>> cross_val_score(clf, X, y, cv=cv) - array([...]) - -.. tip:: - - There exists `many different cross-validation strategies - `_ - in scikit-learn. They are often useful to take in account non iid - datasets. - -Hyperparameter optimization with cross-validation -------------------------------------------------- - -Consider regularized linear models, such as *Ridge Regression*, which -uses l2 regularization, and *Lasso Regression*, which uses l1 -regularization. Choosing their regularization parameter is important. - -Let us set these parameters on the Diabetes dataset, a simple regression -problem. 
The diabetes data consists of 10 physiological variables (age, -sex, weight, blood pressure) measured on 442 patients, and an indication -of disease progression after one year:: - - >>> from sklearn.datasets import load_diabetes - >>> data = load_diabetes() - >>> X, y = data.data, data.target - >>> print(X.shape) - (442, 10) - -With the default hyper-parameters, we compute the cross-validation score:: - - >>> from sklearn.linear_model import Ridge, Lasso - - >>> for Model in [Ridge, Lasso]: - ... model = Model() - ... print('%s: %s' % (Model.__name__, cross_val_score(model, X, y).mean())) - Ridge: 0.4... - Lasso: 0.3... - -Basic Hyperparameter Optimization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We compute the cross-validation score as a function of alpha, the -strength of the regularization for :class:`~sklearn.linear_model.Lasso` -and :class:`~sklearn.linear_model.Ridge`. We choose 30 values of alpha -between 0.001 and 0.1:: - - >>> alphas = np.logspace(-3, -1, 30) - - >>> for Model in [Lasso, Ridge]: - ... scores = [cross_val_score(Model(alpha), X, y, cv=3).mean() - ... for alpha in alphas] - ... plt.plot(alphas, scores, label=Model.__name__) - [>> from sklearn.model_selection import GridSearchCV - >>> for Model in [Ridge, Lasso]: - ... gscv = GridSearchCV(Model(), dict(alpha=alphas), cv=3).fit(X, y) - ... print('%s: %s' % (Model.__name__, gscv.best_params_)) - Ridge: {'alpha': np.float64(0.06210169418915616)} - Lasso: {'alpha': np.float64(0.01268961003167922)} - -Built-in Hyperparameter Search -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For some models within scikit-learn, cross-validation can be performed -more efficiently on large datasets. In this case, a cross-validated -version of the particular model is included. The cross-validated -versions of :class:`~sklearn.linear_model.Ridge` and -:class:`~sklearn.linear_model.Lasso` are -:class:`~sklearn.linear_model.RidgeCV` and -:class:`~sklearn.linear_model.LassoCV`, respectively.
Parameter search -on these estimators can be performed as follows:: - - >>> from sklearn.linear_model import RidgeCV, LassoCV - >>> for Model in [RidgeCV, LassoCV]: - ... model = Model(alphas=alphas, cv=3).fit(X, y) - ... print('%s: %s' % (Model.__name__, model.alpha_)) - RidgeCV: 0.0621016941892 - LassoCV: 0.0126896100317 - -We see that the results match those returned by GridSearchCV - -Nested cross-validation -~~~~~~~~~~~~~~~~~~~~~~~ - -How do we measure the performance of these estimators? We have used data -to set the hyperparameters, so we need to test on actually new data. We -can do this by running :func:`~sklearn.model_selection.cross_val_score` -on our CV objects. Here there are 2 cross-validation loops going on, this -is called *'nested cross validation'*:: - - for Model in [RidgeCV, LassoCV]: - scores = cross_val_score(Model(alphas=alphas, cv=3), X, y, cv=3) - print(Model.__name__, np.mean(scores)) - - -.. note:: - - Note that these results do not match the best results of our curves - above, and :class:`~sklearn.linear_model.LassoCV` seems to - under-perform :class:`~sklearn.linear_model.RidgeCV`. The reason is - that setting the hyper-parameter is harder for Lasso, thus the - estimation error on this hyper-parameter is larger. - -Unsupervised Learning: Dimensionality Reduction and Visualization -================================================================= - -Unsupervised learning is applied on X without y: data without labels. A -typical use case is to find hidden structure in the data. - -Dimensionality Reduction: PCA ------------------------------ - -Dimensionality reduction derives a set of new artificial features smaller -than the original feature set. Here we'll use `Principal Component -Analysis (PCA) -`__, a -dimensionality reduction that strives to retain most of the variance of -the original data. We'll use :class:`sklearn.decomposition.PCA` on the -iris dataset:: - - >>> X = iris.data - >>> y = iris.target - -.. 
tip:: - - :class:`~sklearn.decomposition.PCA` computes linear combinations of - the original features using a truncated Singular Value Decomposition - of the matrix X, to project the data onto a base of the top singular - vectors. - -:: - - >>> from sklearn.decomposition import PCA - >>> pca = PCA(n_components=2, whiten=True) - >>> pca.fit(X) - PCA(n_components=2, whiten=True) - -Once fitted, :class:`~sklearn.decomposition.PCA` exposes the singular -vectors in the ``components_`` attribute:: - - >>> pca.components_ - array([[ 0.3..., -0.08..., 0.85..., 0.3...], - [ 0.6..., 0.7..., -0.1..., -0.07...]]) - -Other attributes are available as well:: - - >>> pca.explained_variance_ratio_ - array([0.92..., 0.053...]) - -Let us project the iris dataset along those first two dimensions:: - - >>> X_pca = pca.transform(X) - >>> X_pca.shape - (150, 2) - -:class:`~sklearn.decomposition.PCA` ``normalizes`` and ``whitens`` the data, which means that the data -is now centered on both components with unit variance:: - - >>> X_pca.mean(axis=0) - array([...e-15, ...e-15]) - >>> X_pca.std(axis=0, ddof=1) - array([1., 1.]) - -Furthermore, the sample components no longer carry any linear -correlation:: - - >>> np.corrcoef(X_pca.T) # doctest: +SKIP - array([[1.00000000e+00, 0.0], - [0.0, 1.00000000e+00]]) - -With a number of retained components 2 or 3, PCA is useful to visualize -the dataset:: - - >>> target_ids = range(len(iris.target_names)) - >>> for i, c, label in zip(target_ids, 'rgbcmykw', iris.target_names): - ... plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], - ... 
c=c, label=label) - >> # Take the first 500 data points: it's hard to see 1500 points - >>> X = digits.data[:500] - >>> y = digits.target[:500] - - >>> # Fit and transform with a TSNE - >>> from sklearn.manifold import TSNE - >>> tsne = TSNE(n_components=2, learning_rate='auto', init='random', random_state=0) - >>> X_2d = tsne.fit_transform(X) - - >>> # Visualize the data - >>> plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y) - - - -.. image:: auto_examples/images/sphx_glr_plot_tsne_001.png - :align: left - :target: auto_examples/plot_tsne.html - :scale: 70 - - -.. topic:: fit_transform - - As :class:`~sklearn.manifold.TSNE` cannot be applied to new data, we - need to use its `fit_transform` method. - -| - -:class:`sklearn.manifold.TSNE` separates quite well the different classes -of digits even though it had no access to the class information. - -.. raw:: html - -
- - -.. topic:: Exercise: Other dimension reduction of digits - :class: green - - :mod:`sklearn.manifold` has many other non-linear embeddings. Try - them out on the digits dataset. Could you judge their quality without - knowing the labels ``y``? :: - - >>> from sklearn.datasets import load_digits - >>> digits = load_digits() - >>> # ... - -Parameter selection, Validation, and Testing -============================================ - -Hyperparameters, Over-fitting, and Under-fitting ------------------------------------------------- - -.. seealso:: - - This section is adapted from `Andrew Ng's excellent - Coursera course `__ - -The issues associated with validation and cross-validation are some of -the most important aspects of the practice of machine learning. -Selecting the optimal model for your data is vital, and is a piece of -the problem that is not often appreciated by machine learning -practitioners. - -The central question is: **If our estimator is underperforming, how -should we move forward?** - -- Use simpler or more complicated model? -- Add more features to each observed data point? -- Add more training samples? - -The answer is often counter-intuitive. In particular, **Sometimes using -a more complicated model will give worse results.** Also, **Sometimes -adding training data will not improve your results.** The ability to -determine what steps will improve your model is what separates the -successful machine learning practitioners from the unsuccessful. - -Bias-variance trade-off: illustration on a simple regression problem -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. sidebar:: Code and notebook - - Python code and Jupyter notebook for this section are found - :ref:`here - ` - - -Let us start with a simple 1D regression problem. This -will help us to easily visualize the data and the model, and the results -generalize easily to higher-dimensional datasets. 
We'll explore a simple -**linear regression** problem, with :mod:`sklearn.linear_model`. - - -.. include:: auto_examples/plot_variance_linear_regr.rst - :start-after: We consider the situation where we have only 2 data point - :end-before: **Total running time of the script:** - - -As we can see, the estimator displays much less variance. However it -systematically under-estimates the coefficient. It displays a biased -behavior. - -This is a typical example of **bias/variance tradeoff**: non-regularized -estimators are not biased, but they can display a lot of variance. -Highly-regularized models have little variance, but high bias. This bias -is not necessarily a bad thing: what matters is choosing the -tradeoff between bias and variance that leads to the best prediction -performance. For a specific dataset there is a sweet spot corresponding -to the highest complexity that the data can support, depending on the -amount of noise and of observations available. - -Visualizing the Bias/Variance Tradeoff --------------------------------------- - -.. tip:: - - Given a particular dataset and a model (e.g. a polynomial), we'd like to - understand whether bias (underfit) or variance limits prediction, and how - to tune the *hyperparameter* (here ``d``, the degree of the polynomial) - to give the best fit. - -On a given dataset, let us fit a simple polynomial regression model with -varying degrees: - -.. image:: auto_examples/images/sphx_glr_plot_bias_variance_001.png - :align: center - :target: auto_examples/plot_bias_variance.html - -.. tip:: - - In the above figure, we see fits for three different values of ``d``. - For ``d = 1``, the data is under-fit. This means that the model is too - simplistic: no straight line will ever be a good fit to this data. In - this case, we say that the model suffers from high bias. The model - itself is biased, and this will be reflected in the fact that the data - is poorly fit. At the other extreme, for ``d = 6`` the data is over-fit. 
- This means that the model has too many free parameters (6 in this case) - which can be adjusted to perfectly fit the training data. If we add a - new point to this plot, though, chances are it will be very far from the - curve representing the degree-6 fit. In this case, we say that the model - suffers from high variance. The reason for the term "high variance" is - that if any of the input points are varied slightly, it could result in - a very different model. - - In the middle, for ``d = 2``, we have found a good mid-point. It fits - the data fairly well, and does not suffer from the bias and variance - problems seen in the figures on either side. What we would like is a way - to quantitatively identify bias and variance, and optimize the - metaparameters (in this case, the polynomial degree d) in order to - determine the best algorithm. - -.. topic:: Polynomial regression with scikit-learn - - A polynomial regression is built by pipelining - :class:`~sklearn.preprocessing.PolynomialFeatures` - and a :class:`~sklearn.linear_model.LinearRegression`:: - - >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.preprocessing import PolynomialFeatures - >>> from sklearn.linear_model import LinearRegression - >>> model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression()) - - -Validation Curves -~~~~~~~~~~~~~~~~~ - -Let us create a dataset like in the example above:: - - >>> def generating_func(x, rng, err=0.5): - ... return rng.normal(10 - 1. / (x + 0.1), err) - - >>> # randomly sample more data - >>> rng = np.random.default_rng(27446968) - >>> x = rng.random(size=200) - >>> y = generating_func(x, err=1., rng=rng) - -.. 
image:: auto_examples/images/sphx_glr_plot_bias_variance_002.png - :align: right - :target: auto_examples/plot_bias_variance.html - :scale: 60 - -Central to quantifying the bias and variance of a model is applying it to *test -data*, sampled from the same distribution as the training data, but -carrying independent noise:: - - >>> xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.4) - - -.. raw:: html - -
- -**Validation curve** A validation curve consists in varying a model parameter -that controls its complexity (here the degree of the -polynomial) and measures both error of the model on training data, and on -test data (*eg* with cross-validation). The model parameter is then -adjusted so that the test error is minimized: - -We use :func:`sklearn.model_selection.validation_curve` to compute train -and test error, and plot it:: - - >>> from sklearn.model_selection import validation_curve - - >>> degrees = np.arange(1, 21) - - >>> model = make_pipeline(PolynomialFeatures(), LinearRegression()) - - >>> # Vary the "degrees" on the pipeline step "polynomialfeatures" - >>> train_scores, validation_scores = validation_curve( - ... model, x[:, np.newaxis], y, - ... param_name='polynomialfeatures__degree', - ... param_range=degrees) - - >>> # Plot the mean train score and validation score across folds - >>> plt.plot(degrees, validation_scores.mean(axis=1), label='cross-validation') - [] - >>> plt.plot(degrees, train_scores.mean(axis=1), label='training') - [] - >>> plt.legend(loc='best') - - -.. image:: auto_examples/images/sphx_glr_plot_bias_variance_003.png - :align: left - :target: auto_examples/plot_bias_variance.html - :scale: 60 - - -This figure shows why validation is important. On the left side of the -plot, we have very low-degree polynomial, which under-fit the data. This -leads to a low explained variance for both the training set and the -validation set. On the far right side of the plot, we have a very high -degree polynomial, which over-fits the data. This can be seen in the fact -that the training explained variance is very high, while on the -validation set, it is low. Choosing ``d`` around 4 or 5 gets us the best -tradeoff. - -.. tip:: - - The astute reader will realize that something is amiss here: in the - above plot, ``d = 4`` gives the best results. But in the previous plot, - we found that ``d = 6`` vastly over-fits the data. What’s going on here? 
- The difference is the **number of training points** used. In the - previous example, there were only eight training points. In this - example, we have 100. As a general rule of thumb, the more training - points used, the more complicated model can be used. But how can you - determine for a given model whether more training points will be - helpful? A useful diagnostic for this are learning curves. - -Learning Curves -~~~~~~~~~~~~~~~ - -A learning curve shows the training and validation score as a -function of the number of training points. Note that when we train on a -subset of the training data, the training score is computed using -this subset, not the full training set. This curve gives a -quantitative view into how beneficial it will be to add training -samples. - -.. topic:: **Questions:** - :class: green - - - As the number of training samples are increased, what do you expect - to see for the training score? For the validation score? - - Would you expect the training score to be higher or lower than the - validation score? Would you ever expect this to change? - - -:mod:`scikit-learn` provides -:func:`sklearn.model_selection.learning_curve`:: - - >>> from sklearn.model_selection import learning_curve - >>> train_sizes, train_scores, validation_scores = learning_curve( - ... model, x[:, np.newaxis], y, train_sizes=np.logspace(-1, 0, 20)) - - >>> # Plot the mean train score and validation score across folds - >>> plt.plot(train_sizes, validation_scores.mean(axis=1), label='cross-validation') - [] - >>> plt.plot(train_sizes, train_scores.mean(axis=1), label='training') - [] - - -.. figure:: auto_examples/images/sphx_glr_plot_bias_variance_004.png - :align: left - :target: auto_examples/plot_bias_variance.html - :scale: 60 - - For a ``degree=1`` model - -Note that the validation score *generally increases* with a growing -training set, while the training score *generally decreases* with a -growing training set. 
As the training size -increases, they will converge to a single value. - -From the above discussion, we know that ``d = 1`` is a high-bias -estimator which under-fits the data. This is indicated by the fact that -both the training and validation scores are low. When confronted -with this type of learning curve, we can expect that adding more -training data will not help: both lines converge to a -relatively low score. - -|clear-floats| - -**When the learning curves have converged to a low score, we have a -high bias model.** - -A high-bias model can be improved by: - -- Using a more sophisticated model (i.e. in this case, increase ``d``) -- Gathering more features for each sample. -- Decreasing regularization in a regularized model. - -Increasing the number of samples, however, does not improve a high-bias -model. - -Now let's look at a high-variance (i.e. over-fit) model: - -.. figure:: auto_examples/images/sphx_glr_plot_bias_variance_006.png - :align: left - :target: auto_examples/plot_bias_variance.html - :scale: 60 - - For a ``degree=15`` model - - -Here we show the learning curve for ``d = 15``. From the above -discussion, we know that ``d = 15`` is a **high-variance** estimator -which **over-fits** the data. This is indicated by the fact that the -training score is much higher than the validation score. As we add more -samples to this training set, the training score will continue to -decrease, while the cross-validation score will continue to increase, until they -meet in the middle. - -|clear-floats| - -**Learning curves that have not yet converged with the full training -set indicate a high-variance, over-fit model.** - -A high-variance model can be improved by: - -- Gathering more training samples. -- Using a less-sophisticated model (i.e. in this case, make ``d`` - smaller) -- Increasing regularization. - -In particular, gathering more features for each sample will not help the -results. 
- -Summary on model selection --------------------------- - -We’ve seen above that an under-performing algorithm can be due to two -possible situations: high bias (under-fitting) and high variance -(over-fitting). In order to evaluate our algorithm, we set aside a -portion of our training data for cross-validation. Using the technique -of learning curves, we can train on progressively larger subsets of the -data, evaluating the training error and cross-validation error to -determine whether our algorithm has high variance or high bias. But what -do we do with this information? - -High Bias -~~~~~~~~~ - -If a model shows high **bias**, the following actions might help: - -- **Add more features**. In our example of predicting home prices, it - may be helpful to make use of information such as the neighborhood - the house is in, the year the house was built, the size of the lot, - etc. Adding these features to the training and test sets can improve - a high-bias estimator -- **Use a more sophisticated model**. Adding complexity to the model - can help improve on bias. For a polynomial fit, this can be - accomplished by increasing the degree d. Each learning technique has - its own methods of adding complexity. -- **Use fewer samples**. Though this will not improve the - classification, a high-bias algorithm can attain nearly the same - error with a smaller training sample. For algorithms which are - computationally expensive, reducing the training sample size can lead - to very large improvements in speed. -- **Decrease regularization**. Regularization is a technique used to - impose simplicity in some machine learning models, by adding a - penalty term that depends on the characteristics of the parameters. - If a model has high bias, decreasing the effect of regularization can - lead to better results. - -High Variance -~~~~~~~~~~~~~ - -If a model shows **high variance**, the following actions might -help: - -- **Use fewer features**. 
Using a feature selection technique may be - useful, and decrease the over-fitting of the estimator. -- **Use a simpler model**. Model complexity and over-fitting go - hand-in-hand. -- **Use more training samples**. Adding training samples can reduce the - effect of over-fitting, and lead to improvements in a high variance - estimator. -- **Increase Regularization**. Regularization is designed to prevent - over-fitting. In a high-variance model, increasing regularization can - lead to better results. - -These choices become very important in real-world situations. For -example, due to limited telescope time, astronomers must seek a balance -between observing a large number of objects, and observing a large -number of features for each object. Determining which is more important -for a particular learning task can inform the observing strategy that -the astronomer employs. - -A last word of caution: separate validation and test set --------------------------------------------------------- - -Using validation schemes to determine hyper-parameters means that we are -fitting the hyper-parameters to the particular validation set. In the -same way that parameters can be over-fit to the training set, -hyperparameters can be over-fit to the validation set. Because of this, -the validation error tends to under-predict the classification error of -new data. - -For this reason, it is recommended to split the data into three sets: - -- The **training set**, used to train the model (usually ~60% of the - data) -- The **validation set**, used to validate the model (usually ~20% of - the data) -- The **test set**, used to evaluate the expected error of the - validated model (usually ~20% of the data) - -Many machine learning practitioners do not separate test set and -validation set. But if your goal is to gauge the error of a model on -unknown data, using an independent test set is vital. - -| - -.. include:: auto_examples/index.rst - :start-line: 1 - -.. 
seealso:: **Going further** - - * The `documentation of scikit-learn `__ is - very complete and didactic. - - * `Introduction to Machine Learning with Python - `_, - by Sarah Guido, Andreas Müller - (`notebooks available here `_). diff --git a/packages/scikit-learn/index_examples.md b/packages/scikit-learn/index_examples.md new file mode 100644 index 000000000..c62211894 --- /dev/null +++ b/packages/scikit-learn/index_examples.md @@ -0,0 +1,1578 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +# Examples for packages/scikit-learn/index.md + ++++ + +(simple-picture-of-the-formal-problem-of-machine-learning)= + +## Simple picture of the formal problem of machine learning + + + ++++ + +This example generates simple synthetic data points and shows a +separating hyperplane on them. + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +from sklearn.linear_model import SGDClassifier +from sklearn.datasets import make_blobs +``` + +```{code-cell} +# we create 50 separable synthetic points +X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) +``` + +```{code-cell} +# fit the model +clf = SGDClassifier(loss="hinge", alpha=0.01, fit_intercept=True) +clf.fit(X, Y) +``` + +```{code-cell} +# plot the line, the points, and the nearest vectors to the plane +xx = np.linspace(-1, 5, 10) +yy = np.linspace(-1, 5, 10) +``` + +```{code-cell} +X1, X2 = np.meshgrid(xx, yy) +Z = np.empty(X1.shape) +for (i, j), val in np.ndenumerate(X1): + x1 = val + x2 = X2[i, j] + p = clf.decision_function([[x1, x2]]) + Z[i, j] = p[0] +``` + +```{code-cell} +plt.figure(figsize=(4, 3)) +ax = plt.axes() +ax.contour( + X1, X2, Z, [-1.0, 0.0, 1.0], colors="k", linestyles=["dashed", "solid", "dashed"] +) +ax.scatter(X[:, 0], X[:, 1], c=Y, cmap="Paired") +ax.axis("tight") +``` + 
+(linear-regression)= + +## linear_regression + + + ++++ + +**A simple linear regression** + +```{code-cell} +from sklearn.linear_model import LinearRegression +``` + +```{code-cell} +# x from 0 to 30 +rng = np.random.default_rng() +x = 30 * rng.random((20, 1)) +``` + +```{code-cell} +# y = a*x + b with noise +y = 0.5 * x + 1.0 + rng.normal(size=x.shape) +``` + +```{code-cell} +# create a linear regression model +model = LinearRegression() +model.fit(x, y) +``` + +```{code-cell} +# predict y from the data +x_new = np.linspace(0, 30, 100) +y_new = model.predict(x_new[:, np.newaxis]) +``` + +```{code-cell} +# plot the results +plt.figure(figsize=(4, 3)) +ax = plt.axes() +ax.scatter(x, y) +ax.plot(x_new, y_new) +ax.set_xlabel("x") +ax.set_ylabel("y") +ax.axis("tight") +``` + +(plot-2d-views-of-the-iris-dataset)= + +## Plot 2D views of the iris dataset + + + ++++ + +Plot a simple scatter plot of 2 features of the iris dataset. + +Note that more elaborate visualization of this dataset is detailed +in the {ref}`statistics` chapter. 
+ +```{code-cell} +# Load the data +from sklearn.datasets import load_iris +``` + +```{code-cell} +iris = load_iris() +``` + +```{code-cell} +from matplotlib import ticker +``` + +```{code-cell} +# The indices of the features that we are plotting +x_index = 0 +y_index = 1 +``` + +```{code-cell} +# this formatter will label the colorbar with the correct target names +formatter = ticker.FuncFormatter(lambda i, *args: iris.target_names[int(i)]) +``` + +```{code-cell} +plt.figure(figsize=(5, 4)) +plt.scatter(iris.data[:, x_index], iris.data[:, y_index], c=iris.target) +plt.colorbar(ticks=[0, 1, 2], format=formatter) +plt.xlabel(iris.feature_names[x_index]) +plt.ylabel(iris.feature_names[y_index]) +plt.tight_layout() +``` + +(nearest-neighbor-prediction-on-iris)= + +## Nearest-neighbor prediction on iris + + + ++++ + +Plot the decision boundary of nearest neighbor decision on iris, first +with a single nearest neighbor, and then using 3 nearest neighbors. + +```{code-cell} +from sklearn import neighbors, datasets +from matplotlib.colors import ListedColormap +``` + +```{code-cell} +# Create color maps for 3-class classification problem, as with iris +cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"]) +cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"]) +``` + +```{code-cell} +iris = datasets.load_iris() +X = iris.data[:, :2] # we only take the first two features. 
We could +# avoid this ugly slicing by using a two-dim dataset +y = iris.target +``` + +```{code-cell} +knn = neighbors.KNeighborsClassifier(n_neighbors=1) +knn.fit(X, y) +``` + +```{code-cell} +x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1 +y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1 +xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100)) +Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) +``` + +```{code-cell} +# Put the result into a color plot +Z = Z.reshape(xx.shape) +plt.figure() +plt.pcolormesh(xx, yy, Z, cmap=cmap_light) +``` + +```{code-cell} +# Plot also the training points +plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) +plt.xlabel("sepal length (cm)") +plt.ylabel("sepal width (cm)") +plt.axis("tight") +``` + +```{code-cell} +# And now, redo the analysis with 3 neighbors +knn = neighbors.KNeighborsClassifier(n_neighbors=3) +knn.fit(X, y) +``` + +```{code-cell} +Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) +``` + +```{code-cell} +# Put the result into a color plot +Z = Z.reshape(xx.shape) +plt.figure() +plt.pcolormesh(xx, yy, Z, cmap=cmap_light) +``` + +```{code-cell} +# Plot also the training points +plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) +plt.xlabel("sepal length (cm)") +plt.ylabel("sepal width (cm)") +plt.axis("tight") +``` + +(plot-fitting-a-9th-order-polynomial)= + +## Plot fitting a 9th order polynomial + + + ++++ + +Fits data generated from a 9th order polynomial with model of 4th order +and 9th order polynomials, to demonstrate that often simpler models are +to be preferred + +```{code-cell} +from matplotlib.colors import ListedColormap +``` + +```{code-cell} +from sklearn import linear_model +``` + +```{code-cell} +# Create color maps for 3-class classification problem, as with iris +cmap_light = ListedColormap(["#FFAAAA", "#AAFFAA", "#AAAAFF"]) +cmap_bold = ListedColormap(["#FF0000", "#00FF00", "#0000FF"]) +``` + +```{code-cell} +rng = np.random.default_rng(27446968) +x = 2 * 
rng.random(100) - 1 +``` + +```{code-cell} +f = lambda t: 1.2 * t**2 + 0.1 * t**3 - 0.4 * t**5 - 0.5 * t**9 +y = f(x) + 0.4 * rng.normal(size=100) +``` + +```{code-cell} +x_test = np.linspace(-1, 1, 100) +``` + +```{code-cell} +# The data +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4) +``` + +```{code-cell} +# Fitting 4th and 9th order polynomials +# +# For this we need to engineer features: the n_th powers of x: +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4) + +X = np.array([x**i for i in range(5)]).T +X_test = np.array([x_test**i for i in range(5)]).T +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(x_test, regr.predict(X_test), label="4th order") + +X = np.array([x**i for i in range(10)]).T +X_test = np.array([x_test**i for i in range(10)]).T +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(x_test, regr.predict(X_test), label="9th order") + +plt.legend(loc="best") +plt.axis("tight") +plt.title("Fitting a 4th and a 9th order polynomial") +``` + +```{code-cell} +# Ground truth +plt.figure(figsize=(6, 4)) +plt.scatter(x, y, s=4) +plt.plot(x_test, f(x_test), label="truth") +plt.axis("tight") +plt.title("Ground truth (9th order polynomial)") +``` + +(simple-visualization-and-classification-of-the-digits-dataset)= + +## Simple visualization and classification of the digits dataset + + + ++++ + +Plot the first few samples of the digits dataset and a 2D representation +built using PCA, then do a simple classification + +```{code-cell} +from sklearn.datasets import load_digits +``` + +```{code-cell} +digits = load_digits() +``` + +```{code-cell} +# Plot the data: images of digits +# ------------------------------- +# +# Each data in a 8x8 image +``` + +```{code-cell} +fig = plt.figure(figsize=(6, 6)) # figure size in inches +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) +for i in range(64): + ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) + ax.imshow(digits.images[i], cmap="binary", 
interpolation="nearest") + # label the image with the target value + ax.text(0, 7, str(digits.target[i])) +``` + +Plot a projection on the first 2 principal axes + +```{code-cell} +from sklearn.decomposition import PCA + +plt.figure() +pca = PCA(n_components=2) +proj = pca.fit_transform(digits.data) +plt.scatter(proj[:, 0], proj[:, 1], c=digits.target, cmap="Paired") +plt.colorbar() +``` + +Classify with Gaussian naive Bayes + +```{code-cell} +from sklearn.naive_bayes import GaussianNB +from sklearn.model_selection import train_test_split +``` + +```{code-cell} +# split the data into training and validation sets +X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target) +``` + +```{code-cell} +# train the model +clf = GaussianNB() +clf.fit(X_train, y_train) +``` + +```{code-cell} +# use the model to predict the labels of the test data +predicted = clf.predict(X_test) +expected = y_test +``` + +```{code-cell} +# Plot the prediction +fig = plt.figure(figsize=(6, 6)) # figure size in inches +fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05) +``` + +```{code-cell} +# plot the digits: each image is 8x8 pixels +for i in range(64): + ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[]) + ax.imshow(X_test.reshape(-1, 8, 8)[i], cmap="binary", interpolation="nearest") + + # label the image with the target value + if predicted[i] == expected[i]: + ax.text(0, 7, str(predicted[i]), color="green") + else: + ax.text(0, 7, str(predicted[i]), color="red") +``` + +```{code-cell} +# Quantify the performance +# ------------------------ +# +# First print the number of correct matches +matches = predicted == expected +print(matches.sum()) + +# The total number of data points +print(len(matches)) + +# And now, the ratio of correct predictions +matches.sum() / float(len(matches)) +``` + +```{code-cell} +# Print the classification report +from sklearn import metrics +``` + +```{code-cell} +print(metrics.classification_report(expected, 
predicted)) +``` + +```{code-cell} +# Print the confusion matrix +print(metrics.confusion_matrix(expected, predicted)) +``` + +(a-simple-regression-analysis-on-the-california-housing-data)= + +## A simple regression analysis on the California housing data + + + ++++ + +Here we perform a simple regression analysis on the California housing +data, exploring two types of regressors. + +```{code-cell} +from sklearn.datasets import fetch_california_housing +``` + +```{code-cell} +data = fetch_california_housing(as_frame=True) +``` + +```{code-cell} +# Print a histogram of the quantity to predict: price +plt.figure(figsize=(4, 3)) +plt.hist(data.target) +plt.xlabel("price ($100k)") +plt.ylabel("count") +plt.tight_layout() +``` + +Print the joint histogram for each feature + +```{code-cell} +for index, feature_name in enumerate(data.feature_names): + plt.figure(figsize=(4, 3)) + plt.scatter(data.data[feature_name], data.target) + plt.ylabel("Price", size=15) + plt.xlabel(feature_name, size=15) + plt.tight_layout() +``` + +### Simple prediction + +```{code-cell} +from sklearn.model_selection import train_test_split +``` + +```{code-cell} +X_train, X_test, y_train, y_test = train_test_split(data.data, data.target) +``` + +```{code-cell} +from sklearn.linear_model import LinearRegression +``` + +```{code-cell} +clf = LinearRegression() +clf.fit(X_train, y_train) +predicted = clf.predict(X_test) +expected = y_test +``` + +```{code-cell} +plt.figure(figsize=(4, 3)) +plt.scatter(expected, predicted) +plt.plot([0, 8], [0, 8], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +Prediction with gradient boosted tree + +```{code-cell} +from sklearn.ensemble import GradientBoostingRegressor +``` + +```{code-cell} +clf = GradientBoostingRegressor() +clf.fit(X_train, y_train) +``` + +```{code-cell} +predicted = clf.predict(X_test) +expected = y_test +``` + +```{code-cell} +plt.figure(figsize=(4, 3)) 
+plt.scatter(expected, predicted) +plt.plot([0, 5], [0, 5], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +```{code-cell} +# Print the error rate +print(f"RMS: {np.sqrt(np.mean((predicted - expected) ** 2))!r} ") +``` + +(measuring-decision-tree-performance)= + +## Measuring Decision Tree performance + + + ++++ + +Demonstrates overfit when testing on train set. + +Get the data + +```{code-cell} +from sklearn.datasets import fetch_california_housing +``` + +```{code-cell} +data = fetch_california_housing(as_frame=True) +``` + +```{code-cell} +# Train and test a model +from sklearn.tree import DecisionTreeRegressor +``` + +```{code-cell} +clf = DecisionTreeRegressor().fit(data.data, data.target) +``` + +```{code-cell} +predicted = clf.predict(data.data) +expected = data.target +``` + +Plot predicted as a function of expected + +```{code-cell} +plt.figure(figsize=(4, 3)) +plt.scatter(expected, predicted) +plt.plot([0, 5], [0, 5], "--k") +plt.axis("tight") +plt.xlabel("True price ($100k)") +plt.ylabel("Predicted price ($100k)") +plt.tight_layout() +``` + +Pretty much no errors! + +This is too good to be true: we are testing the model on the train +data, which is not a measure of generalization. 
+ +**The results are not valid** + ++++ + +(linear-model-cv)= + +## linear_model_cv + + + ++++ + +Use the RidgeCV and LassoCV to set the regularization parameter + +```{code-cell} +# Load the diabetes dataset +from sklearn.datasets import load_diabetes +``` + +```{code-cell} +data = load_diabetes() +X, y = data.data, data.target +print(X.shape) +``` + +```{code-cell} +# Compute the cross-validation score with the default hyper-parameters +from sklearn.model_selection import cross_val_score +from sklearn.linear_model import Ridge, Lasso +``` + +```{code-cell} +for Model in [Ridge, Lasso]: + model = Model() + print(f"{Model.__name__}: {cross_val_score(model, X, y).mean()}") +``` + +```{code-cell} +# We compute the cross-validation score as a function of alpha, the +# strength of the regularization for Lasso and Ridge +``` + +```{code-cell} +alphas = np.logspace(-3, -1, 30) +``` + +```{code-cell} +plt.figure(figsize=(5, 3)) +``` + +```{code-cell} +for Model in [Lasso, Ridge]: + scores = [cross_val_score(Model(alpha), X, y, cv=3).mean() for alpha in alphas] + plt.plot(alphas, scores, label=Model.__name__) +plt.legend(loc="lower left") +plt.xlabel("alpha") +plt.ylabel("cross validation score") +plt.tight_layout() +``` + +(pca)= + +## pca + + + ++++ + +Demo PCA in 2D + +```{code-cell} +# Load the iris data +from sklearn import datasets + +iris = datasets.load_iris() +X = iris.data +y = iris.target +``` + +```{code-cell} +# Fit a PCA +from sklearn.decomposition import PCA +``` + +```{code-cell} +pca = PCA(n_components=2, whiten=True) +pca.fit(X) +``` + +```{code-cell} +# Project the data in 2D +X_pca = pca.transform(X) +``` + +```{code-cell} +# Visualize the data +target_ids = range(len(iris.target_names)) +``` + +```{code-cell} +:tags: [hide-input] + +plt.figure(figsize=(6, 5)) +for i, c, label in zip(target_ids, "rgbcmykw", iris.target_names, strict=False): + plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], c=c, label=label) +plt.legend() +``` + +(tsne)= + +## tSNE to 
visualize digits + + + +Here we use {class}`sklearn.manifold.TSNE` to visualize the digits +datasets. Indeed, the digits are vectors in an 8\*8 = 64 dimensional space. +We want to project them in 2D for visualization. tSNE is often a good +solution, as it groups and separates data points based on their local +relationship. + +```{code-cell} +# Load the digits data +from sklearn import datasets +``` + +```{code-cell} +digits = datasets.load_digits() +# Take the first 500 data points: it's hard to see 1500 points +X = digits.data[:500] +y = digits.target[:500] +``` + +```{code-cell} +# Fit and transform with a TSNE +from sklearn.manifold import TSNE +``` + +```{code-cell} +tsne = TSNE(n_components=2, random_state=0) +``` + +```{code-cell} +# Project the data in 2D +X_2d = tsne.fit_transform(X) +``` + +```{code-cell} +# Visualize the data +target_ids = range(len(digits.target_names)) +plt.figure(figsize=(6, 5)) +colors = "r", "g", "b", "c", "m", "y", "k", "w", "orange", "purple" +for i, c, label in zip(target_ids, colors, digits.target_names, strict=True): + plt.scatter(X_2d[y == i, 0], X_2d[y == i, 1], c=c, label=label) +plt.legend() +``` + +(bias-variance)= + +## Bias and variance of polynomial fit + + + +Demo overfitting, underfitting, and validation and learning curves with +polynomial regression. + +Fit polynomials of different degrees to a dataset: for too small a degree, +the model _underfits_, while for too large a degree, it overfits. 
+ +```{code-cell} +def generating_func(x, rng=None, error=0.5): + rng = np.random.default_rng(rng) + return rng.normal(10 - 1.0 / (x + 0.1), error) +``` + +```{code-cell} +# A polynomial regression +from sklearn.pipeline import make_pipeline +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import PolynomialFeatures +``` + +A simple figure to illustrate the problem + +```{code-cell} +n_samples = 8 +``` + +```{code-cell} +rng = np.random.default_rng(27446968) +x = 10 ** np.linspace(-2, 0, n_samples) +y = generating_func(x, rng=rng) +``` + +```{code-cell} +x_test = np.linspace(-0.2, 1.2, 1000) +``` + +```{code-cell} +titles = ["d = 1 (under-fit; high bias)", "d = 2", "d = 6 (over-fit; high variance)"] +degrees = [1, 2, 6] +``` + +```{code-cell} +fig = plt.figure(figsize=(9, 3.5)) +fig.subplots_adjust(left=0.06, right=0.98, bottom=0.15, top=0.85, wspace=0.05) +``` + +```{code-cell} +for i, d in enumerate(degrees): + ax = fig.add_subplot(131 + i, xticks=[], yticks=[]) + ax.scatter(x, y, marker="x", c="k", s=50) + + model = make_pipeline(PolynomialFeatures(d), LinearRegression()) + model.fit(x[:, np.newaxis], y) + ax.plot(x_test, model.predict(x_test[:, np.newaxis]), "-b") + + ax.set_xlim(-0.2, 1.2) + ax.set_ylim(0, 12) + ax.set_xlabel("house size") + if i == 0: + ax.set_ylabel("price") + + ax.set_title(titles[i]) +``` + +```{code-cell} +# Generate a larger dataset +from sklearn.model_selection import train_test_split +``` + +```{code-cell} +n_samples = 200 +test_size = 0.4 +error = 1.0 +``` + +```{code-cell} +# randomly sample the data +x = rng.random(n_samples) +y = generating_func(x, rng=rng, error=error) +``` + +```{code-cell} +# split into training, validation, and testing sets. 
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size) +``` + +```{code-cell} +# show the training and validation sets +plt.figure(figsize=(6, 4)) +plt.scatter(x_train, y_train, color="red", label="Training set") +plt.scatter(x_test, y_test, color="blue", label="Test set") +plt.title("The data") +plt.legend(loc="best") +``` + +```{code-cell} +# Plot a validation curve +from sklearn.model_selection import validation_curve +``` + +```{code-cell} +degrees = list(range(1, 21)) +``` + +```{code-cell} +model = make_pipeline(PolynomialFeatures(), LinearRegression()) +``` + +```{code-cell} +# The parameter to vary is the "degrees" on the pipeline step +# "polynomialfeatures" +train_scores, validation_scores = validation_curve( + model, + x[:, np.newaxis], + y, + param_name="polynomialfeatures__degree", + param_range=degrees, +) +``` + +```{code-cell} +# Plot the mean train error and validation error across folds +plt.figure(figsize=(6, 4)) +plt.plot(degrees, validation_scores.mean(axis=1), lw=2, label="cross-validation") +plt.plot(degrees, train_scores.mean(axis=1), lw=2, label="training") +plt.legend(loc="best") +plt.xlabel("degree of fit") +plt.ylabel("explained variance") +plt.title("Validation curve") +plt.tight_layout() +``` + +## Learning curves + +Plot train and test error with an increasing number of samples + +```{code-cell} +# A learning curve for d=1, 5, 15 +for d in [1, 5, 15]: + model = make_pipeline(PolynomialFeatures(degree=d), LinearRegression()) + + from sklearn.model_selection import learning_curve + + train_sizes, train_scores, validation_scores = learning_curve( + model, x[:, np.newaxis], y, train_sizes=np.logspace(-1, 0, 20) + ) + + # Plot the mean train error and validation error across folds + plt.figure(figsize=(6, 4)) + plt.plot( + train_sizes, validation_scores.mean(axis=1), lw=2, label="cross-validation" + ) + plt.plot(train_sizes, train_scores.mean(axis=1), lw=2, label="training") + plt.ylim(ymin=-0.1, ymax=1) + + 
plt.legend(loc="best") + plt.xlabel("number of train samples") + plt.ylabel("explained variance") + plt.title(f"Learning curve (degree={d})") + plt.tight_layout() +``` + +## Other examples + ++++ + +(tutorial-diagrams)= + +### Tutorial Diagrams + + + ++++ + +This script plots the flow-charts used in the scikit-learn tutorials. + +```{code-cell} +from matplotlib.patches import Circle, Rectangle, Polygon, Arrow, FancyArrow +``` + +```{code-cell} +def create_base(box_bg="#CCCCCC", arrow1="#88CCFF", arrow2="#88FF88", supervised=True): + fig = plt.figure(figsize=(9, 6), facecolor="w") + ax = plt.axes((0, 0, 1, 1), xticks=[], yticks=[], frameon=False) + ax.set_xlim(0, 9) + ax.set_ylim(0, 6) + + patches = [ + Rectangle((0.3, 3.6), 1.5, 1.8, zorder=1, fc=box_bg), + Rectangle((0.5, 3.8), 1.5, 1.8, zorder=2, fc=box_bg), + Rectangle((0.7, 4.0), 1.5, 1.8, zorder=3, fc=box_bg), + Rectangle((2.9, 3.6), 0.2, 1.8, fc=box_bg), + Rectangle((3.1, 3.8), 0.2, 1.8, fc=box_bg), + Rectangle((3.3, 4.0), 0.2, 1.8, fc=box_bg), + Rectangle((0.3, 0.2), 1.5, 1.8, fc=box_bg), + Rectangle((2.9, 0.2), 0.2, 1.8, fc=box_bg), + Circle((5.5, 3.5), 1.0, fc=box_bg), + Polygon([[5.5, 1.7], [6.1, 1.1], [5.5, 0.5], [4.9, 1.1]], fc=box_bg), + FancyArrow( + 2.3, 4.6, 0.35, 0, fc=arrow1, width=0.25, head_width=0.5, head_length=0.2 + ), + FancyArrow( + 3.75, 4.2, 0.5, -0.2, fc=arrow1, width=0.25, head_width=0.5, head_length=0.2 + ), + FancyArrow( + 5.5, 2.4, 0, -0.4, fc=arrow1, width=0.25, head_width=0.5, head_length=0.2 + ), + FancyArrow( + 2.0, 1.1, 0.5, 0, fc=arrow2, width=0.25, head_width=0.5, head_length=0.2 + ), + FancyArrow( + 3.3, 1.1, 1.3, 0, fc=arrow2, width=0.25, head_width=0.5, head_length=0.2 + ), + FancyArrow( + 6.2, 1.1, 0.8, 0, fc=arrow2, width=0.25, head_width=0.5, head_length=0.2 + ), + ] + + if supervised: + patches += [ + Rectangle((0.3, 2.4), 1.5, 0.5, zorder=1, fc=box_bg), + Rectangle((0.5, 2.6), 1.5, 0.5, zorder=2, fc=box_bg), + Rectangle((0.7, 2.8), 1.5, 0.5, zorder=3, fc=box_bg), + 
FancyArrow( + 2.3, 2.9, 2.0, 0, fc=arrow1, width=0.25, head_width=0.5, head_length=0.2 + ), + Rectangle((7.3, 0.85), 1.5, 0.5, fc=box_bg), + ] + else: + patches += [Rectangle((7.3, 0.2), 1.5, 1.8, fc=box_bg)] + + for p in patches: + ax.add_patch(p) + + plt.text( + 1.45, + 4.9, + "Training\nText,\nDocuments,\nImages,\netc.", + ha="center", + va="center", + fontsize=14, + ) + + plt.text(3.6, 4.9, "Feature\nVectors", ha="left", va="center", fontsize=14) + + plt.text( + 5.5, 3.5, "Machine\nLearning\nAlgorithm", ha="center", va="center", fontsize=14 + ) + + plt.text( + 1.05, + 1.1, + "New Text,\nDocument,\nImage,\netc.", + ha="center", + va="center", + fontsize=14, + ) + + plt.text(3.3, 1.7, "Feature\nVector", ha="left", va="center", fontsize=14) + + plt.text(5.5, 1.1, "Predictive\nModel", ha="center", va="center", fontsize=12) + + if supervised: + plt.text(1.45, 3.05, "Labels", ha="center", va="center", fontsize=14) + + plt.text(8.05, 1.1, "Expected\nLabel", ha="center", va="center", fontsize=14) + plt.text( + 8.8, 5.8, "Supervised Learning Model", ha="right", va="top", fontsize=18 + ) + + else: + plt.text( + 8.05, + 1.1, + "Likelihood\nor Cluster ID\nor Better\nRepresentation", + ha="center", + va="center", + fontsize=12, + ) + plt.text( + 8.8, 5.8, "Unsupervised Learning Model", ha="right", va="top", fontsize=18 + ) +``` + +```{code-cell} +def plot_supervised_chart(annotate=False): + create_base(supervised=True) + if annotate: + fontdict = {"color": "r", "weight": "bold", "size": 14} + plt.text( + 1.9, + 4.55, + "X = vec.fit_transform(input)", + fontdict=fontdict, + rotation=20, + ha="left", + va="bottom", + ) + plt.text( + 3.7, + 3.2, + "clf.fit(X, y)", + fontdict=fontdict, + rotation=20, + ha="left", + va="bottom", + ) + plt.text( + 1.7, + 1.5, + "X_new = vec.transform(input)", + fontdict=fontdict, + rotation=20, + ha="left", + va="bottom", + ) + plt.text( + 6.1, + 1.5, + "y_new = clf.predict(X_new)", + fontdict=fontdict, + rotation=20, + ha="left", + va="bottom", 
+ ) +``` + +```{code-cell} +def plot_unsupervised_chart(): + create_base(supervised=False) +``` + +```{code-cell} +:tags: [hide-input] + +if __name__ == "__main__": + plot_supervised_chart(False) + plot_supervised_chart(True) + plot_unsupervised_chart() +``` + +(compare-classifiers-on-the-digits-data)= + +### Compare classifiers on the digits data + + + ++++ + +Compare the performance of a variety of classifiers on a test set for the +digits data. + +```{code-cell} +from sklearn import model_selection, datasets, metrics +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +from sklearn.neighbors import KNeighborsClassifier +``` + +```{code-cell} +digits = datasets.load_digits() +X = digits.data +y = digits.target +X_train, X_test, y_train, y_test = model_selection.train_test_split( + X, y, test_size=0.25, random_state=0 +) +``` + +```{code-cell} +for Model in [LinearSVC, GaussianNB, KNeighborsClassifier]: + clf = Model().fit(X_train, y_train) + y_pred = clf.predict(X_test) + print(f"{Model.__name__}: {metrics.f1_score(y_test, y_pred, average='macro')}") +``` + +```{code-cell} +print("------------------") +``` + +```{code-cell} +# test SVC loss +for loss in ["hinge", "squared_hinge"]: + clf = LinearSVC(loss=loss).fit(X_train, y_train) + y_pred = clf.predict(X_test) + print( + f"LinearSVC(loss='{loss}'): {metrics.f1_score(y_test, y_pred, average='macro')}" + ) +``` + +```{code-cell} +print("-------------------") +``` + +```{code-cell} +# test the number of neighbors +for n_neighbors in range(1, 11): + clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_train, y_train) + y_pred = clf.predict(X_test) + print( + f"KNeighbors(n_neighbors={n_neighbors}): {metrics.f1_score(y_test, y_pred, average='macro')}" + ) +``` + +(the-eigenfaces-example-chaining-pca-and-svms)= + +### The eigenfaces example: chaining PCA and SVMs + + + ++++ + +The goal of this example is to show how an unsupervised method and a +supervised one can be chained for better 
prediction. It starts with a +didactic but lengthy way of doing things, and finishes with the +idiomatic approach to pipelining in scikit-learn. + +Here we'll take a look at a simple facial recognition example. Ideally, +we would use a dataset consisting of a subset of the [Labeled Faces in +the Wild](http://vis-www.cs.umass.edu/lfw) data that is available with +{func}`sklearn.datasets.fetch_lfw_people`. However, this is a relatively large +download (~200MB) so we will do the tutorial on a simpler, less rich dataset. +Feel free to explore the LFW dataset. + +```{code-cell} +from sklearn import datasets +``` + +```{code-cell} +faces = datasets.fetch_olivetti_faces() +faces.data.shape +``` + +Let's visualize these faces to see what we're working with + +```{code-cell} +fig = plt.figure(figsize=(8, 6)) +# plot several images +for i in range(15): + ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[]) + ax.imshow(faces.images[i], cmap="bone") +``` + +::: {note} + +Note that these faces have already been localized and scaled to a common size. +This is an important preprocessing piece for facial recognition, and is +a process that can require a large collection of training data. This can be +done in scikit-learn, but the challenge is gathering a sufficient amount of +training data for the algorithm to work. Fortunately, this piece is common +enough that it has been done. One good resource is [OpenCV](https://docs.opencv.org/2.4/modules/contrib/doc/facerec/facerec_tutorial.html) +— the _Open Computer Vision Library_. + +::: + +We'll perform a Support Vector classification of the images. 
We'll do a +typical train-test split on the images: + +```{code-cell} +from sklearn.model_selection import train_test_split +``` + +```{code-cell} +X_train, X_test, y_train, y_test = train_test_split( + faces.data, faces.target, random_state=0 +) +``` + +```{code-cell} +print(X_train.shape, X_test.shape) +``` + +### Preprocessing: Principal Component Analysis + +4096 dimensions is a lot for SVM. We can use PCA to reduce these 4096 +features to a manageable size, while maintaining most of the information +in the dataset. + +```{code-cell} +from sklearn import decomposition +``` + +```{code-cell} +pca = decomposition.PCA(n_components=150, whiten=True) +pca.fit(X_train) +``` + +One interesting part of PCA is that it computes the "mean" face, which +can be interesting to examine: + +```{code-cell} +plt.imshow(pca.mean_.reshape(faces.images[0].shape), cmap="bone") +``` + +The principal components measure deviations about this mean along +orthogonal axes. + +```{code-cell} +print(pca.components_.shape) +``` + +It is also interesting to visualize these principal components: + +```{code-cell} +fig = plt.figure(figsize=(16, 6)) +for i in range(30): + ax = fig.add_subplot(3, 10, i + 1, xticks=[], yticks=[]) + ax.imshow(pca.components_[i].reshape(faces.images[0].shape), cmap="bone") +``` + +The components ("eigenfaces") are ordered by their importance from +top-left to bottom-right. We see that the first few components seem to +primarily take care of lighting conditions; the remaining components +pull out certain identifying features: the nose, eyes, eyebrows, etc. + +With this projection computed, we can now project our original training +and test data onto the PCA basis: + +```{code-cell} +X_train_pca = pca.transform(X_train) +X_test_pca = pca.transform(X_test) +print(X_train_pca.shape) +print(X_test_pca.shape) +``` + +These projected components correspond to factors in a linear combination +of component images such that the combination approaches the original +face. 
+ +### Doing the Learning: Support Vector Machines + +Now we'll perform support-vector-machine classification on this reduced +dataset: + +```{code-cell} +from sklearn import svm +``` + +```{code-cell} +clf = svm.SVC(C=5.0, gamma=0.001) +clf.fit(X_train_pca, y_train) +``` + +Finally, we can evaluate how well this classification did. First, we +might plot a few of the test-cases with the labels learned from the +training set: + +```{code-cell} +fig = plt.figure(figsize=(8, 6)) +for i in range(15): + ax = fig.add_subplot(3, 5, i + 1, xticks=[], yticks=[]) + ax.imshow(X_test[i].reshape(faces.images[0].shape), cmap="bone") + y_pred = clf.predict(X_test_pca[i, np.newaxis])[0] + color = "black" if y_pred == y_test[i] else "red" + ax.set_title(y_pred, fontsize="small", color=color) +``` + +The classifier is correct on an impressive number of images given the +simplicity of its learning model! Using a linear classifier on 150 +features derived from the pixel-level data, the algorithm correctly +identifies a large number of the people in the images. + +Again, we can quantify this effectiveness using one of several measures +from {mod}`sklearn.metrics`. First we can do the classification +report, which shows the precision, recall and other measures of the +"goodness" of the classification: + +```{code-cell} +from sklearn import metrics +``` + +```{code-cell} +y_pred = clf.predict(X_test_pca) +print(metrics.classification_report(y_test, y_pred)) +``` + +Another interesting metric is the _confusion matrix_, which indicates +how often any two items are mixed-up. The confusion matrix of a perfect +classifier would only have nonzero entries on the diagonal, with zeros +on the off-diagonal: + +```{code-cell} +print(metrics.confusion_matrix(y_test, y_pred)) +``` + +### Pipelining + +Above we used PCA as a pre-processing step before applying our support +vector machine classifier. 
Plugging the output of one estimator directly +into the input of a second estimator is a commonly used pattern; for +this reason scikit-learn provides a `Pipeline` object which automates +this process. The above problem can be re-expressed as a pipeline as +follows: + +```{code-cell} +from sklearn.pipeline import Pipeline +``` + +```{code-cell} +clf = Pipeline( + [ + ("pca", decomposition.PCA(n_components=150, whiten=True)), + ("svm", svm.LinearSVC(C=1.0)), + ] +) +``` + +```{code-cell} +clf.fit(X_train, y_train) +``` + +```{code-cell} +:tags: [hide-input] + +y_pred = clf.predict(X_test) +print(metrics.confusion_matrix(y_pred, y_test)) +``` + +### A Note on Facial Recognition + +Here we have used PCA "eigenfaces" as a pre-processing step for facial +recognition. The reason we chose this is because PCA is a +broadly-applicable technique, which can be useful for a wide array of +data types. Research in the field of facial recognition in particular, +however, has shown that other more specific feature extraction methods +can be much more effective. + ++++ + +(example-of-linear-and-non-linear-models)= + +### Example of linear and non-linear models + + + ++++ + +This is an example plot from the tutorial which accompanies an explanation +of the support vector machine GUI.
+ +```{code-cell} +from sklearn import svm +``` + +```{code-cell} +rng = np.random.default_rng(27446968) +``` + +Data that is linearly separable + +```{code-cell} +def linear_model(rseed=42, n_samples=30): + "Generate data according to a linear model" + np.random.seed(rseed) + + data = np.random.normal(0, 10, (n_samples, 2)) + data[: n_samples // 2] -= 15 + data[n_samples // 2 :] += 15 + + labels = np.ones(n_samples) + labels[: n_samples // 2] = -1 + + return data, labels +``` + +```{code-cell} +X, y = linear_model() +clf = svm.SVC(kernel="linear") +clf.fit(X, y) +``` + +```{code-cell} +plt.figure(figsize=(6, 4)) +ax = plt.subplot(111, xticks=[], yticks=[]) +ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bone") +ax.scatter( + clf.support_vectors_[:, 0], + clf.support_vectors_[:, 1], + s=80, + edgecolors="k", + facecolors="none", +) +delta = 1 +y_min, y_max = -50, 50 +x_min, x_max = -50, 50 +x = np.arange(x_min, x_max + delta, delta) +y = np.arange(y_min, y_max + delta, delta) +X1, X2 = np.meshgrid(x, y) +Z = clf.decision_function(np.c_[X1.ravel(), X2.ravel()]) +Z = Z.reshape(X1.shape) +ax.contour( + X1, X2, Z, [-1.0, 0.0, 1.0], colors="k", linestyles=["dashed", "solid", "dashed"] +) +``` + +Data with a non-linear separation + +```{code-cell} +def nonlinear_model(rseed=27446968, n_samples=30): + rng = np.random.default_rng(rseed) + + radius = 40 * rng.random(n_samples) + far_pts = radius > 20 + radius[far_pts] *= 1.2 + radius[~far_pts] *= 1.1 + + theta = rng.random(n_samples) * np.pi * 2 + + data = np.empty((n_samples, 2)) + data[:, 0] = radius * np.cos(theta) + data[:, 1] = radius * np.sin(theta) + + labels = np.ones(n_samples) + labels[far_pts] = -1 + + return data, labels +``` + +```{code-cell} +X, y = nonlinear_model() +clf = svm.SVC(kernel="rbf", gamma=0.001, coef0=0, degree=3) +clf.fit(X, y) +``` + +```{code-cell} +plt.figure(figsize=(6, 4)) +ax = plt.subplot(1, 1, 1, xticks=[], yticks=[]) +ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bone", zorder=2) +ax.scatter( + 
clf.support_vectors_[:, 0], + clf.support_vectors_[:, 1], + s=80, + edgecolors="k", + facecolors="none", +) +delta = 1 +y_min, y_max = -50, 50 +x_min, x_max = -50, 50 +x = np.arange(x_min, x_max + delta, delta) +y = np.arange(y_min, y_max + delta, delta) +X1, X2 = np.meshgrid(x, y) +Z = clf.decision_function(np.c_[X1.ravel(), X2.ravel()]) +Z = Z.reshape(X1.shape) +ax.contour( + X1, + X2, + Z, + [-1.0, 0.0, 1.0], + colors="k", + linestyles=["dashed", "solid", "dashed"], + zorder=1, +) +``` + +(variance-linear-regr)= + +### variance_linear_regr + + + ++++ + +Plot variance and regularization in linear models + +```{code-cell} +# Smaller figures +``` + +```{code-cell} +plt.rcParams["figure.figsize"] = (3, 2) +``` + +```{code-cell} +# We consider the situation where we have only 2 data point +X = np.c_[0.5, 1].T +y = [0.5, 1] +X_test = np.c_[0, 2].T +``` + +```{code-cell} +# Without noise, as linear regression fits the data perfectly +from sklearn import linear_model +``` + +```{code-cell} +regr = linear_model.LinearRegression() +regr.fit(X, y) +plt.plot(X, y, "o") +plt.plot(X_test, regr.predict(X_test)) +``` + +```{code-cell} +# In real life situation, we have noise (e.g. measurement noise) in our data: +rng = np.random.default_rng(27446968) +for _ in range(6): + noisy_X = X + np.random.normal(loc=0, scale=0.1, size=X.shape) + plt.plot(noisy_X, y, "o") + regr.fit(noisy_X, y) + plt.plot(X_test, regr.predict(X_test)) +``` + +As we can see, our linear model captures and amplifies the noise in the +data. It displays a lot of variance. + +We can use another linear estimator that uses regularization, the +{class}`~sklearn.linear_model.Ridge` estimator. This estimator regularizes the +coefficients by shrinking them to zero, under the assumption that very high +correlations are often spurious. The alpha parameter controls the amount of +shrinkage used. 
+ +```{code-cell} +regr = linear_model.Ridge(alpha=0.1) +np.random.seed(0) +for _ in range(6): + noisy_X = X + np.random.normal(loc=0, scale=0.1, size=X.shape) + plt.plot(noisy_X, y, "o") + regr.fit(noisy_X, y) + plt.plot(X_test, regr.predict(X_test)) +``` diff --git a/packages/statistics/index.md b/packages/statistics/index.md new file mode 100644 index 000000000..d2d33a373 --- /dev/null +++ b/packages/statistics/index.md @@ -0,0 +1,841 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + +(statistics)= + +# Statistics in Python + +**Author**: _Gaël Varoquaux_ + +:::{admonition} Requirements + +- Standard scientific Python environment (NumPy, SciPy, matplotlib) +- [Pandas](https://pandas.pydata.org/) +- [Statsmodels](https://www.statsmodels.org/) +- [Seaborn](https://seaborn.pydata.org) + +To install Python and these dependencies, we recommend that you +download [Anaconda Python](https://www.anaconda.com/distribution/) or, +preferably, use the package manager if you are under Ubuntu or other linux. +::: + +:::{admonition} See also + +- **Bayesian statistics in Python**: + This chapter does not cover tools for Bayesian statistics. Of + particular interest for Bayesian modelling is [PyMC](https://docs.pymc.io/), which implements a probabilistic + programming language in Python. +- **Read a statistics book**: + The [Think stats](https://greenteapress.com/wp/think-stats-2e) book is + available as free PDF or in print and is a great introduction to + statistics. + ::: + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +``` + +::: {note} +:class: dropdown + +**Why Python for statistics?** + +R is a language dedicated to statistics. Python is a general-purpose +language with statistics modules. 
R has more statistical analysis +features than Python, and specialized syntaxes. However, when it +comes to building complex analysis pipelines that mix statistics with +e.g. image analysis, text mining, or control of a physical +experiment, the richness of Python is an invaluable asset. +::: + +::: {note} +:class: dropdown + +In this document, the Python inputs are represented with the sign +">>>". + +**Disclaimer: Gender questions** + +Some of the examples of this tutorial are chosen around gender +questions. The reason is that on such questions controlling the truth +of a claim actually matters to many people. +::: + +## Data representation and interaction + +### Data as a table + +The setting that we consider for statistical analysis is that of multiple +_observations_ or _samples_ described by a set of different _attributes_ +or _features_. The data can then be seen as a 2D table, or matrix, with +columns giving the different attributes of the data, and rows the +observations. For instance, the data contained in +{download}`examples/brain_size.csv`: + ++++ + +:::{include} examples/brain_size.csv +:literal: +:end-line: 6 +::: + ++++ + +### The pandas data-frame + +::: {note} +:class: dropdown + +We will store and manipulate this data in a {class}`pandas.DataFrame`, from +the [pandas](https://pandas.pydata.org) module. It is the Python equivalent of +the spreadsheet table. It is different from a 2D `numpy` array as it has named +columns, can contain a mixture of different data types by column, and has +elaborate selection and pivotal mechanisms. + +::: + ++++ + +#### Creating dataframes: reading data files or converting arrays + +:::{sidebar} Separator +It is a CSV file, but the separator is ";" +::: + +**Reading from a CSV file:** Using the above CSV file that gives +observations of brain size and weight and IQ (Willerman et al.
1991), the +data are a mixture of numerical and categorical values: + +```{code-cell} +data = pd.read_csv('examples/brain_size.csv', sep=';', na_values=".", index_col=0) +data +``` + +:::{warning} +**Missing values** + +The weight of the second individual is missing in the CSV file. If we +don't specify the missing value (NA = not available) marker, we will +not be able to do statistical analysis. +::: + +**Creating from arrays**: A {class}`pandas.DataFrame` can also be seen +as a dictionary of 1D 'series', eg arrays or lists. If we have 3 +`numpy` arrays: + +```{code-cell} +t = np.linspace(-6, 6, 20) +sin_t = np.sin(t) +cos_t = np.cos(t) +``` + +We can expose them as a `pd.DataFrame` + +```{code-cell} +pd.DataFrame({'t': t, 'sin': sin_t, 'cos': cos_t}) +``` + +**Other inputs**: [pandas](https://pandas.pydata.org) can input data from +SQL, excel files, or other formats. See the [pandas documentation](https://pandas.pydata.org). + ++++ + +#### Manipulating data + +`data` is a {class}`pandas.DataFrame`, that resembles R's dataframe: + +```{code-cell} +data.shape # 40 rows and 8 columns +``` + +```{code-cell} +data.columns # It has columns +``` + +```{code-cell} +data['Gender'] # Columns can be addressed by name +``` + +```{code-cell} +# Simpler selector +data[data['Gender'] == 'Female']['VIQ'].mean() +``` + +:::{note} +For a quick view on a large dataframe, use its `describe` +method: {meth}`pandas.DataFrame.describe`. +::: + +**groupby**: splitting a dataframe on values of categorical variables: + +```{code-cell} +groupby_gender = data.groupby('Gender') +for gender, value in groupby_gender['VIQ']: + print((gender, value.mean())) +``` + +`groupby_gender` is a powerful object that exposes many +operations on the resulting group of dataframes: + +```{code-cell} +groupby_gender.mean() +``` + +::: {note} +:class: dropdown + +Use tab-completion on `groupby_gender` to find more. 
Other common +grouping functions are median, count (useful for checking to see the +amount of missing values in different subsets) or sum. Groupby +evaluation is lazy, no work is done until an aggregation function is +applied. +::: + +```{code-cell} +data = pd.read_csv("examples/brain_size.csv", sep=";", na_values=".") + +# Box plots of different columns for each gender +groupby_gender = data.groupby("Gender") +groupby_gender.boxplot(column=["FSIQ", "VIQ", "PIQ"]); +``` + +::: {exercise-start} +:label: stats-brain-basic +:class: dropdown +::: + +- What is the mean value for VIQ for the full population? + +- How many males/females were included in this study? + + **Hint** use 'tab completion' to find out the methods that can be + called, instead of 'mean' in the above example. + +- What is the average value of MRI counts expressed in log units, for + males and females? + +:::{note} +`groupby_gender.boxplot` is used for the plots above (see the plot code +above). +::: + +::: {exercise-end} +::: + ++++ + +#### Plotting data + ++++ + +Pandas comes with some plotting tools ({mod}`pandas.plotting`, using +matplotlib behind the scene) to display statistics of the data in +dataframes: + +**Scatter matrices**: + +```{code-cell} +pd.plotting.scatter_matrix(data[['Weight', 'Height', 'MRI_Count']]); +``` + +:::{sidebar} Two populations +The IQ metrics are bimodal, as if there are 2 sub-populations. +::: + +```{code-cell} +pd.plotting.scatter_matrix(data[['PIQ', 'VIQ', 'FSIQ']]); +``` + +::: {exercise-start} +:label: stats-bimodel-ex +:class: dropdown +::: + +Plot the scatter matrix for males only, and for females only. Do you +think that the 2 sub-populations correspond to gender? 
+ +::: {exercise-end} +::: + ++++ + +## Hypothesis testing: comparing two groups + +For simple [statistical tests](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing), we will +use the {mod}`scipy.stats` sub-module of [SciPy](https://docs.scipy.org/doc/): + +```{code-cell} +import scipy as sp +``` + +:::{admonition} See also + +SciPy is a vast library. For a quick summary of the whole library, see +the {ref}`scipy ` chapter. +::: + ++++ + +### Student's t-test: the simplest statistical test + +#### One-sample tests: testing the value of a population mean + +![](two_sided.png) + +{func}`scipy.stats.ttest_1samp` tests the null hypothesis that the mean +of the population underlying the data is equal to a given value. It returns +the [T statistic](https://en.wikipedia.org/wiki/Student%27s_t-test), +and the [p-value](https://en.wikipedia.org/wiki/P-value) (see the +function's help): + +```{code-cell} +sp.stats.ttest_1samp(data['VIQ'], 0) +``` + +The p-value of $10^{-28}$ indicates that such an extreme value of the statistic +is unlikely to be observed under the null hypothesis. This may be taken as +evidence that the null hypothesis is false and that the population mean IQ +(VIQ measure) is not 0. + +Technically, the p-value of the t-test is derived under the assumption that +the means of samples drawn from the population are normally distributed. +This condition is exactly satisfied when the population itself is normally +distributed; however, due to the central limit theorem, the condition is +nearly true for reasonably large samples drawn from populations that follow +a variety of non-normal distributions.
+ +Nonetheless, if we are concerned that violation of the normality assumptions +will affect the conclusions of the test, we can use a [Wilcoxon signed-rank test](https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test), which relaxes +this assumption at the expense of test power: + +```{code-cell} +sp.stats.wilcoxon(data['VIQ']) +``` + +#### Two-sample t-test: testing for difference across populations + +We have seen above that the mean VIQ in the male and female samples +were different. To test whether this difference is significant (and +suggests that there is a difference in population means), we perform +a two-sample t-test using {func}`scipy.stats.ttest_ind`: + +```{code-cell} +female_viq = data[data['Gender'] == 'Female']['VIQ'] +male_viq = data[data['Gender'] == 'Male']['VIQ'] +sp.stats.ttest_ind(female_viq, male_viq) +``` + +The corresponding non-parametric test is the [Mann–Whitney U +test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U), +{func}`scipy.stats.mannwhitneyu`. + +```{code-cell} +sp.stats.mannwhitneyu(female_viq, male_viq) +``` + +### Paired tests: repeated measurements on the same individuals + +```{code-cell} +# Box plot of FSIQ and PIQ (different measures of IQ) +plt.figure(figsize=(4, 3)) +data.boxplot(column=["FSIQ", "PIQ"]) +``` + +PIQ, VIQ, and FSIQ give three measures of IQ. Let us test whether FSIQ and PIQ +are significantly different. We can use an "independent sample" test: + +```{code-cell} +sp.stats.ttest_ind(data['FSIQ'], data['PIQ']) +``` + +The problem with this approach is that it ignores an important relationship +between observations: FSIQ and PIQ are measured on the same individuals. Thus, +the variance due to inter-subject variability is confounding, reducing the +power of the test.
This variability can be removed using a "paired test" or +["repeated measures +test"](https://en.wikipedia.org/wiki/Repeated_measures_design): + +```{code-cell} +sp.stats.ttest_rel(data['FSIQ'], data['PIQ']) +``` + +```{code-cell} +# Boxplot of the difference +plt.figure(figsize=(4, 3)) +plt.boxplot(data["FSIQ"] - data["PIQ"]) +plt.xticks((1,), ("FSIQ - PIQ",)); +``` + +This is equivalent to a one-sample test on the differences between paired +observations: + +```{code-cell} +sp.stats.ttest_1samp(data['FSIQ'] - data['PIQ'], 0) +``` + +Accordingly, we can perform a nonparametric version of the test with +`wilcoxon`. + +```{code-cell} +sp.stats.wilcoxon(data['FSIQ'], data['PIQ'], method="approx") +``` + +::: {exercise-start} +:label: stats-weights-ex +:class: dropdown +::: + +- Test the difference between weights in males and females. +- Use non-parametric statistics to test the difference between VIQ in + males and females. + +::: {exercise-end} +::: + +::: {solution-start} stats-weights-ex +:class: dropdown +::: + +**Conclusion**: we find that the data does not support the hypothesis +that males and females have different VIQ. + +::: {solution-end} +::: + ++++ + +## Linear models, multiple factors, and analysis of variance + ++++ + +### "formulas" to specify statistical models in Python + ++++ + +#### A simple linear regression + +::: {note} + +From an original example by **Thomas Haslwanter**. + +::: + +Given two sets of observations, `x` and `y`, we want to test the +hypothesis that `y` is a linear function of `x`. In other terms: + +$$ +y = x * \textit{coef} + \textit{intercept} + e +$$ + +where $e$ is observation noise. We will use the [statsmodels](https://www.statsmodels.org/) module to: + +1. Fit a linear model. We will use the simplest strategy, [ordinary least + squares](https://en.wikipedia.org/wiki/Ordinary_least_squares) (OLS). +2. Test that `coef` is non zero.
+ +First, we generate simulated data according to the model: + +```{code-cell} +x = np.linspace(-5, 5, 20) + +# To get reproducible values, provide a seed value +rng = np.random.default_rng(27446968) + +# normal distributed noise +y = -5 + 3 * x + 4 * rng.normal(size=x.shape) + +# Create a data frame containing all the relevant variables +data = pd.DataFrame({'x': x, 'y': y}) + +# Plot the data +plt.figure(figsize=(5, 4)) +plt.plot(x, y, "o"); +``` + +:::{sidebar} "formulas" for statistics in Python +[See the statsmodels documentation](https://www.statsmodels.org/stable/example_formulas.html) +::: + +Then we specify an OLS model and fit it: + +```{code-cell} +import statsmodels.formula.api as smf +model = smf.ols("y ~ x", data).fit() +``` + +We can inspect the various statistics derived from the fit: + +```{code-cell} +model.summary() +``` + +:::{admonition} Terminology: +Statsmodels uses a statistical terminology: the `y` variable in +statsmodels is called 'endogenous' while the `x` variable is called +exogenous. This is discussed in more detail [here](https://www.statsmodels.org/devel/endog_exog.html). + +To simplify, `y` (endogenous) is the value you are trying to predict, +while `x` (exogenous) represents the features you are using to make +the prediction. + +If the terminology is unfamiliar, you might be able to remember which way +round these go by noticing that there is an `x` in exogenous. +::: + +::: {exercise-start} +:label: stats-estimated-params-ex +:class: dropdown +::: + +Retrieve the estimated parameters from the model above. + +**Hint**: use tab-completion to find the relevant attribute. 
+ +::: {exercise-end} +::: + ++++ + +#### Categorical variables: comparing groups or multiple categories + +Let us go back to the data on brain size: + +```{code-cell} +data = pd.read_csv('examples/brain_size.csv', sep=';', na_values=".") +``` + +We can write a comparison between IQ of male and female using a linear +model: + +```{code-cell} +model = smf.ols("VIQ ~ Gender + 1", data).fit() +model.summary() +``` + +::::{admonition} Tips on specifying models + +**Forcing categorical**: the 'Gender' is automatically detected as a +categorical variable, and thus each of its different values is +treated as different entities. + +An integer column can be forced to be treated as categorical using: + +```python +model = ols('VIQ ~ C(Gender)', data).fit() +``` + +**Intercept**: We can remove the intercept using `- 1` in the formula, +or force the use of an intercept using `+ 1`. + +::: {note} +:class: dropdown + +By default, statsmodels treats a categorical variable with K possible +values as K-1 'dummy' boolean variables (the last level being +absorbed into the intercept term). This is almost always a good +default choice - however, it is possible to specify different +encodings for categorical variables +().
+::: +:::: + ++++ + +#### Link to t-tests between different FSIQ and PIQ + +To compare different types of IQ, we need to create a "long-form" +table, listing IQs, where the type of IQ is indicated by a +categorical variable: + +```{code-cell} +data_fisq = pd.DataFrame({'iq': data['FSIQ'], 'type': 'fsiq'}) +data_piq = pd.DataFrame({'iq': data['PIQ'], 'type': 'piq'}) +data_long = pd.concat((data_fisq, data_piq)) +data_long +``` + +```{code-cell} +model = smf.ols("iq ~ type", data_long).fit() +model.summary() +``` + +We can see that we retrieve the same values for the t-statistic and +corresponding p-value for the effect of the type of IQ as in the +previous t-test: + +```{code-cell} +sp.stats.ttest_ind(data['FSIQ'], data['PIQ']) +``` + +### Multiple Regression: including multiple factors + +::: {note} + +From an original example by _Thomas Haslwanter_ + +::: + +Consider a linear model explaining a variable `z` (the dependent +variable) with 2 variables `x` and `y`: + +$$ +z = x \, c_1 + y \, c_2 + i + e +$$ + +Such a model can be seen in 3D as fitting a plane to a cloud of (`x`, `y`, +`z`) points. + +```{code-cell} +# Generate and show the data +x = np.linspace(-5, 5, 21) +# We generate a 2D grid +X, Y = np.meshgrid(x, x) + +# To get reproducible values, provide a seed value +rng = np.random.default_rng(27446968) + +# Z is the elevation of this 2D grid +Z = -5 + 3 * X - 0.5 * Y + 8 * rng.normal(size=X.shape) + +# Plot the data +ax = plt.figure().add_subplot(projection="3d") +surf = ax.plot_surface(X, Y, Z, cmap="coolwarm", rstride=1, cstride=1) +ax.view_init(20, -120) +ax.set_xlabel("X") +ax.set_ylabel("Y") +ax.set_zlabel("Z"); +``` + +**Example: the iris data** ({download}`examples/iris.csv`) + +::: {note} +:class: dropdown + +Sepal and petal size tend to be related: bigger flowers are bigger! +But is there in addition a systematic effect of species?
+::: + +```{code-cell} +data = pd.read_csv('examples/iris.csv') +# Express the names as categories +categories = pd.Categorical(data["name"]) +# The parameter 'c' is passed to plt.scatter and will control the color +pd.plotting.scatter_matrix(data, c=categories.codes, marker="o") +fig = plt.gcf() +fig.suptitle("blue: setosa, green: versicolor, red: virginica", size=13); +``` + +Let us try to explain the sepal width as a function of the petal +length and the category of iris: + +```{code-cell} +model = smf.ols("sepal_width ~ name + petal_length", data).fit() +model.summary() +``` + +### Post-hoc hypothesis testing: analysis of variance (ANOVA) + +In the above iris example, we wish to test if the sepal width is +different between versicolor and virginica, after removing the effect of +petal length. This can be formulated as testing the difference between the +coefficients associated with versicolor and virginica in the linear model +estimated above (it is an Analysis of Variance, [ANOVA](https://en.wikipedia.org/wiki/Analysis_of_variance)). For this, we +write a **vector of 'contrast'** on the parameters estimated: we want to +test `"name[T.versicolor] - name[T.virginica]"`, with an [F-test](https://en.wikipedia.org/wiki/F-test): + +```{code-cell} +print(model.f_test([0, 1, -1, 0])) +``` + +Is this difference significant? + +::: {exercise-start} +:label: stats-male-female-ex +:class: dropdown +::: + +Going back to the brain size + IQ data, test if the VIQ of male and +female are different after removing the effect of brain size, height +and weight. + +::: {exercise-end} +::: + ++++ + +## More visualization: Seaborn for statistical exploration + +[Seaborn](https://seaborn.pydata.org) combines +simple statistical fits with plotting on pandas dataframes. + +```{code-cell} +import seaborn +``` + +Let us consider a dataset giving wages and other personal information +on 500 individuals ([Berndt, ER. The Practice of Econometrics. 1991.
NY: +Addison-Wesley](https://lib.stat.cmu.edu/datasets/CPS_85_Wages)). + +We first load and arrange the data — view the code for details: + +```{code-cell} +:tags: [hide-input] + +data = pd.read_csv("examples/wages.txt", + skiprows=27, + skipfooter=6, + sep=None, + header=None, + engine="python" # To allow use of skipfooter. +) +# Give names to the columns +names = [ + "education: Number of years of education", + "south: 1=person lives in South, 0=Person lives elsewhere", + "sex: 1=female, 0=Male", + "experience: Number of years of work experience", + "union: 1=union member, 0=Not union member", + "wage: wage (dollars per hour)", + "age: years", + "race: 1=other, 2=Hispanic, 3=White", + "occupation: 1=Management, 2=Sales, 3=Clerical, 4=Service, 5=Professional, 6=Other", + "sector: 0=Other, 1=Manufacturing, 2=Construction", + "marr: 0=unmarried, 1=Married", +] +short_names = [n.split(":")[0] for n in names] +data.columns = pd.Index(short_names) +# Log-transform the wages, because they typically are increased with +# multiplicative factors +data["wage"] = np.log10(data["wage"]) +# Convert genders to strings (this is particularly useful so that the +# statsmodels formulas detects that `sex` is a categorical variable) +data["sex"] = np.choose(data['sex'], ["male", "female"]) +``` + +Here are the resulting loaded data. + +```{code-cell} +data +``` + +### Pairplot: scatter matrices + +We can easily have an intuition on the interactions between continuous +variables using {func}`seaborn.pairplot` to display a scatter matrix: + +```{code-cell} +seaborn.pairplot(data, vars=['wage', 'age', 'education'], kind='reg'); +``` + +Categorical variables can be plotted as the hue: + +```{code-cell} +seaborn.pairplot(data, vars=['wage', 'age', 'education'], + kind='reg', hue='sex'); +``` + +::::{topic} **Look and feel and matplotlib settings** +Seaborn changes the default of matplotlib figures to achieve a more +"modern", "excel-like" look. It does that upon import. 
You can reset +the default using: + +```python +plt.rcdefaults() +``` + +::: {note} +:class: dropdown + +To switch back to seaborn settings, or understand better styling in +seaborn, see the [relevant section of the seaborn documentation](https://seaborn.pydata.org/tutorial/aesthetics.html). +::: + +:::: + +### lmplot: plotting a univariate regression + +A regression capturing the relation between one variable and another, eg +wage, and education, can be plotted using {func}`seaborn.lmplot`: + +```{code-cell} +seaborn.lmplot(y='wage', x='education', data=data); +``` + +::::{topic} **Robust regression** + +::: {note} +:class: dropdown + +Given that, in the above plot, there seems to be a couple of data +points that are outside of the main cloud to the right, they might be +outliers, not representative of the population, but driving the +regression. + +::: + +To compute a regression that is less sensitive to outliers, one must use +a [robust model](https://en.wikipedia.org/wiki/Robust_statistics). This is +done in seaborn using `robust=True` in the plotting functions, or in +statsmodels by replacing the use of the OLS by a "Robust Linear Model", +{func}`statsmodels.formula.api.rlm`. :::: + ++++ + +## Testing for interactions + +```{code-cell} +seaborn.lmplot(y="wage", x="education", hue="sex", data=data); +``` + +We can first ask do `education` and `sex` separately contribute to `wage`: + +```{code-cell} +result = smf.ols(formula="wage ~ education + sex", data=data).fit() +result.summary() +``` + +Our next question is — do wages _increase more_ with education for males than +females? + ++++ + +::: {note} +:class: dropdown + +The plot above is made of two different fits. We need to formulate a +single model that tests for a variance of slope across the two +populations. This is done via an ["interaction"](https://www.statsmodels.org/devel/example_formulas.html#multiplicative-interactions). 
+::: + +```{code-cell} +result = smf.ols(formula='wage ~ education + sex + education * sex', + data=data).fit() +result.summary() +``` + +Can we conclude that education benefits males more than females? + +:::{admonition} Take home messages + +- Hypothesis testing and p-values give you the **significance** of an + effect / difference. +- **Formulas** (with categorical variables) enable you to express rich + links in your data. +- **Visualizing** your data and fitting simple models give insight into the + data. +- **Conditioning** (adding factors that can explain all or part of + the variation) is an important modeling aspect that changes the + interpretation. + ::: diff --git a/packages/statistics/index.rst b/packages/statistics/index.rst deleted file mode 100644 index 4da8397b9..000000000 --- a/packages/statistics/index.rst +++ /dev/null @@ -1,910 +0,0 @@ -.. for doctests - >>> import matplotlib.pyplot as plt - >>> import numpy as np - >>> import pandas - >>> pandas.options.display.width = 0 - -.. also switch current directory from the root directory (where the tests - are run) to be able to load the data - >>> import os - >>> os.chdir('packages/statistics') - - -.. _statistics: - -===================== -Statistics in Python -===================== - -**Author**: *Gaël Varoquaux* - -.. topic:: **Requirements** - - * Standard scientific Python environment (NumPy, SciPy, matplotlib) - - * `Pandas `__ - - * `Statsmodels `__ - - * `Seaborn `__ - - To install Python and these dependencies, we recommend that you - download `Anaconda Python `_ or, - preferably, use the package manager if you are under Ubuntu or other linux. - -.. seealso:: - - * **Bayesian statistics in Python**: - This chapter does not cover tools for Bayesian statistics. Of - particular interest for Bayesian modelling is `PyMC - `_, which implements a probabilistic - programming language in Python.
- - * **Read a statistics book**: - The `Think stats `_ book is - available as free PDF or in print and is a great introduction to - statistics. - - -| - -.. tip:: - - **Why Python for statistics?** - - R is a language dedicated to statistics. Python is a general-purpose - language with statistics modules. R has more statistical analysis - features than Python, and specialized syntaxes. However, when it - comes to building complex analysis pipelines that mix statistics with - e.g. image analysis, text mining, or control of a physical - experiment, the richness of Python is an invaluable asset. - - -.. contents:: Contents - :local: - :depth: 2 - -.. tip:: - - In this document, the Python inputs are represented with the sign - ">>>". - - | - - **Disclaimer: Gender questions** - - Some of the examples of this tutorial are chosen around gender - questions. The reason is that on such questions controlling the truth - of a claim actually matters to many people. - - -Data representation and interaction -==================================== - -Data as a table ----------------- - -The setting that we consider for statistical analysis is that of multiple -*observations* or *samples* described by a set of different *attributes* -or *features*. The data can than be seen as a 2D table, or matrix, with -columns giving the different attributes of the data, and rows the -observations. For instance, the data contained in -:download:`examples/brain_size.csv`: - -.. include:: examples/brain_size.csv - :literal: - :end-line: 6 - - -The pandas data-frame ------------------------- - -.. tip:: - - We will store and manipulate this data in a - :class:`pandas.DataFrame`, from the `pandas - `__ module. It is the Python equivalent of - the spreadsheet table. It is different from a 2D ``numpy`` array as it - has named columns, can contain a mixture of different data types by - column, and has elaborate selection and pivotal mechanisms. 
- -Creating dataframes: reading data files or converting arrays -............................................................ - -.. sidebar:: **Separator** - - It is a CSV file, but the separator is ";" - -**Reading from a CSV file:** Using the above CSV file that gives -observations of brain size and weight and IQ (Willerman et al. 1991), the -data are a mixture of numerical and categorical values:: - - >>> import pandas - >>> data = pandas.read_csv('examples/brain_size.csv', sep=';', na_values=".") - >>> data - Unnamed: 0 Gender FSIQ VIQ PIQ Weight Height MRI_Count - 0 1 Female 133 132 124 118.0 64.5 816932 - 1 2 Male 140 150 124 NaN 72.5 1001121 - 2 3 Male 139 123 150 143.0 73.3 1038437 - 3 4 Male 133 129 128 172.0 68.8 965353 - 4 5 Female 137 132 134 147.0 65.0 951545 - ... - -.. warning:: **Missing values** - - The weight of the second individual is missing in the CSV file. If we - don't specify the missing value (NA = not available) marker, we will - not be able to do statistical analysis. - -| - -**Creating from arrays**: A :class:`pandas.DataFrame` can also be seen -as a dictionary of 1D 'series', eg arrays or lists. If we have 3 -``numpy`` arrays:: - - >>> import numpy as np - >>> t = np.linspace(-6, 6, 20) - >>> sin_t = np.sin(t) - >>> cos_t = np.cos(t) - -We can expose them as a :class:`pandas.DataFrame`:: - - >>> pandas.DataFrame({'t': t, 'sin': sin_t, 'cos': cos_t}) - t sin cos - 0 -6.000000 0.279415 0.960170 - 1 -5.368421 0.792419 0.609977 - 2 -4.736842 0.999701 0.024451 - 3 -4.105263 0.821291 -0.570509 - 4 -3.473684 0.326021 -0.945363 - 5 -2.842105 -0.295030 -0.955488 - 6 -2.210526 -0.802257 -0.596979 - 7 -1.578947 -0.999967 -0.008151 - 8 -0.947368 -0.811882 0.583822 - ... - -| - -**Other inputs**: `pandas `__ can input data from -SQL, excel files, or other formats. See the `pandas documentation -`__. - -| - -Manipulating data -.................. 
- -`data` is a :class:`pandas.DataFrame`, that resembles R's dataframe:: - - >>> data.shape # 40 rows and 8 columns - (40, 8) - - >>> data.columns # It has columns - Index(['Unnamed: 0', 'Gender', 'FSIQ', 'VIQ', 'PIQ', 'Weight', 'Height', - 'MRI_Count'], - dtype='object') - - >>> print(data['Gender']) # Columns can be addressed by name - 0 Female - 1 Male - 2 Male - 3 Male - 4 Female - ... - - >>> # Simpler selector - >>> data[data['Gender'] == 'Female']['VIQ'].mean() - np.float64(109.45) - -.. note:: For a quick view on a large dataframe, use its `describe` - method: :meth:`pandas.DataFrame.describe`. - -| - -**groupby**: splitting a dataframe on values of categorical variables:: - - >>> groupby_gender = data.groupby('Gender') - >>> for gender, value in groupby_gender['VIQ']: - ... print((gender, value.mean())) - ('Female', np.float64(109.45)) - ('Male', np.float64(115.25)) - - -`groupby_gender` is a powerful object that exposes many -operations on the resulting group of dataframes:: - - >>> groupby_gender.mean() - Unnamed: 0 FSIQ VIQ PIQ Weight Height MRI_Count - Gender - Female 19.65 111.9 109.45 110.45 137.200000 65.765000 862654.6 - Male 21.35 115.0 115.25 111.60 166.444444 71.431579 954855.4 - - -.. tip:: - - Use tab-completion on `groupby_gender` to find more. Other common - grouping functions are median, count (useful for checking to see the - amount of missing values in different subsets) or sum. Groupby - evaluation is lazy, no work is done until an aggregation function is - applied. - - -| - -.. image:: auto_examples/images/sphx_glr_plot_pandas_001.png - :target: auto_examples/plot_pandas.html - :align: right - :scale: 42 - - -.. topic:: **Exercise** - :class: green - - * What is the mean value for VIQ for the full population? - * How many males/females were included in this study? - - **Hint** use 'tab completion' to find out the methods that can be - called, instead of 'mean' in the above example. 
- - * What is the average value of MRI counts expressed in log units, for - males and females? - -.. note:: - - `groupby_gender.boxplot` is used for the plots above (see `this - example `_). - -| - -Plotting data -.............. - -.. currentmodule:: pandas - -Pandas comes with some plotting tools (:mod:`pandas.plotting`, using -matplotlib behind the scene) to display statistics of the data in -dataframes: - -**Scatter matrices**:: - - >>> from pandas import plotting - >>> plotting.scatter_matrix(data[['Weight', 'Height', 'MRI_Count']]) - array([[, - , - ], - [, - , - ], - [, - , - ]], dtype=object) - -.. image:: auto_examples/images/sphx_glr_plot_pandas_002.png - :target: auto_examples/plot_pandas.html - :scale: 70 - :align: center - -:: - - >>> plotting.scatter_matrix(data[['PIQ', 'VIQ', 'FSIQ']]) - array([[, - , - ], - [, - , - ], - [, - , - ]], dtype=object) - -.. sidebar:: **Two populations** - - The IQ metrics are bimodal, as if there are 2 sub-populations. - -.. image:: auto_examples/images/sphx_glr_plot_pandas_003.png - :target: auto_examples/plot_pandas.html - :scale: 70 - :align: center - -.. topic:: **Exercise** - :class: green - - Plot the scatter matrix for males only, and for females only. Do you - think that the 2 sub-populations correspond to gender? - - -Hypothesis testing: comparing two groups -========================================== - -For simple `statistical tests -`_, we will -use the :mod:`scipy.stats` sub-module of `SciPy -`_:: - - >>> import scipy as sp - -.. seealso:: - - SciPy is a vast library. For a quick summary to the whole library, see - the :ref:`scipy ` chapter. - - -Student's t-test: the simplest statistical test ------------------------------------------------- - -One-sample tests: testing the value of a population mean -........................................................ - -.. 
image:: two_sided.png - :scale: 50 - :align: right - -:func:`scipy.stats.ttest_1samp` tests the null hypothesis that the mean -of the population underlying the data is equal to a given value. It returns -the `T statistic `_, -and the `p-value `_ (see the -function's help):: - - >>> sp.stats.ttest_1samp(data['VIQ'], 0) - TtestResult(statistic=np.float64(30.088099970...), pvalue=np.float64(1.32891964...e-28), df=np.int64(39)) - -The p-value of :math:`10^-28` indicates that such an extreme value of the statistic -is unlikely to be observed under the null hypothesis. This may be taken as -evidence that the null hypothesis is false and that the population mean IQ -(VIQ measure) is not 0. - -Technically, the p-value of the t-test is derived under the assumption that -the means of samples drawn from the population are normally distributed. -This condition is exactly satisfied when the population itself is normally -distributed; however, due to the central limit theorem, the condition is -nearly true for reasonably large samples drawn from populations that follow -a variety of non-normal distributions. - -Nonetheless, if we are concerned that violation of the normality assumptions -will affect the conclusions of the test, we can use a `Wilcoxon signed-rank test -`_, which relaxes -this assumption at the expense of test power:: - - >>> sp.stats.wilcoxon(data['VIQ']) - WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(3.4881726...e-08)) - -Two-sample t-test: testing for difference across populations -............................................................ - -We have seen above that the mean VIQ in the male and female samples -were different. 
To test whether this difference is significant (and -suggests that there is a difference in population means), we perform -a two-sample t-test using :func:`scipy.stats.ttest_ind`:: - - >>> female_viq = data[data['Gender'] == 'Female']['VIQ'] - >>> male_viq = data[data['Gender'] == 'Male']['VIQ'] - >>> sp.stats.ttest_ind(female_viq, male_viq) - TtestResult(statistic=np.float64(-0.77261617232...), pvalue=np.float64(0.4445287677858...), df=np.float64(38.0)) - -The corresponding non-parametric test is the `Mann–Whitney U -test `_, -:func:`scipy.stats.mannwhitneyu`. - - >>> sp.stats.mannwhitneyu(female_viq, male_viq) - MannwhitneyuResult(statistic=np.float64(164.5), pvalue=np.float64(0.34228868687...)) - -Paired tests: repeated measurements on the same individuals ------------------------------------------------------------ - -.. image:: auto_examples/images/sphx_glr_plot_paired_boxplots_001.png - :target: auto_examples/plot_pandas.html - :scale: 70 - :align: right - -PIQ, VIQ, and FSIQ give three measures of IQ. Let us test whether FISQ -and PIQ are significantly different. We can use an "independent sample" test:: - - >>> sp.stats.ttest_ind(data['FSIQ'], data['PIQ']) - TtestResult(statistic=np.float64(0.46563759638...), pvalue=np.float64(0.64277250...), df=np.float64(78.0)) - -The problem with this approach is that it ignores an important relationship -between observations: FSIQ and PIQ are measured on the same individuals. -Thus, the variance due to inter-subject variability is confounding, reducing -the power of the test. This variability can be removed using a "paired test" -or `"repeated measures test" -`_:: - - >>> sp.stats.ttest_rel(data['FSIQ'], data['PIQ']) - TtestResult(statistic=np.float64(1.784201940...), pvalue=np.float64(0.082172638183...), df=np.int64(39)) - -.. 
image:: auto_examples/images/sphx_glr_plot_paired_boxplots_002.png - :target: auto_examples/plot_pandas.html - :scale: 60 - :align: right - -This is equivalent to a one-sample test on the differences between paired -observations:: - - >>> sp.stats.ttest_1samp(data['FSIQ'] - data['PIQ'], 0) - TtestResult(statistic=np.float64(1.784201940...), pvalue=np.float64(0.082172638...), df=np.int64(39)) - -Accordingly, we can perform a nonparametric version of the test with -``wilcoxon``. - - >>> sp.stats.wilcoxon(data['FSIQ'], data['PIQ'], method="approx") - WilcoxonResult(statistic=np.float64(274.5), pvalue=np.float64(0.106594927135...)) - -.. topic:: **Exercise** - :class: green - - * Test the difference between weights in males and females. - - * Use non parametric statistics to test the difference between VIQ in - males and females. - - **Conclusion**: we find that the data does not support the hypothesis - that males and females have different VIQ. - -| - -Linear models, multiple factors, and analysis of variance -========================================================== - -"formulas" to specify statistical models in Python --------------------------------------------------- - -A simple linear regression -........................... - -.. image:: auto_examples/images/sphx_glr_plot_regression_001.png - :target: auto_examples/plot_regression.html - :scale: 60 - :align: right - -Given two set of observations, `x` and `y`, we want to test the -hypothesis that `y` is a linear function of `x`. In other terms: - - :math:`y = x * \textit{coef} + \textit{intercept} + e` - -where `e` is observation noise. We will use the `statsmodels -`_ module to: - -#. Fit a linear model. We will use the simplest strategy, `ordinary least - squares `_ (OLS). - -#. Test that `coef` is non zero. 
- -| - -First, we generate simulated data according to the model:: - - >>> import numpy as np - >>> x = np.linspace(-5, 5, 20) - >>> rng = np.random.default_rng(27446968) - >>> # normal distributed noise - >>> y = -5 + 3*x + 4 * rng.normal(size=x.shape) - >>> # Create a data frame containing all the relevant variables - >>> data = pandas.DataFrame({'x': x, 'y': y}) - - -.. sidebar:: **"formulas" for statistics in Python** - - `See the statsmodels documentation - `_ - -| - -Then we specify an OLS model and fit it:: - - >>> from statsmodels.formula.api import ols - >>> model = ols("y ~ x", data).fit() - -We can inspect the various statistics derived from the fit:: - - >>> print(model.summary()) # doctest: +REPORT_UDIFF - OLS Regression Results - ============================================================================== - Dep. Variable: y R-squared: 0.901 - Model: OLS Adj. R-squared: 0.896 - Method: Least Squares F-statistic: 164.5 - Date: ... Prob (F-statistic): 1.72e-10 - Time: ... Log-Likelihood: -51.758 - No. Observations: 20 AIC: 107.5 - Df Residuals: 18 BIC: 109.5 - Df Model: 1 - Covariance Type: nonrobust - ============================================================================== - coef std err t P>|t| [0.025 0.975] - ------------------------------------------------------------------------------ - Intercept -4.2948 0.759 -5.661 0.000 -5.889 -2.701 - x 3.2060 0.250 12.825 0.000 2.681 3.731 - ============================================================================== - Omnibus: 1.218 Durbin-Watson: 1.796 - Prob(Omnibus): 0.544 Jarque-Bera (JB): 0.999 - Skew: 0.503 Prob(JB): 0.607 - Kurtosis: 2.568 Cond. No. 3.03 - ============================================================================== - - Notes: - [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. - -.. 
topic:: Terminology: - - Statsmodels uses a statistical terminology: the `y` variable in - statsmodels is called 'endogenous' while the `x` variable is called - exogenous. This is discussed in more detail `here - `_. - - To simplify, `y` (endogenous) is the value you are trying to predict, - while `x` (exogenous) represents the features you are using to make - the prediction. - - -.. topic:: **Exercise** - :class: green - - Retrieve the estimated parameters from the model above. **Hint**: - use tab-completion to find the relevant attribute. - -| - -Categorical variables: comparing groups or multiple categories -............................................................... - -Let us go back the data on brain size:: - - >>> data = pandas.read_csv('examples/brain_size.csv', sep=';', na_values=".") - -We can write a comparison between IQ of male and female using a linear -model:: - - >>> model = ols("VIQ ~ Gender + 1", data).fit() - >>> print(model.summary()) # doctest: +REPORT_UDIFF - OLS Regression Results - ============================================================================== - Dep. Variable: VIQ R-squared: 0.015 - Model: OLS Adj. R-squared: -0.010 - Method: Least Squares F-statistic: 0.5969 - Date: ... Prob (F-statistic): 0.445 - Time: ... Log-Likelihood: -182.42 - No. Observations: 40 AIC: 368.8 - Df Residuals: 38 BIC: 372.2 - Df Model: 1 - Covariance Type: nonrobust - ================================================================================== - coef std err t P>|t| [0.025 0.975] - ---------------------------------------------------------------------------------- - Intercept 109.4500 5.308 20.619 0.000 98.704 120.196 - Gender[T.Male] 5.8000 7.507 0.773 0.445 -9.397 20.997 - ============================================================================== - Omnibus: 26.188 Durbin-Watson: 1.709 - Prob(Omnibus): 0.000 Jarque-Bera (JB): 3.703 - Skew: 0.010 Prob(JB): 0.157 - Kurtosis: 1.510 Cond. No. 
2.62 - ============================================================================== - - Notes: - [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. - -.. topic:: **Tips on specifying model** - - **Forcing categorical**: the 'Gender' is automatically detected as a - categorical variable, and thus each of its different values are - treated as different entities. - - An integer column can be forced to be treated as categorical using:: - - >>> model = ols('VIQ ~ C(Gender)', data).fit() - - **Intercept**: We can remove the intercept using `- 1` in the formula, - or force the use of an intercept using `+ 1`. - - .. tip:: - - By default, statsmodels treats a categorical variable with K possible - values as K-1 'dummy' boolean variables (the last level being - absorbed into the intercept term). This is almost always a good - default choice - however, it is possible to specify different - encodings for categorical variables - (https://www.statsmodels.org/devel/contrasts.html). - - -| - -.. topic:: **Link to t-tests between different FSIQ and PIQ** - - To compare different types of IQ, we need to create a "long-form" - table, listing IQs, where the type of IQ is indicated by a - categorical variable:: - - >>> data_fisq = pandas.DataFrame({'iq': data['FSIQ'], 'type': 'fsiq'}) - >>> data_piq = pandas.DataFrame({'iq': data['PIQ'], 'type': 'piq'}) - >>> data_long = pandas.concat((data_fisq, data_piq)) - >>> print(data_long) - iq type - 0 133 fsiq - 1 140 fsiq - 2 139 fsiq - 3 133 fsiq - 4 137 fsiq - ... ... ... - 35 128 piq - 36 124 piq - 37 94 piq - 38 74 piq - 39 89 piq - - [80 rows x 2 columns] - - >>> model = ols("iq ~ type", data_long).fit() - >>> print(model.summary()) # doctest: +REPORT_UDIFF - OLS Regression Results - ... - ==========================... - coef std err t P>|t| [0.025 0.975] - ------------------------------------------... 
- Intercept 113.4500 3.683 30.807 0.000 106.119 120.781 - type[T.piq] -2.4250 5.208 -0.466 0.643 -12.793 7.943 - ... - - We can see that we retrieve the same values for t-test and - corresponding p-values for the effect of the type of iq than the - previous t-test:: - - >>> sp.stats.ttest_ind(data['FSIQ'], data['PIQ']) - TtestResult(statistic=np.float64(0.46563759638...), pvalue=np.float64(0.64277250...), df=np.float64(78.0)) - - -Multiple Regression: including multiple factors -------------------------------------------------- - -.. image:: auto_examples/images/sphx_glr_plot_regression_3d_001.png - :target: auto_examples/plot_regression_3d.html - :scale: 45 - :align: right - -| - -Consider a linear model explaining a variable `z` (the dependent -variable) with 2 variables `x` and `y`: - - :math:`z = x \, c_1 + y \, c_2 + i + e` - -Such a model can be seen in 3D as fitting a plane to a cloud of (`x`, -`y`, `z`) points. - -| -| - -**Example: the iris data** (:download:`examples/iris.csv`) - -.. tip:: - - Sepal and petal size tend to be related: bigger flowers are bigger! - But is there in addition a systematic effect of species? - -.. image:: auto_examples/images/sphx_glr_plot_iris_analysis_001.png - :target: auto_examples/plot_iris_analysis_1.html - :scale: 80 - :align: center - -:: - - >>> data = pandas.read_csv('examples/iris.csv') - >>> model = ols('sepal_width ~ name + petal_length', data).fit() - >>> print(model.summary()) # doctest: +REPORT_UDIFF - OLS Regression Results - ==========================... - Dep. Variable: sepal_width R-squared: 0.478 - Model: OLS Adj. R-squared: 0.468 - Method: Least Squares F-statistic: 44.63 - Date: ... Prob (F-statistic): 1.58e-20 - Time: ... Log-Likelihood: -38.185 - No. Observations: 150 AIC: 84.37 - Df Residuals: 146 BIC: 96.41 - Df Model: 3 - Covariance Type: nonrobust - ==========================... - coef std err t P>|t| [0.025 0.975] - ------------------------------------------... 
- Intercept 2.9813 0.099 29.989 0.000 2.785 3.178 - name[T.versicolor] -1.4821 0.181 -8.190 0.000 -1.840 -1.124 - name[T.virginica] -1.6635 0.256 -6.502 0.000 -2.169 -1.158 - petal_length 0.2983 0.061 4.920 0.000 0.178 0.418 - ==========================... - Omnibus: 2.868 Durbin-Watson: 1.753 - Prob(Omnibus): 0.238 Jarque-Bera (JB): 2.885 - Skew: -0.082 Prob(JB): 0.236 - Kurtosis: 3.659 Cond. No. 54.0 - ==========================... - - Notes: - [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. - -| - -Post-hoc hypothesis testing: analysis of variance (ANOVA) ----------------------------------------------------------- - -In the above iris example, we wish to test if the petal length is -different between versicolor and virginica, after removing the effect of -sepal width. This can be formulated as testing the difference between the -coefficient associated to versicolor and virginica in the linear model -estimated above (it is an Analysis of Variance, `ANOVA -`_). For this, we -write a **vector of 'contrast'** on the parameters estimated: we want to -test ``"name[T.versicolor] - name[T.virginica]"``, with an `F-test -`_:: - - >>> print(model.f_test([0, 1, -1, 0])) - - -Is this difference significant? - -| - - -.. topic:: **Exercise** - :class: green - - Going back to the brain size + IQ data, test if the VIQ of male and - female are different after removing the effect of brain size, height - and weight. - -| - -More visualization: seaborn for statistical exploration -======================================================= - -`Seaborn `_ combines -simple statistical fits with plotting on pandas dataframes. - -Let us consider a data giving wages and many other personal information -on 500 individuals (`Berndt, ER. The Practice of Econometrics. 1991. NY: -Addison-Wesley `_). - -.. tip:: - - The full code loading and plotting of the wages data is found in - `corresponding example `_. 
- -:: - - >>> print(data) # doctest: +SKIP - EDUCATION SOUTH SEX EXPERIENCE UNION WAGE AGE RACE \ - 0 8 0 1 21 0 0.707570 35 2 - 1 9 0 1 42 0 0.694605 57 3 - 2 12 0 0 1 0 0.824126 19 3 - 3 12 0 0 4 0 0.602060 22 3 - ... - -Pairplot: scatter matrices --------------------------- - -We can easily have an intuition on the interactions between continuous -variables using :func:`seaborn.pairplot` to display a scatter matrix:: - - >>> import seaborn - >>> seaborn.pairplot(data, vars=['WAGE', 'AGE', 'EDUCATION'], - ... kind='reg') # doctest: +SKIP - - -.. image:: auto_examples/images/sphx_glr_plot_wage_data_001.png - :target: auto_examples/plot_wage_data.html - :align: center - :scale: 60 - -Categorical variables can be plotted as the hue:: - - >>> seaborn.pairplot(data, vars=['WAGE', 'AGE', 'EDUCATION'], - ... kind='reg', hue='SEX') # doctest: +SKIP - - -.. image:: auto_examples/images/sphx_glr_plot_wage_data_002.png - :target: auto_examples/plot_wage_data.html - :align: center - :scale: 60 - -.. topic:: **Look and feel and matplotlib settings** - - Seaborn changes the default of matplotlib figures to achieve a more - "modern", "excel-like" look. It does that upon import. You can reset - the default using:: - - >>> import matplotlib.pyplot as plt - >>> plt.rcdefaults() - - .. tip:: - - To switch back to seaborn settings, or understand better styling in - seaborn, see the `relevant section of the seaborn documentation - `_. - - -lmplot: plotting a univariate regression ------------------------------------------ - -.. image:: auto_examples/images/sphx_glr_plot_wage_data_005.png - :target: auto_examples/plot_wage_data.html - :align: right - :scale: 60 - -A regression capturing the relation between one variable and another, eg -wage, and education, can be plotted using :func:`seaborn.lmplot`:: - - >>> seaborn.lmplot(y='WAGE', x='EDUCATION', data=data) # doctest: +SKIP - -.. raw:: html - -
- -.. topic:: **Robust regression** - - .. tip:: - - Given that, in the above plot, there seems to be a couple of data - points that are outside of the main cloud to the right, they might be - outliers, not representative of the population, but driving the - regression. - - To compute a regression that is less sensitive to outliers, one must - use a `robust model - `_. This is done in - seaborn using ``robust=True`` in the plotting functions, or in - statsmodels by replacing the use of the OLS by a "Robust Linear - Model", :func:`statsmodels.formula.api.rlm`. - - -Testing for interactions -========================= - -.. image:: auto_examples/images/sphx_glr_plot_wage_education_gender_001.png - :target: auto_examples/plot_wage_education_gender.html - :align: center - :scale: 70 - -Do wages increase more with education for males than females? - -.. tip:: - - The plot above is made of two different fits. We need to formulate a - single model that tests for a variance of slope across the two - populations. This is done via an `"interaction" - `_. - - -:: - - >>> result = sm.ols(formula='wage ~ education + gender + education * gender', - ... data=data).fit() # doctest: +SKIP - >>> print(result.summary()) # doctest: +SKIP - ... - coef std err t P>|t| [0.025 0.975] - ------------------------------------------------------------------------------ - Intercept 0.2998 0.072 4.173 0.000 0.159 0.441 - gender[T.male] 0.2750 0.093 2.972 0.003 0.093 0.457 - education 0.0415 0.005 7.647 0.000 0.031 0.052 - education:gender[T.male] -0.0134 0.007 -1.919 0.056 -0.027 0.000 - ==========================... - ... - -Can we conclude that education benefits males more than females? - -| - -.. topic:: **Take home messages** - - * Hypothesis testing and p-values give you the **significance** of an - effect / difference. - - * **Formulas** (with categorical variables) enable you to express rich - links in your data. 
- - * **Visualizing** your data and fitting simple models give insight into the - data. - - * **Conditionning** (adding factors that can explain all or part of - the variation) is an important modeling aspect that changes the - interpretation. - -| - -.. include the gallery. Skip the first line to avoid the "orphan" - declaration - -.. include:: auto_examples/index.rst - :start-line: 1 diff --git a/packages/statistics/stats_examples.md b/packages/statistics/stats_examples.md new file mode 100644 index 000000000..0b705759c --- /dev/null +++ b/packages/statistics/stats_examples.md @@ -0,0 +1,626 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +orphan: true +--- + +# Examples for packages/statistics/index.md + +(plotting-simple-quantities-of-a-pandas-dataframe)= + +```{code-cell} +import numpy as np +import matplotlib.pyplot as plt + +# For loading data as data frames. +import pandas as pd +``` + +## Plotting simple quantities of a pandas dataframe + + + ++++ + +This example loads from a CSV file data with mixed numerical and +categorical entries, and plots a few quantities, separately for females +and males, thanks to the pandas integrated plotting tool (that uses +matplotlib behind the scene). 
+ +See http://pandas.pydata.org/pandas-docs/stable/visualization.html + +```{code-cell} +data = pd.read_csv("examples/brain_size.csv", sep=";", na_values=".") + +# Box plots of different columns for each sex +groupby_sex = data.groupby("Gender") +groupby_sex.boxplot(column=["FSIQ", "VIQ", "PIQ"]) + +# Scatter matrices for different columns +pd.plotting.scatter_matrix(data[["Weight", "Height", "MRI_Count"]]) +pd.plotting.scatter_matrix(data[["PIQ", "VIQ", "FSIQ"]]); +``` + +(boxplots-and-paired-differences)= + +## Boxplots and paired differences + + + ++++ + +Plot boxplots for FSIQ, PIQ, and the paired difference between the two: +while the spread (error bars) for FSIQ and PIQ is very large, there is a +systematic (common) effect due to the subjects. This effect is cancelled +out in the difference and the spread of the difference ("paired" by +subject) is much smaller than the spread of the individual measures. + +```{code-cell} +data = pd.read_csv("examples/brain_size.csv", sep=";", na_values=".") +# Box plot of FSIQ and PIQ (different measures of IQ) +plt.figure(figsize=(4, 3)) +data.boxplot(column=["FSIQ", "PIQ"]) +# Boxplot of the difference +plt.figure(figsize=(4, 3)) +plt.boxplot(data["FSIQ"] - data["PIQ"]) +plt.xticks((1,), ("FSIQ - PIQ",)) +``` + +(simple-regression)= + +## Simple Regression + + + ++++ + +Fit a simple linear regression using 'statsmodels', compute corresponding +p-values. + ++++ + +**Original author: Thomas Haslwanter** + +```{code-cell} +# For statistics. +# Import the formula interface to Statsmodels.
+import statsmodels.formula.api as smf + +# Analysis of Variance (ANOVA) on linear models +from statsmodels.stats.anova import anova_lm + +# Generate and show the data +x = np.linspace(-5, 5, 20) + +# To get reproducible values, provide a seed value +rng = np.random.default_rng(27446968) + +y = -5 + 3 * x + 4 * rng.normal(size=x.shape) + +# Plot the data +plt.figure(figsize=(5, 4)) +plt.plot(x, y, "o"); +``` + +Multilinear regression model, calculating fit, P-values, confidence +intervals etc. + +```{code-cell} +# Convert the data into a Pandas DataFrame to use the formulas framework +# in statsmodels +data = pd.DataFrame({"x": x, "y": y}) +``` + +```{code-cell} +# Fit the model +model = smf.ols("y ~ x", data).fit() +``` + +```{code-cell} +# Show the summary +model.summary() +``` + +```{code-cell} +# Perform analysis of variance on fitted linear model +anova_results = anova_lm(model) +anova_results +``` + +Plot the fitted model + +```{code-cell} +# Retrieve the parameter estimates +offset, coef = model._results.params +plt.plot(x, x * coef + offset) +plt.xlabel("x") +plt.ylabel("y"); +``` + +(multiple-regression)= + +## Multiple Regression + + + ++++ + +Calculate using 'statsmodels' just the best fit, or all the corresponding +statistical parameters. + +Also shows how to make 3d plots. + ++++ + +Original author: Thomas Haslwanter + +```{code-cell} +# For 3d plots. 
This import is necessary to have 3D plotting below +from mpl_toolkits.mplot3d import Axes3D +``` + +```{code-cell} +# Generate and show the data +x = np.linspace(-5, 5, 21) +# We generate a 2D grid +X, Y = np.meshgrid(x, x) + +# To get reproducible values, provide a seed value +rng = np.random.default_rng(27446968) + +# Z is the elevation of this 2D grid +Z = -5 + 3 * X - 0.5 * Y + 8 * rng.normal(size=X.shape) + +# Plot the data +ax: Axes3D = plt.figure().add_subplot(projection="3d") +surf = ax.plot_surface(X, Y, Z, cmap="coolwarm", rstride=1, cstride=1) +ax.view_init(20, -120) +ax.set_xlabel("X") +ax.set_ylabel("Y") +ax.set_zlabel("Z"); +``` + +Multilinear regression model, calculating fit, P-values, confidence +intervals etc. + ++++ + +Convert the data into a Pandas DataFrame to use the formulas framework +in statsmodels + +```{code-cell} +# First we need to flatten the data: its 2D layout is not relevant. +X = X.flatten() +Y = Y.flatten() +Z = Z.flatten() +``` + +```{code-cell} +data = pd.DataFrame({"x": X, "y": Y, "z": Z}) +``` + +```{code-cell} +# Fit the model +model = smf.ols("z ~ x + y", data).fit() +# Show the summary +model.summary() +``` + +```{code-cell} +print("\nRetrieving the parameter estimates manually:") +print(model._results.params) +``` + +```{code-cell} +# Perform analysis of variance on fitted linear model +anova_results = anova_lm(model) +anova_results +``` + +(analysis-of-iris-petal-and-sepal-sizes)= + +## Analysis of Iris petal and sepal sizes + + + ++++ + +Illustrate an analysis on a real dataset: + +- Visualizing the data to formulate intuitions +- Fitting of a linear model +- Hypothesis test of the effect of a categorical variable in the presence + of a continuous confound + +```{code-cell} +# Load the data +data = pd.read_csv("examples/iris.csv") +``` + +Plot a scatter matrix + +```{code-cell} +# Express the names as categories +categories = pd.Categorical(data["name"]) + +# The parameter 'c' is passed to plt.scatter and will control
the color +pd.plotting.scatter_matrix(data, c=categories.codes, marker="o") + +fig = plt.gcf() +fig.suptitle("blue: setosa, green: versicolor, red: virginica", size=13) +``` + +Statistical analysis + ++++ + +Let us try to explain the sepal width as a function of the petal +length and the category of iris + +```{code-cell} +model = smf.ols("sepal_width ~ name + petal_length", data).fit() +model.summary() +``` + +Now formulate a "contrast", to test if the offsets for versicolor and +virginica are identical + +```{code-cell} +:tags: [hide-input] + +print("Testing the difference between effect of versicolor and virginica") +print(model.f_test([0, 1, -1, 0])) +``` + +(visualizing-factors-influencing-wages)= + +## Visualizing factors influencing wages + + + ++++ + +This example uses Seaborn to quickly plot various factors relating wages, +experience, and education. + +Seaborn (https://seaborn.pydata.org) is a library that combines +visualization and statistical fits to show trends in data. + +Note that importing Seaborn changes the matplotlib style to have an +"excel-like" feeling. This change affects other matplotlib figures. To +restore defaults once this example is run, we would need to call +`plt.rcdefaults()`. + +```{code-cell} +data = pd.read_csv("examples/wages.txt", + skiprows=27, + skipfooter=6, + sep=None, + header=None, + engine="python" # To allow use of skipfooter.
+) +# Give names to the columns +names = [ + "education: Number of years of education", + "south: 1=person lives in South, 0=Person lives elsewhere", + "sex: 1=female, 0=Male", + "experience: Number of years of work experience", + "union: 1=union member, 0=Not union member", + "wage: wage (dollars per hour)", + "age: years", + "race: 1=other, 2=Hispanic, 3=White", + "occupation: 1=Management, 2=Sales, 3=Clerical, 4=Service, 5=Professional, 6=Other", + "sector: 0=Other, 1=Manufacturing, 2=Construction", + "marr: 0=unmarried, 1=Married", +] +short_names = [n.split(":")[0] for n in names] +data.columns = pd.Index(short_names) +# Log-transform the wages, because they typically are increased with +# multiplicative factors +data["wage"] = np.log10(data["wage"]) +# Convert genders to strings (this is particularly useful so that the +# statsmodels formulas detects that `sex` is a categorical variable) +data["sex"] = np.choose(data['sex'], ["male", "female"]) +``` + +Plot scatter matrices highlighting different aspects + +```{code-cell} +import seaborn +``` + +```{code-cell} +seaborn.pairplot(data, vars=["wage", "age", "education"], kind="reg") +``` + +```{code-cell} +seaborn.pairplot(data, vars=["wage", "age", "education"], kind="reg", hue="sex") +plt.suptitle("Effect of sex: 1=Female, 0=Male") +``` + +```{code-cell} +seaborn.pairplot(data, vars=["wage", "age", "education"], kind="reg", hue="race") +plt.suptitle("Effect of race: 1=Other, 2=Hispanic, 3=White") +``` + +```{code-cell} +seaborn.pairplot(data, vars=["wage", "age", "education"], kind="reg", hue="union") +plt.suptitle("Effect of union: 1=Union member, 0=Not union member") +``` + +Plot a simple regression + +```{code-cell} +seaborn.lmplot(y="wage", x="education", data=data) +``` + +(test-for-an-education-sex-interaction-in-wages)= + +## Test for an education/sex interaction in wages + + + ++++ + +Wages depend mostly on education. 
Here we investigate how this dependence +is related to gender: not only does gender create an offset in wages, it +also seems that wages increase more with education for males than +females. + +Does our data support this last hypothesis? We will test this using +statsmodels' formulas +(https://www.statsmodels.org/stable/example_formulas.html). + +```{code-cell} +# simple plotting + +# Plot 2 linear fits for male and female. +seaborn.lmplot(y="wage", x="education", hue="sex", data=data) + +# statistical analysis +import statsmodels.formula.api as sm + +# Note that this model is not the plot displayed above: it is one +# joined model for male and female, not separate models for male and +# female. The reason is that a single model enables statistical testing +result = sm.ols(formula="wage ~ education + sex", data=data).fit() +result.summary() +``` + +```{code-cell} +# The plots above highlight that there is not only a different offset in +# wage but also a different slope +# +# We need to model this using an interaction +result = sm.ols( + formula="wage ~ education + sex + education * sex", data=data +).fit() +result.summary() +``` + +Looking at the p-value of the interaction of sex and education, the +data does not support the hypothesis that education benefits males +more than females (p-value > 0.05). + ++++ + +## Other examples + ++++ + +(air-fares-before-and-after-9-11)= + +### Air fares before and after 9/11 + + + ++++ + +This is a business-intelligence (BI) like application. + +What is interesting here is that we may want to study fares as a function +of the year, paired accordingly to the trips, or forgetting the year, +only as a function of the trip endpoints.
+ +Using statsmodels' linear models, we find that both with an OLS (ordinary +least squares) and a robust fit, the intercept and the slope are +significantly non-zero: the air fares have decreased between 2000 and +2001, and their dependence on distance travelled has also decreased. + +```{code-cell} +:tags: [hide-input] + +# As a separator, '\s+' is a regular expression that means 'one or more +# spaces' +data = pd.read_csv( + "examples/airfares.txt", + sep=r'\s+', + header=0, + names=[ + "city1", + "city2", + "pop1", + "pop2", + "dist", + "fare_2000", + "nb_passengers_2000", + "fare_2001", + "nb_passengers_2001", + ], +) +``` + +```{code-cell} +# we log-transform the number of passengers +data["nb_passengers_2000"] = np.log10(data["nb_passengers_2000"]) +data["nb_passengers_2001"] = np.log10(data["nb_passengers_2001"]) +``` + +Make a dataframe with the year as an attribute, instead of separate columns + ++++ + +This involves a small dance in which we separate the dataframes in 2, +one for year 2000, and one for 2001, before concatenating again.
+ +```{code-cell} +# Make an index of each flight +data_flat = data.reset_index() +``` + +```{code-cell} +data_2000 = data_flat[ + ["city1", "city2", "pop1", "pop2", "dist", "fare_2000", "nb_passengers_2000"] +] +# Rename the columns +data_2000.columns = pd.Index( + ["city1", "city2", "pop1", "pop2", "dist", "fare", "nb_passengers"] +) +# Add a column with the year +data_2000.insert(0, "year", 2000) +``` + +```{code-cell} +data_2001 = data_flat[ + ["city1", "city2", "pop1", "pop2", "dist", "fare_2001", "nb_passengers_2001"] +] +# Rename the columns +data_2001.columns = pd.Index( + ["city1", "city2", "pop1", "pop2", "dist", "fare", "nb_passengers"] +) +# Add a column with the year +data_2001.insert(0, "year", 2001) +``` + +```{code-cell} +data_flat = pd.concat([data_2000, data_2001]) +``` + +Plot scatter matrices highlighting different aspects + +```{code-cell} +seaborn.pairplot( + data_flat, vars=["fare", "dist", "nb_passengers"], kind="reg", markers="." +) +``` + +```{code-cell} +# A second plot, to show the effect of the year (ie the 9/11 effect) +seaborn.pairplot( + data_flat, + vars=["fare", "dist", "nb_passengers"], + kind="reg", + hue="year", + markers=".", +) +``` + +Plot the difference in fare + +```{code-cell} +plt.figure(figsize=(5, 2)) +seaborn.boxplot(data.fare_2001 - data.fare_2000) +plt.title("Fare: 2001 - 2000") +plt.subplots_adjust() +``` + +```{code-cell} +plt.figure(figsize=(5, 2)) +seaborn.boxplot(data.nb_passengers_2001 - data.nb_passengers_2000) +plt.title("NB passengers: 2001 - 2000") +plt.subplots_adjust() +``` + +```{code-cell} +# Statistical testing: dependence of fare on distance and number of +# passengers +result = sm.ols(formula="fare ~ 1 + dist + nb_passengers", data=data_flat).fit() +result.summary() +``` + +```{code-cell} +# Using a robust fit +result = sm.rlm(formula="fare ~ 1 + dist + nb_passengers", data=data_flat).fit() +result.summary() +``` + +Statistical testing: regression of fare on distance: 2001/2000 difference + 
+```{code-cell} +result = sm.ols(formula="fare_2001 - fare_2000 ~ 1 + dist", data=data).fit() +result.summary() +``` + +```{code-cell} +# Plot the corresponding regression +data["fare_difference"] = data["fare_2001"] - data["fare_2000"] +seaborn.lmplot(x="dist", y="fare_difference", data=data) +``` + +(relating-gender-and-iq)= + +### Relating Gender and IQ + + + ++++ + +Going back to the brain size + IQ data, test if the VIQ of male and +female are different after removing the effect of brain size, height and +weight. + +Notice that here 'Gender' is a categorical value. As it is a non-float +data type, statsmodels is able to automatically infer this. + +```{code-cell} +data = pd.read_csv("examples/brain_size.csv", sep=";", na_values=".") + +model = smf.ols("VIQ ~ Gender + MRI_Count + Height", data).fit() +model.summary() +``` + +```{code-cell} +# Here, we don't need to define a contrast, as we are testing a single +# coefficient of our model, and not a combination of coefficients. +# However, defining a contrast, which would then be a 'unit contrast', +# will give us the same results +print(model.f_test([0, 1, 0, 0])) +``` + +Here we plot a scatter matrix to get intuitions on our results. 
+This goes beyond what was asked in the exercise + ++++ + +This plotting is useful to get an intuition on the relationships between +our different variables + +```{code-cell} +# Fill in the missing values for Height for plotting +data["Height"] = data["Height"].ffill() +``` + +```{code-cell} +# The parameter 'c' is passed to plt.scatter and will control the color +# The same holds for parameters 'marker', 'alpha' and 'cmap', that +# control respectively the type of marker used, their transparency and +# the colormap +pd.plotting.scatter_matrix( + data[["VIQ", "MRI_Count", "Height"]], + c=(data["Gender"] == "Female"), + marker="o", + alpha=1, + cmap="winter", +) + +fig = plt.gcf() +fig.suptitle("blue: male, green: female", size=13); +``` diff --git a/packages/sympy.md b/packages/sympy.md new file mode 100644 index 000000000..3d309a86c --- /dev/null +++ b/packages/sympy.md @@ -0,0 +1,518 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.18.0-dev +kernelspec: + display_name: Python 3 (ipykernel) + language: python + name: python3 +--- + + + +(sympy)= + +# `sympy` : Symbolic Mathematics in Python + +**Author**: _Fabian Pedregosa_ + +:::{admonition} Objectives + +1. Evaluate expressions with arbitrary precision. +2. Perform algebraic manipulations on symbolic expressions. +3. Perform basic calculus tasks (limits, differentiation and + integration) with symbolic expressions. +4. Solve polynomial and transcendental equations. +5. Solve some differential equations. + ::: + +**What is SymPy?** SymPy is a Python library for symbolic mathematics. It +aims to be an alternative to systems such as Mathematica or Maple while keeping +the code as simple as possible and easily +extensible. SymPy is written entirely in Python and does not require any +external libraries.
+ +Sympy documentation and packages for installation can be found on +<https://www.sympy.org/> + +## First Steps with SymPy + +### Using SymPy as a calculator + +SymPy defines three numerical types: `Float`, `Rational` and `Integer`. + +The Rational class represents a rational number as a pair of two +Integers: the numerator and the denominator, so `Rational(1, 2)` +represents 1/2, `Rational(5, 2)` 5/2 and so on: + +```{code-cell} +import sympy as sym +a = sym.Rational(1, 2) +``` + +```{code-cell} +a +``` + +```{code-cell} +a*2 +``` + +SymPy uses mpmath in the background, which makes it possible to +perform computations using arbitrary-precision arithmetic. That +way, some special constants, like $e$, $\pi$, `oo` (Infinity), +are treated as +symbols and can be evaluated with arbitrary precision: + +```{code-cell} +sym.pi**2 +``` + +```{code-cell} +sym.pi.evalf() +``` + +```{code-cell} +(sym.pi + sym.exp(1)).evalf() +``` + +as you see, `evalf` evaluates the expression to a floating-point number. + +There is also a class representing mathematical infinity, called +`oo`: + +```{code-cell} +sym.oo > 99999 +``` + +```{code-cell} +sym.oo + 1 +``` + +::: {exercise-start} +:label: sympy-root2-rational-ex +:class: dropdown +::: + +1. Calculate $\sqrt{2}$ with 100 decimals. +2. Calculate $1/2 + 1/3$ in rational arithmetic. + +::: {exercise-end} +::: + ++++ + +### Symbols + +In contrast to other Computer Algebra Systems, in SymPy you have to declare +symbolic variables explicitly: + +```{code-cell} +x = sym.Symbol('x') +y = sym.Symbol('y') +``` + +Then you can manipulate them: + +```{code-cell} +x + y + x - y +``` + +```{code-cell} +(x + y) ** 2 +``` + +Symbols can now be manipulated using some of Python's operators: `+`, `-`, +`*`, `**` (arithmetic), `&`, `|`, `~`, `>>`, `<<` (boolean). + +:::{admonition} Printing +Sympy allows for control of the display of the output.
From here we use the +following setting for printing: + +```{code-cell} +sym.init_printing(use_unicode=False, wrap_line=True) +``` + +::: + +## Algebraic manipulations + +SymPy is capable of performing powerful algebraic manipulations. We'll +take a look into some of the most frequently used: expand and simplify. + +### Expand + +Use this to expand an algebraic expression. It will try to denest +powers and multiplications: + +```{code-cell} +sym.expand((x + y) ** 3) +``` + +```{code-cell} +3 * x * y ** 2 + 3 * y * x ** 2 + x ** 3 + y ** 3 +``` + +Further options can be given in the form of keywords: + +```{code-cell} +sym.expand(x + y, complex=True) +``` + +```{code-cell} +sym.I * sym.im(x) + sym.I * sym.im(y) + sym.re(x) + sym.re(y) +``` + +```{code-cell} +sym.expand(sym.cos(x + y), trig=True) +``` + +```{code-cell} +sym.cos(x) * sym.cos(y) - sym.sin(x) * sym.sin(y) +``` + +### Simplify + +Use simplify if you would like to transform an expression into a +simpler form: + +```{code-cell} +sym.simplify((x + x * y) / x) +``` + +Simplification is a somewhat vague term, and more precise +alternatives to simplify exist: `powsimp` (simplification of +exponents), `trigsimp` (for trigonometric expressions), `logcombine`, +`radsimp`, `together`. + +::: {exercise-start} +:label: sympy-simplify-expand-ex +:class: dropdown +::: + +1. Calculate the expanded form of $(x+y)^6$. +2.
Simplify the trigonometric expression $\sin(x) / \cos(x)$ + +::: {exercise-end} +::: + ++++ + +## Calculus + +### Limits + +Limits are easy to use in SymPy, they follow the syntax `limit(function, +variable, point)`, so to compute the limit of $f(x)$ as +$x \rightarrow 0$, you would issue `limit(f, x, 0)`: + +```{code-cell} +sym.limit(sym.sin(x) / x, x, 0) +``` + +you can also calculate the limit at infinity: + +```{code-cell} +sym.limit(x, x, sym.oo) +``` + +```{code-cell} +sym.limit(1 / x, x, sym.oo) +``` + +```{code-cell} +sym.limit(x ** x, x, 0) +``` + +::: {index} differentiation, diff +::: + +### Differentiation + +You can differentiate any SymPy expression using `diff(func, +var)`. Examples: + +```{code-cell} +sym.diff(sym.sin(x), x) +``` + +```{code-cell} +sym.diff(sym.sin(2 * x), x) +``` + +```{code-cell} +sym.diff(sym.tan(x), x) +``` + +You can check that it is correct by: + +```{code-cell} +sym.limit((sym.tan(x + y) - sym.tan(x)) / y, y, 0) +``` + +Which is equivalent since + +$$ +\sec(x) = \frac{1}{\cos(x)} \text{ and } \sec^2(x) = \tan^2(x) + 1. +$$ + +You can check this as well: + +```{code-cell} +sym.trigsimp(sym.diff(sym.tan(x), x)) +``` + +Higher derivatives can be calculated using the `diff(func, var, n)` method: + +```{code-cell} +sym.diff(sym.sin(2 * x), x, 1) +``` + +```{code-cell} +sym.diff(sym.sin(2 * x), x, 2) +``` + +```{code-cell} +sym.diff(sym.sin(2 * x), x, 3) +``` + +### Series expansion + +SymPy also knows how to compute the Taylor series of an expression at +a point. Use `series(expr, var)`: + +```{code-cell} +sym.series(sym.cos(x), x) +``` + +```{code-cell} +sym.series(1/sym.cos(x), x) +``` + +::: {exercise-start} +:label: sympy-lim-deriv-ex +:class: dropdown +::: + +1. Calculate $\lim_{x\rightarrow 0} \sin(x)/x$ +2. Calculate the derivative of $\log(x)$ with respect to $x$.
+ +::: {exercise-end} +::: + ++++ + +::: {index} integration +::: + +### Integration + +SymPy has support for indefinite and definite integration of transcendental +elementary and special functions via `integrate()` facility, which uses +the powerful extended Risch-Norman algorithm and some heuristics and pattern +matching. You can integrate elementary functions: + +```{code-cell} +sym.integrate(6 * x ** 5, x) +``` + +```{code-cell} +sym.integrate(sym.sin(x), x) +``` + +```{code-cell} +sym.integrate(sym.log(x), x) +``` + +```{code-cell} +sym.integrate(2 * x + sym.sinh(x), x) +``` + +Also special functions are handled easily: + +```{code-cell} +sym.integrate(sym.exp(-x ** 2) * sym.erf(x), x) +``` + +It is possible to compute definite integral: + +```{code-cell} +sym.integrate(x**3, (x, -1, 1)) +``` + +```{code-cell} +sym.integrate(sym.sin(x), (x, 0, sym.pi / 2)) +``` + +```{code-cell} +sym.integrate(sym.cos(x), (x, -sym.pi / 2, sym.pi / 2)) +``` + +Also improper integrals are supported as well: + +```{code-cell} +sym.integrate(sym.exp(-x), (x, 0, sym.oo)) +``` + +```{code-cell} +sym.integrate(sym.exp(-x ** 2), (x, -sym.oo, sym.oo)) +``` + +::: {index} equations; algebraic, solve +::: + +## Equation solving + +SymPy is able to solve algebraic equations, in one and several +variables using {func}`~sympy.solveset`: + +```{code-cell} +sym.solveset(x ** 4 - 1, x) +``` + +As you can see it takes as first argument an expression that is +supposed to be equaled to 0. It also has (limited) support for transcendental +equations: + +```{code-cell} +sym.solveset(sym.exp(x) + 1, x) +``` + +:::{admonition} Systems of linear equations +Sympy is able to solve a large part of +polynomial equations, and is also capable of solving multiple +equations with respect to multiple variables giving a tuple as second +argument. 
To do this you use the {func}`~sympy.solve` command: + +```{code-cell} +solution = sym.solve((x + 5 * y - 2, -3 * x + 6 * y - 15), (x, y)) +solution[x], solution[y] +``` + +::: + +Another alternative in the case of polynomial equations is +`factor`. `factor` returns the polynomial factorized into irreducible +terms, and is capable of computing the factorization over various +domains: + +```{code-cell} +f = x ** 4 - 3 * x ** 2 + 1 +sym.factor(f) +``` + +```{code-cell} +sym.factor(f, modulus=5) +``` + +SymPy is also able to solve boolean equations, that is, to decide if a +certain boolean expression is satisfiable or not. For this, we use the +function satisfiable: + +```{code-cell} +sym.satisfiable(x & y) +``` + +This tells us that `(x & y)` is True whenever `x` and `y` are both True. +If an expression cannot be true, i.e. no values of its arguments can make +the expression True, it will return False: + +```{code-cell} +sym.satisfiable(x & ~x) +``` + +::: {exercise-start} +:label: sympy-solve-roots-ex +:class: dropdown +::: + +1. Solve the system of equations $x + y = 2$, $2\cdot x + y = 0$ +2. Are there boolean values `x`, `y` that make `(~x | y) & (~y | x)` true? + +::: {exercise-end} +::: + ++++ + +## Linear Algebra + +:::{index} Matrix +::: + +### Matrices + +Matrices are created as instances of the Matrix class: + +```{code-cell} +sym.Matrix([[1, 0], [0, 1]]) +``` + +unlike a NumPy array, you can also put Symbols in it: + +```{code-cell} +x, y = sym.symbols('x, y') +A = sym.Matrix([[1, x], [y, 1]]) +A +``` + +```{code-cell} +A**2 +``` + +::: {index} equations; differential, diff, dsolve +::: + +### Differential Equations + +SymPy is capable of solving (some) Ordinary Differential Equations. +To solve differential equations, use dsolve. First, create +an undefined function by passing cls=Function to the symbols function: + +```{code-cell} +f, g = sym.symbols('f g', cls=sym.Function) +``` + +f and g are now undefined functions.
We can call f(x), and it will represent +an unknown function: + +```{code-cell} +f(x) +``` + +```{code-cell} +f(x).diff(x, x) + f(x) +``` + +```{code-cell} +sym.dsolve(f(x).diff(x, x) + f(x), f(x)) +``` + +Keyword arguments can be given to this function in order to help it +find the best possible resolution system. For example, if you know +that it is a separable equation, you can use keyword `hint='separable'` +to force dsolve to resolve it as a separable equation: + +```{code-cell} +sym.dsolve(sym.sin(x) * sym.cos(f(x)) + sym.cos(x) * sym.sin(f(x)) * f(x).diff(x), f(x), hint='separable') +``` + +::: {exercise-start} +:label: sympy-solve-bernoulli +:class: dropdown +::: + +1. Solve the Bernoulli differential equation + + $$ + x \frac{d f(x)}{d x} + f(x) - f(x)^2=0 + $$ + +2. Solve the same equation using `hint='Bernoulli'`. What do you observe? + +::: {exercise-end} +::: diff --git a/packages/sympy.rst b/packages/sympy.rst deleted file mode 100644 index 8f1db841e..000000000 --- a/packages/sympy.rst +++ /dev/null @@ -1,466 +0,0 @@ - -.. TODO: bench and fit in 1:30 - -.. _sympy: - -====================================== -Sympy : Symbolic Mathematics in Python -====================================== - -**Author**: *Fabian Pedregosa* - -.. topic:: Objectives - - 1. Evaluate expressions with arbitrary precision. - 2. Perform algebraic manipulations on symbolic expressions. - 3. Perform basic calculus tasks (limits, differentiation and - integration) with symbolic expressions. - 4. Solve polynomial and transcendental equations. - 5. Solve some differential equations. - -.. role:: input(strong) - -**What is SymPy?** SymPy is a Python library for symbolic mathematics. It -aims to be an alternative to systems such as Mathematica or Maple while keeping -the code as simple as possible and easily -extensible. SymPy is written entirely in Python and does not require any -external libraries.
- -Sympy documentation and packages for installation can be found on -https://www.sympy.org/ - -.. contents:: Chapters contents - :local: - :depth: 4 - - -First Steps with SymPy -====================== - - -Using SymPy as a calculator ---------------------------- - -SymPy defines three numerical types: ``Real``, ``Rational`` and ``Integer``. - -The Rational class represents a rational number as a pair of two -Integers: the numerator and the denominator, so ``Rational(1, 2)`` -represents 1/2, ``Rational(5, 2)`` 5/2 and so on:: - - >>> import sympy as sym - >>> a = sym.Rational(1, 2) - - >>> a - 1/2 - - >>> a*2 - 1 - -SymPy uses mpmath in the background, which makes it possible to -perform computations using arbitrary-precision arithmetic. That -way, some special constants, like :math:`e`, :math:`pi`, :math:`oo` (Infinity), -are treated as -symbols and can be evaluated with arbitrary precision:: - - >>> sym.pi**2 - pi**2 - - >>> sym.pi.evalf() - 3.14159265358979 - - >>> (sym.pi + sym.exp(1)).evalf() - 5.85987448204884 - -as you see, ``evalf`` evaluates the expression to a floating-point number. - -There is also a class representing mathematical infinity, called -``oo``:: - - >>> sym.oo > 99999 - True - >>> sym.oo + 1 - oo - - -.. topic:: **Exercises** - :class: green - - 1. Calculate :math:`\sqrt{2}` with 100 decimals. - 2. Calculate :math:`1/2 + 1/3` in rational arithmetic. - - -Symbols -------- - -In contrast to other Computer Algebra Systems, in SymPy you have to declare -symbolic variables explicitly:: - - >>> x = sym.Symbol('x') - >>> y = sym.Symbol('y') - -Then you can manipulate them:: - - >>> x + y + x - y - 2*x - - >>> (x + y) ** 2 - (x + y)**2 - -Symbols can now be manipulated using some of python operators: ``+``, ``-``, -``*``, ``**`` (arithmetic), ``&``, ``|``, ``~``, ``>>``, ``<<`` (boolean). - - -.. topic:: **Printing** - - Sympy allows for control of the display of the output. 
From here we use the - following setting for printing:: - - >>> sym.init_printing(use_unicode=False, wrap_line=True) - - - -Algebraic manipulations -======================= - -SymPy is capable of performing powerful algebraic manipulations. We'll -take a look into some of the most frequently used: expand and simplify. - -Expand ------- - -Use this to expand an algebraic expression. It will try to denest -powers and multiplications:: - - >>> sym.expand((x + y) ** 3) - 3 2 2 3 - x + 3*x *y + 3*x*y + y - >>> 3 * x * y ** 2 + 3 * y * x ** 2 + x ** 3 + y ** 3 - 3 2 2 3 - x + 3*x *y + 3*x*y + y - - -Further options can be given in form on keywords:: - - >>> sym.expand(x + y, complex=True) - re(x) + re(y) + I*im(x) + I*im(y) - >>> sym.I * sym.im(x) + sym.I * sym.im(y) + sym.re(x) + sym.re(y) - re(x) + re(y) + I*im(x) + I*im(y) - - >>> sym.expand(sym.cos(x + y), trig=True) - -sin(x)*sin(y) + cos(x)*cos(y) - >>> sym.cos(x) * sym.cos(y) - sym.sin(x) * sym.sin(y) - -sin(x)*sin(y) + cos(x)*cos(y) - -Simplify --------- - -Use simplify if you would like to transform an expression into a -simpler form:: - - >>> sym.simplify((x + x * y) / x) - y + 1 - - -Simplification is a somewhat vague term, and more precises -alternatives to simplify exists: ``powsimp`` (simplification of -exponents), ``trigsimp`` (for trigonometric expressions) , ``logcombine``, -``radsimp``, together. - -.. topic:: **Exercises** - :class: green - - 1. Calculate the expanded form of :math:`(x+y)^6`. - 2. 
Simplify the trigonometric expression :math:`\sin(x) / \cos(x)` - - -Calculus -======== - -Limits ------- - -Limits are easy to use in SymPy, they follow the syntax ``limit(function, -variable, point)``, so to compute the limit of :math:`f(x)` as -:math:`x \rightarrow 0`, you would issue ``limit(f, x, 0)``:: - - >>> sym.limit(sym.sin(x) / x, x, 0) - 1 - -you can also calculate the limit at infinity:: - - >>> sym.limit(x, x, sym.oo) - oo - - >>> sym.limit(1 / x, x, sym.oo) - 0 - - >>> sym.limit(x ** x, x, 0) - 1 - - -.. index:: differentiation, diff - -Differentiation ---------------- - -You can differentiate any SymPy expression using ``diff(func, -var)``. Examples:: - - >>> sym.diff(sym.sin(x), x) - cos(x) - >>> sym.diff(sym.sin(2 * x), x) - 2*cos(2*x) - - >>> sym.diff(sym.tan(x), x) - 2 - tan (x) + 1 - -You can check that it is correct by:: - - >>> sym.limit((sym.tan(x + y) - sym.tan(x)) / y, y, 0) - 1 - ------- - 2 - cos (x) - -Which is equivalent since - -.. math:: \sec(x) = \frac{1}{\cos(x)} and \sec^2(x) = \tan^2(x) + 1. - -You can check this as well:: - - >>> sym.trigsimp(sym.diff(sym.tan(x), x)) - 1 - ------- - 2 - cos (x) - -Higher derivatives can be calculated using the ``diff(func, var, n)`` method:: - - >>> sym.diff(sym.sin(2 * x), x, 1) - 2*cos(2*x) - - >>> sym.diff(sym.sin(2 * x), x, 2) - -4*sin(2*x) - - >>> sym.diff(sym.sin(2 * x), x, 3) - -8*cos(2*x) - - -Series expansion ----------------- - -SymPy also knows how to compute the Taylor series of an expression at -a point. Use ``series(expr, var)``:: - - >>> sym.series(sym.cos(x), x) - 2 4 - x x / 6\ - 1 - -- + -- + O\x / - 2 24 - >>> sym.series(1/sym.cos(x), x) - 2 4 - x 5*x / 6\ - 1 + -- + ---- + O\x / - 2 24 - - -.. topic:: **Exercises** - :class: green - - 1. Calculate :math:`\lim_{x\rightarrow 0} \sin(x)/x` - 2. Calculate the derivative of :math:`log(x)` for :math:`x`. - -.. 
index:: integration - -Integration ------------ - -SymPy has support for indefinite and definite integration of transcendental -elementary and special functions via ``integrate()`` facility, which uses -the powerful extended Risch-Norman algorithm and some heuristics and pattern -matching. You can integrate elementary functions:: - - >>> sym.integrate(6 * x ** 5, x) - 6 - x - >>> sym.integrate(sym.sin(x), x) - -cos(x) - >>> sym.integrate(sym.log(x), x) - x*log(x) - x - >>> sym.integrate(2 * x + sym.sinh(x), x) - 2 - x + cosh(x) - -Also special functions are handled easily:: - - >>> sym.integrate(sym.exp(-x ** 2) * sym.erf(x), x) - ____ 2 - \/ pi *erf (x) - -------------- - 4 - -It is possible to compute definite integral:: - - >>> sym.integrate(x**3, (x, -1, 1)) - 0 - >>> sym.integrate(sym.sin(x), (x, 0, sym.pi / 2)) - 1 - >>> sym.integrate(sym.cos(x), (x, -sym.pi / 2, sym.pi / 2)) - 2 - -Also improper integrals are supported as well:: - - >>> sym.integrate(sym.exp(-x), (x, 0, sym.oo)) - 1 - >>> sym.integrate(sym.exp(-x ** 2), (x, -sym.oo, sym.oo)) - ____ - \/ pi - - -.. index:: equations; algebraic, solve - - -Equation solving -================ - -SymPy is able to solve algebraic equations, in one and several -variables using :func:`~sympy.solveset`:: - - >>> sym.solveset(x ** 4 - 1, x) - {-1, 1, -I, I} - -As you can see it takes as first argument an expression that is -supposed to be equaled to 0. It also has (limited) support for transcendental -equations:: - - >>> sym.solveset(sym.exp(x) + 1, x) - {I*(2*n*pi + pi) | n in Integers} - -.. topic:: **Systems of linear equations** - - Sympy is able to solve a large part of - polynomial equations, and is also capable of solving multiple - equations with respect to multiple variables giving a tuple as second - argument. 
To do this you use the :func:`~sympy.solve` command:: - - >>> solution = sym.solve((x + 5 * y - 2, -3 * x + 6 * y - 15), (x, y)) - >>> solution[x], solution[y] - (-3, 1) - -Another alternative in the case of polynomial equations is -`factor`. `factor` returns the polynomial factorized into irreducible -terms, and is capable of computing the factorization over various -domains:: - - >>> f = x ** 4 - 3 * x ** 2 + 1 - >>> sym.factor(f) - / 2 \ / 2 \ - \x - x - 1/*\x + x - 1/ - - >>> sym.factor(f, modulus=5) - 2 2 - (x - 2) *(x + 2) - -SymPy is also able to solve boolean equations, that is, to decide if a -certain boolean expression is satisfiable or not. For this, we use the -function satisfiable:: - - >>> sym.satisfiable(x & y) - {x: True, y: True} - -This tells us that ``(x & y)`` is True whenever ``x`` and ``y`` are both True. -If an expression cannot be true, i.e. no values of its arguments can make -the expression True, it will return False:: - - >>> sym.satisfiable(x & ~x) - False - - - -.. topic:: **Exercises** - :class: green - - 1. Solve the system of equations :math:`x + y = 2`, :math:`2\cdot x + y = 0` - 2. Are there boolean values ``x``, ``y`` that make ``(~x | y) & (~y | x)`` true? - - -Linear Algebra -============== - -.. index:: Matrix - -Matrices --------- - -Matrices are created as instances from the Matrix class:: - - >>> sym.Matrix([[1, 0], [0, 1]]) - [1 0] - [ ] - [0 1] - -unlike a NumPy array, you can also put Symbols in it:: - - >>> x, y = sym.symbols('x, y') - >>> A = sym.Matrix([[1, x], [y, 1]]) - >>> A - [1 x] - [ ] - [y 1] - - >>> A**2 - [x*y + 1 2*x ] - [ ] - [ 2*y x*y + 1] - - -.. index:: equations; differential, diff, dsolve - -Differential Equations ----------------------- - -SymPy is capable of solving (some) Ordinary Differential. -To solve differential equations, use dsolve. 
First, create -an undefined function by passing cls=Function to the symbols function:: - - >>> f, g = sym.symbols('f g', cls=sym.Function) - -f and g are now undefined functions. We can call f(x), and it will represent -an unknown function:: - - >>> f(x) - f(x) - - >>> f(x).diff(x, x) + f(x) - 2 - d - f(x) + ---(f(x)) - 2 - dx - - >>> sym.dsolve(f(x).diff(x, x) + f(x), f(x)) - f(x) = C1*sin(x) + C2*cos(x) - - -Keyword arguments can be given to this function in order to help if -find the best possible resolution system. For example, if you know -that it is a separable equations, you can use keyword ``hint='separable'`` -to force dsolve to resolve it as a separable equation:: - - >>> sym.dsolve(sym.sin(x) * sym.cos(f(x)) + sym.cos(x) * sym.sin(f(x)) * f(x).diff(x), f(x), hint='separable') - / C1 \ / C1 \ - [f(x) = - acos|------| + 2*pi, f(x) = acos|------|] - \cos(x)/ \cos(x)/ - - - -.. topic:: **Exercises** - :class: green - - 1. Solve the Bernoulli differential equation - - .. math:: - x \frac{d f(x)}{x} + f(x) - f(x)^2=0 - - 2. Solve the same equation using ``hint='Bernoulli'``. What do you observe ? diff --git a/preface.rst b/preface.rst deleted file mode 100644 index e3754f411..000000000 --- a/preface.rst +++ /dev/null @@ -1,60 +0,0 @@ -==================================== -About the Scientific Python Lectures -==================================== - -.. contents:: - :local: - :depth: 1 - -.. Hack to have multi-column layout in authors list - -*Release:* |release| - -.. image:: https://zenodo.org/badge/doi/10.5281/zenodo.594102.svg - :target: http://dx.doi.org/10.5281/zenodo.594102 - - -.. raw:: html - - - - - -.. include:: AUTHORS.rst - -.. include:: CHANGES.rst - -.. include:: LICENSE.rst - -.. 
include:: CONTRIBUTING.rst diff --git a/pyproject.toml b/pyproject.toml index d1c086fbc..353fe8eeb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,8 @@ exclude = ''' | .*/setup.*\.py$ | .*/demo.py$ | .*/auto_examples/ + | _scripts/examples2nb.py$ + | _scripts/post_parser.py$ | advanced/mathematical_optimization/examples/plot_gradient_descent\.py$ | advanced/mathematical_optimization/examples/helper/compare_optimizers\.py$ | advanced/advanced_numpy/examples/view-colors\.py$ diff --git a/pyximages/README.md b/pyximages/README.md index 49a1984a8..0ef675a68 100644 --- a/pyximages/README.md +++ b/pyximages/README.md @@ -1,10 +1,14 @@ +--- +orphan: true +--- + # Content of directory pyximages -This directory contains files related to schematic drawings in the -Scientific Python Lectures which cannot be produced by means of matplotlib in a simple way -and for which no source exists in the repository so far. For each image, a -Python source using PyX, a bitmap image for the HTML version, and a PDF -image for the PDF version of the lectures are present. +This directory contains files related to schematic drawings in the Scientific +Python Lectures which cannot be produced by means of Matplotlib in a simple way +and for which no source exists in the repository so far. For each image, +a Python source using PyX, a bitmap image for the HTML version, and a PDF image +for the PDF version of the lectures are present. The Python source requires the pip installable PyX package and a TeX installation. The image sources should compile with PyX version 0.14+. 
Note diff --git a/requirements.txt b/requirements.txt index cfbb7c657..f6efb49f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,23 +1,26 @@ +# Requirements for notebooks / Binderhub numpy==2.2.5 scipy==1.15.2 matplotlib==3.10.1 pandas==2.2.3 -patsy==1.0.1 -pyarrow==20.0.0 scikit-learn==1.6.1 scikit-image==0.25.2 sympy==1.14.0 statsmodels==0.14.4 seaborn==0.13.2 pytest>=8.3 -sphinx>=8.2 -sphinx-gallery>=0.19 +sphinx sphinx-copybutton coverage>=7.6 Pillow pooch ipython pickleshare -pre-commit==4.2.0 requests -sphinxcontrib-jquery +xlrd +openpyxl +jupytext +# For pretty rendering in local JupyterLab or JupyterLite. +jupyterlab_myst +# For glue markup in notebooks. +myst_nb diff --git a/sp_lectures.bib b/sp_lectures.bib new file mode 100644 index 000000000..e69de29bb diff --git a/test_requirements.txt b/test_requirements.txt new file mode 100644 index 000000000..6b993fcb1 --- /dev/null +++ b/test_requirements.txt @@ -0,0 +1,6 @@ +# Test requirements +-r requirements.txt +myst_parser +# Needed by markdown-it-py, needed from myst_parser +linkify-it-py +pytest diff --git a/todo.md b/todo.md new file mode 100644 index 000000000..44916ba4b --- /dev/null +++ b/todo.md @@ -0,0 +1,5 @@ +# Outstanding tasks + +- Review which examples can be deleted, now they are included in the main + pages, or in the examples notebooks. +- Consider any examples we can remove from the example notebooks.