|
98 | 98 | "source": [ |
99 | 99 | "# Load data with h5py\n", |
100 | 100 | "# this creates a pointer to the data, but does not actually load it into memory\n", |
101 | | - "import h5py\n", |
102 | 101 | "import os\n", |
103 | | - "f = h5py.File(os.path.join('data', 'random.hdf5'), mode='r')\n", |
104 | | - "dset = f['/x']" |
| 102 | + "\n", |
| 103 | + "import h5py\n", |
| 104 | + "\n", |
| 105 | + "f = h5py.File(os.path.join(\"data\", \"random.hdf5\"), mode=\"r\")\n", |
| 106 | + "dset = f[\"/x\"]" |
105 | 107 | ] |
106 | 108 | }, |
107 | 109 | { |
|
134 | 136 | "# Compute sum of large array, one million numbers at a time\n", |
135 | 137 | "sums = []\n", |
136 | 138 | "for i in range(0, 1_000_000_000, 1_000_000):\n", |
137 | | - " chunk = dset[i: i + 1_000_000] # pull out numpy array\n", |
| 139 | + " chunk = dset[i : i + 1_000_000] # pull out numpy array\n", |
138 | 140 | " sums.append(chunk.sum())\n", |
139 | 141 | "\n", |
140 | 142 | "total = sum(sums)\n", |
|
174 | 176 | "cell_type": "code", |
175 | 177 | "execution_count": null, |
176 | 178 | "metadata": { |
177 | | - "jupyter": { |
178 | | - "source_hidden": true |
179 | | - } |
| 179 | + "tags": [] |
180 | 180 | }, |
181 | 181 | "outputs": [], |
182 | 182 | "source": [ |
183 | 183 | "sums = []\n", |
184 | 184 | "lengths = []\n", |
185 | 185 | "for i in range(0, 1_000_000_000, 1_000_000):\n", |
186 | | - " chunk = dset[i: i + 1_000_000] # pull out numpy array\n", |
| 186 | + " chunk = dset[i : i + 1_000_000] # pull out numpy array\n", |
187 | 187 | " sums.append(chunk.sum())\n", |
188 | 188 | " lengths.append(len(chunk))\n", |
189 | 189 | "\n", |
|
226 | 226 | "outputs": [], |
227 | 227 | "source": [ |
228 | 228 | "import dask.array as da\n", |
| 229 | + "\n", |
229 | 230 | "x = da.from_array(dset, chunks=(1_000_000,))\n", |
230 | 231 | "x" |
231 | 232 | ] |
|
379 | 380 | "metadata": {}, |
380 | 381 | "outputs": [], |
381 | 382 | "source": [ |
382 | | - "import numpy as np\n", |
383 | 383 | "import dask.array as da\n", |
| 384 | + "import numpy as np\n", |
384 | 385 | "\n", |
385 | | - "x = da.random.normal(10, 0.1, size=(20000, 20000), # 400 million element array \n", |
386 | | - " chunks=(1000, 1000)) # Cut into 1000x1000 sized chunks\n", |
387 | | - "y = x.mean(axis=0)[::100] # Perform NumPy-style operations" |
| 386 | + "x = da.random.normal(\n", |
| 387 | + " 10, 0.1, size=(20000, 20000), chunks=(1000, 1000)\n", |
| 388 | + ") # 400 million element array, cut into 1000x1000 sized chunks\n", |
| 389 | + "y = x.mean(axis=0)[::100] # Perform NumPy-style operations" |
388 | 390 | ] |
389 | 391 | }, |
390 | 392 | { |
|
403 | 405 | "outputs": [], |
404 | 406 | "source": [ |
405 | 407 | "%%time\n", |
406 | | - "y.compute() # Time to compute the result" |
| 408 | + "y.compute() # Time to compute the result" |
407 | 409 | ] |
408 | 410 | }, |
409 | 411 | { |
|
535 | 537 | "metadata": {}, |
536 | 538 | "outputs": [], |
537 | 539 | "source": [ |
538 | | - "import h5py\n", |
539 | | - "from glob import glob\n", |
540 | 540 | "import os\n", |
| 541 | + "from glob import glob\n", |
541 | 542 | "\n", |
542 | | - "filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))\n", |
543 | | - "dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]\n", |
| 543 | + "import h5py\n", |
| 544 | + "\n", |
| 545 | + "filenames = sorted(glob(os.path.join(\"data\", \"weather-big\", \"*.hdf5\")))\n", |
| 546 | + "dsets = [h5py.File(filename, mode=\"r\")[\"/t2m\"] for filename in filenames]\n", |
544 | 547 | "dsets[0]" |
545 | 548 | ] |
546 | 549 | }, |
|
563 | 566 | "import matplotlib.pyplot as plt\n", |
564 | 567 | "\n", |
565 | 568 | "fig = plt.figure(figsize=(16, 8))\n", |
566 | | - "plt.imshow(dsets[0][::4, ::4], cmap='RdBu_r');" |
| 569 | + "plt.imshow(dsets[0][::4, ::4], cmap=\"RdBu_r\");" |
567 | 570 | ] |
568 | 571 | }, |
569 | 572 | { |
|
628 | 631 | "metadata": { |
629 | 632 | "jupyter": { |
630 | 633 | "source_hidden": true |
631 | | - } |
| 634 | + }, |
| 635 | + "tags": [] |
632 | 636 | }, |
633 | 637 | "outputs": [], |
634 | 638 | "source": [ |
|
640 | 644 | "cell_type": "markdown", |
641 | 645 | "metadata": {}, |
642 | 646 | "source": [ |
643 | | - "**Plot the mean of this array along the time (`0th`) axis**" |
| 647 | + "**Plot the mean of this array along the time (`0th`) axis**\n", |
| 648 | + "\n", |
| 649 | + "Complete the following:\n", |
| 650 | + "\n", |
| 651 | + "```python\n", |
| 652 | + "result = ...\n", |
| 653 | + "fig = plt.figure(figsize=(16, 8))\n", |
| 654 | + "plt.imshow(result, cmap='RdBu_r')\n", |
| 655 | + "```" |
644 | 656 | ] |
645 | 657 | }, |
646 | 658 | { |
|
652 | 664 | ] |
653 | 665 | }, |
654 | 666 | "outputs": [], |
655 | | - "source": [ |
656 | | - "# complete the following:\n", |
657 | | - "fig = plt.figure(figsize=(16, 8))\n", |
658 | | - "plt.imshow(..., cmap='RdBu_r')" |
659 | | - ] |
| 667 | + "source": [] |
660 | 668 | }, |
661 | 669 | { |
662 | 670 | "cell_type": "code", |
663 | 671 | "execution_count": null, |
664 | 672 | "metadata": { |
665 | 673 | "jupyter": { |
666 | 674 | "source_hidden": true |
667 | | - } |
| 675 | + }, |
| 676 | + "tags": [] |
668 | 677 | }, |
669 | 678 | "outputs": [], |
670 | 679 | "source": [ |
671 | 680 | "result = x.mean(axis=0)\n", |
672 | 681 | "fig = plt.figure(figsize=(16, 8))\n", |
673 | | - "plt.imshow(result, cmap='RdBu_r');" |
| 682 | + "plt.imshow(result, cmap=\"RdBu_r\");" |
674 | 683 | ] |
675 | 684 | }, |
676 | 685 | { |
|
699 | 708 | "source": [ |
700 | 709 | "result = x[0] - x.mean(axis=0)\n", |
701 | 710 | "fig = plt.figure(figsize=(16, 8))\n", |
702 | | - "plt.imshow(result, cmap='RdBu_r');" |
| 711 | + "plt.imshow(result, cmap=\"RdBu_r\");" |
703 | 712 | ] |
704 | 713 | }, |
705 | 714 | { |
|
756 | 765 | }, |
757 | 766 | "outputs": [], |
758 | 767 | "source": [ |
759 | | - "import h5py\n", |
760 | | - "from glob import glob\n", |
761 | 768 | "import os\n", |
| 769 | + "from glob import glob\n", |
| 770 | + "\n", |
762 | 771 | "import dask.array as da\n", |
| 772 | + "import h5py\n", |
763 | 773 | "\n", |
764 | | - "filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))\n", |
765 | | - "dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]\n", |
| 774 | + "filenames = sorted(glob(os.path.join(\"data\", \"weather-big\", \"*.hdf5\")))\n", |
| 775 | + "dsets = [h5py.File(filename, mode=\"r\")[\"/t2m\"] for filename in filenames]\n", |
766 | 776 | "\n", |
767 | 777 | "arrays = [da.from_array(dset, chunks=(500, 500)) for dset in dsets]\n", |
768 | 778 | "\n", |
769 | 779 | "x = da.stack(arrays, axis=0)\n", |
770 | 780 | "\n", |
771 | 781 | "result = x[:, ::2, ::2]\n", |
772 | 782 | "\n", |
773 | | - "da.to_zarr(result, os.path.join('data', 'myfile.zarr'), overwrite=True)" |
| 783 | + "da.to_zarr(result, os.path.join(\"data\", \"myfile.zarr\"), overwrite=True)" |
774 | 784 | ] |
775 | 785 | }, |
776 | 786 | { |
|
797 | 807 | "source": [ |
798 | 808 | "import numpy as np\n", |
799 | 809 | "\n", |
| 810 | + "\n", |
800 | 811 | "# make a random collection of particles\n", |
801 | 812 | "def make_cluster(natoms, radius=40, seed=1981):\n", |
802 | 813 | " np.random.seed(seed)\n", |
803 | | - " cluster = np.random.normal(0, radius, (natoms,3))-0.5\n", |
| 814 | + " cluster = np.random.normal(0, radius, (natoms, 3)) - 0.5\n", |
804 | 815 | " return cluster\n", |
805 | 816 | "\n", |
| 817 | + "\n", |
806 | 818 | "def lj(r2):\n", |
807 | | - " sr6 = (1./r2)**3\n", |
808 | | - " pot = 4.*(sr6*sr6 - sr6)\n", |
| 819 | + " sr6 = (1.0 / r2) ** 3\n", |
| 820 | + " pot = 4.0 * (sr6 * sr6 - sr6)\n", |
809 | 821 | " return pot\n", |
810 | 822 | "\n", |
| 823 | + "\n", |
811 | 824 | "# build the matrix of distances\n", |
812 | 825 | "def distances(cluster):\n", |
813 | 826 | " diff = cluster[:, np.newaxis, :] - cluster[np.newaxis, :, :]\n", |
814 | | - " mat = (diff*diff).sum(-1)\n", |
| 827 | + " mat = (diff * diff).sum(-1)\n", |
815 | 828 | " return mat\n", |
816 | 829 | "\n", |
| 830 | + "\n", |
817 | 831 | "# the lj function is evaluated over the upper triangle\n", |
818 | 832 | "# after removing distances near zero\n", |
819 | 833 | "def potential(cluster):\n", |
|
886 | 900 | "source": [ |
887 | 901 | "import dask.array as da\n", |
888 | 902 | "\n", |
| 903 | + "\n", |
889 | 904 | "# compute the potential on the entire\n", |
890 | 905 | "# matrix of distances and ignore division by zero\n", |
891 | 906 | "def potential_dask(cluster):\n", |
892 | 907 | " d2 = distances(cluster)\n", |
893 | | - " energy = da.nansum(lj(d2))/2.\n", |
| 908 | + " energy = da.nansum(lj(d2)) / 2.0\n", |
894 | 909 | " return energy" |
895 | 910 | ] |
896 | 911 | }, |
|
909 | 924 | "source": [ |
910 | 925 | "from os import cpu_count\n", |
911 | 926 | "\n", |
912 | | - "dcluster = da.from_array(cluster, chunks=cluster.shape[0]//cpu_count())" |
| 927 | + "dcluster = da.from_array(cluster, chunks=cluster.shape[0] // cpu_count())" |
913 | 928 | ] |
914 | 929 | }, |
915 | 930 | { |
|
974 | 989 | "metadata": { |
975 | 990 | "anaconda-cloud": {}, |
976 | 991 | "kernelspec": { |
977 | | - "display_name": "Python 3", |
| 992 | + "display_name": "Python 3 (ipykernel)", |
978 | 993 | "language": "python", |
979 | 994 | "name": "python3" |
980 | 995 | }, |
|
988 | 1003 | "name": "python", |
989 | 1004 | "nbconvert_exporter": "python", |
990 | 1005 | "pygments_lexer": "ipython3", |
991 | | - "version": "3.7.6" |
| 1006 | + "version": "3.10.4" |
992 | 1007 | } |
993 | 1008 | }, |
994 | 1009 | "nbformat": 4, |
|
0 commit comments